Merge
This commit is contained in:
commit
cedf3a37f9
32
getgrams.py
32
getgrams.py
@ -2,6 +2,8 @@
|
||||
|
||||
from collections import Counter
|
||||
from argparse import ArgumentParser
|
||||
import matplotlib.pyplot as plt
|
||||
import re
|
||||
|
||||
|
||||
def checkArguments():
|
||||
@ -25,9 +27,6 @@ def readFile(filename):
|
||||
return text
|
||||
|
||||
def getLetters(text, pos):
    """Count every contiguous n-gram of length ``pos`` found in ``text``.

    A window of ``pos`` characters is slid over ``text`` one character at a
    time, and each full window is tallied.

    Args:
        text: The string to scan.
        pos: The n-gram length (2 for bigrams, 3 for trigrams).

    Returns:
        A ``collections.Counter`` mapping each n-gram to its number of
        occurrences. The counter is empty when ``pos`` is not positive or
        when ``text`` is shorter than ``pos``.
    """
    if pos < 1:
        # A non-positive width would only produce empty-string "grams".
        return Counter()
    # The last valid start index is len(text) - pos; stopping there keeps
    # truncated windows at the end of the text out of the counts.
    return Counter(text[idx:idx + pos] for idx in range(len(text) - pos + 1))
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -39,4 +38,29 @@ if __name__ == "__main__":
|
||||
if args.trigram:
|
||||
data = getLetters(text, 3)
|
||||
|
||||
print(data)
|
||||
dataSorted = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
|
||||
dataSorted = list(dataSorted.items())
|
||||
newDataSorted = []
|
||||
|
||||
if args.bigram:
|
||||
reg = "[A-Za-z]{2}"
|
||||
if args.trigram:
|
||||
reg = "[A-Za-z]{3}"
|
||||
|
||||
for entry in dataSorted:
|
||||
if re.match(reg, entry[0]):
|
||||
newDataSorted.append((
|
||||
entry[0], entry[1]
|
||||
))
|
||||
|
||||
lenDataSorted = len(newDataSorted)
|
||||
|
||||
x = []
|
||||
y = []
|
||||
for entry in newDataSorted[lenDataSorted - 10:]:
|
||||
x.append(entry[0])
|
||||
y.append(entry[1])
|
||||
|
||||
largeur = 0.5
|
||||
plt.bar(x, y, largeur)
|
||||
plt.show()
|
||||
|
Loading…
Reference in New Issue
Block a user