Update get grams

This commit is contained in:
gbucchino 2022-06-30 10:45:58 +02:00
parent b19a4a46f0
commit 0ea3a9f18a

@ -3,6 +3,7 @@
from collections import Counter
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import re
def checkArguments():
@ -26,7 +27,6 @@ def readFile(filename):
return text
def getLetters(text, pos):
    """Count every contiguous substring of length ``pos`` in ``text``.

    Args:
        text: The string to scan.
        pos: Length of each substring (n-gram) to count, e.g. 3 for trigrams.

    Returns:
        A ``Counter`` mapping each ``pos``-character substring to the number
        of times it occurs in ``text``. The counter is empty when ``text`` is
        shorter than ``pos``.
    """
    # The last full window starts at len(text) - pos. Stopping there keeps
    # truncated, shorter-than-pos fragments at the end of the text from being
    # counted as if they were real n-grams.
    return Counter(text[idx:idx + pos] for idx in range(len(text) - pos + 1))
if __name__ == "__main__":
@ -39,21 +39,23 @@ if __name__ == "__main__":
# NOTE(review): these lines come from a rendered commit diff. Indentation has
# been lost and lines from before and after the commit appear together, so
# read the comments below as a description of each statement, not as a
# guarantee of what the final script executes.

# Count substrings of `text` by calling getLetters with pos=3.
data = getLetters(text, 3)
# Rebuild the counts as a dict whose items are ordered by ascending count.
dataSorted = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
lenDataSorted = len(dataSorted)
# NOTE(review): invalidChar is not referenced by any line visible here.
invalidChar = [" ", "!", "?", ":", "\n", "'", ","]
# Turn the dict into a list of (gram, count) pairs so it can be sliced.
dataSorted = list(dataSorted.items())
newData1 = []
newData2 = []
# The last 10 entries carry the highest counts because of the ascending sort.
for entry in dataSorted[lenDataSorted - 10:]:
#newData[entry[0]] = entry[1]
newData1.append(entry[0])
newData2.append(entry[1])
print(newData1)
print(newData2)
# Plain copies of the two lists; x and y are reassigned further down.
x = [x for x in newData1]
y = [tmp for tmp in newData2]
# Keep only the entries whose gram starts with two ASCII letters.
newDataSorted = []
for entry in dataSorted:
# re.match only anchors at the start of the string, so this checks the first
# two characters of the gram and leaves any following character unchecked.
if re.match("[A-Za-z]{2}", entry[0]):
newDataSorted.append((
entry[0], entry[1]
))
# Recompute the length against the filtered list before slicing it.
lenDataSorted = len(newDataSorted)
x = []
y = []
# Split the last 10 filtered entries into bar labels (x) and bar heights (y).
for entry in newDataSorted[lenDataSorted - 10:]:
x.append(entry[0])
y.append(entry[1])
# Passed as the third positional argument of plt.bar, i.e. the bar width
# ("largeur" is French for "width").
largeur = 0.5
plt.bar(x, y, largeur)
plt.show()