From 0ea3a9f18a321c7b70d836e69206cd633b2aeed4 Mon Sep 17 00:00:00 2001
From: gbucchino
Date: Thu, 30 Jun 2022 10:45:58 +0200
Subject: [PATCH] Update get grams

---
NOTE(review): this patch was recovered from a copy in which every newline and
indentation run had been collapsed to a single space. Line breaks and 4-space
Python indentation were reconstructed from the hunk headers and the diffstat;
the per-hunk line counts below match those headers. The following could NOT be
recovered from the collapsed text and are assumptions to confirm against
getgrams.py at blob 3653a67 before applying:
  * the position of the blank context line at the top of the third hunk, and
    of the blank context line next to "dataSorted = list(...)";
  * whether the removed print() calls were inside or after the for loop;
  * trailing whitespace, if any, on blank added/removed lines.
Blank context lines are written as a single space, as unified diff requires.

 getgrams.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/getgrams.py b/getgrams.py
index 3653a67..274ce03 100644
--- a/getgrams.py
+++ b/getgrams.py
@@ -3,6 +3,7 @@
 from collections import Counter
 from argparse import ArgumentParser
 import matplotlib.pyplot as plt
+import re
 
 
 def checkArguments():
@@ -26,7 +27,6 @@ def readFile(filename):
     return text
 
 def getLetters(text, pos):
-    # return Counter(text[idx : idx + pos] for idx in range(len(text) - 1))
     return Counter([text[idx: idx + pos] for idx in range(len(text) - 1)])
 
 if __name__ == "__main__":
@@ -39,21 +39,23 @@ if __name__ == "__main__":
 
     data = getLetters(text, 3)
     dataSorted = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])}
-    lenDataSorted = len(dataSorted)
-    invalidChar = [" ", "!", "?", ":", "\n", "'", ","]
     dataSorted = list(dataSorted.items())
 
-    newData1 = []
-    newData2 = []
-    for entry in dataSorted[lenDataSorted - 10:]:
-        #newData[entry[0]] = entry[1]
-        newData1.append(entry[0])
-        newData2.append(entry[1])
-    print(newData1)
-    print(newData2)
-
-    x = [x for x in newData1]
-    y = [tmp for tmp in newData2]
+    newDataSorted = []
+    for entry in dataSorted:
+        if re.match("[A-Za-z]{2}", entry[0]):
+            newDataSorted.append((
+                entry[0], entry[1]
+            ))
+
+    lenDataSorted = len(newDataSorted)
+
+    x = []
+    y = []
+    for entry in newDataSorted[lenDataSorted - 10:]:
+        x.append(entry[0])
+        y.append(entry[1])
+
     largeur = 0.5
     plt.bar(x, y, largeur)
     plt.show()