"""Count bigram or trigram frequencies in a text file and plot the most common ones."""
from argparse import ArgumentParser
from collections import Counter
from operator import itemgetter
import re


def checkArguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            lets argparse read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``filename``, ``bigram`` and ``trigram``.

    Exits with a usage error when the filename is missing or when neither
    ``-b`` nor ``-t`` is given, instead of crashing later on.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    parser.add_argument('-f', '--filename', required=True,
                        help='Text to analyze')
    parser.add_argument('-b', '--bigram', help='Get bigram',
                        action="store_true")
    parser.add_argument('-t', '--trigram', help='Get trigram',
                        action="store_true")
    args = parser.parse_args(argv)
    if not (args.bigram or args.trigram):
        parser.error("choose at least one of -b/--bigram or -t/--trigram")
    return args


def readFile(filename):
    """Return the whole content of ``filename`` as a single string."""
    with open(filename, 'r') as f:
        return f.read()


def getLetters(text, pos):
    """Count every run of ``pos`` consecutive characters in ``text``.

    Args:
        text: The string to analyze.
        pos: Length of each gram (2 for bigrams, 3 for trigrams).

    Returns:
        A ``Counter`` mapping each gram to its number of occurrences.
        It is empty when ``text`` is shorter than ``pos``.
    """
    # Stop at the last index where a full-length window still fits, so no
    # truncated gram is produced at the end of the text.
    return Counter(text[idx: idx + pos] for idx in range(len(text) - pos + 1))


def topGrams(counts, size, limit=10):
    """Return the most frequent letters-only grams, least frequent first.

    Args:
        counts: Mapping of gram to occurrence count (e.g. from getLetters).
        size: Exact gram length to keep.
        limit: Maximum number of entries returned.

    Returns:
        A list of ``(gram, count)`` tuples sorted by ascending count, holding
        at most ``limit`` entries taken from the high end of the ranking.
    """
    pattern = re.compile(r"[A-Za-z]{%d}" % size)
    lettersOnly = [
        (gram, count)
        for gram, count in counts.items()
        if pattern.fullmatch(gram)
    ]
    # list.sort is stable, so grams with equal counts keep the order in
    # which they were first seen.
    lettersOnly.sort(key=itemgetter(1))
    return lettersOnly[max(len(lettersOnly) - limit, 0):]


def main():
    """Read the file, count the requested grams and show a bar chart."""
    args = checkArguments()
    text = readFile(args.filename)

    # When both flags are given the trigram analysis takes precedence.
    size = 3 if args.trigram else 2
    ranked = topGrams(getLetters(text, size), size)

    # Imported here so the counting helpers stay importable without the
    # plotting stack.
    import matplotlib.pyplot as plt

    labels = [gram for gram, _ in ranked]
    heights = [count for _, count in ranked]
    barWidth = 0.5
    plt.bar(labels, heights, barWidth)
    plt.show()


if __name__ == "__main__":
    main()