"""Count letter bigrams/trigrams in a text file and plot the most frequent.

Reconstructed from a three-commit patch series against ``getgrams.py`` by
gbucchino ("Update getgrams", 2022-06-29; "Update get grams", 2022-06-30;
"Update get grams", 2022-06-30).  This is the state of the script after the
last commit, with the fixes described in the individual docstrings below.

NOTE(review): the first hunk of the series starts at line 2 of the original
file, so whatever was on line 1 (not visible in the patches) is not
reproduced here -- confirm against the repository.
"""

from argparse import ArgumentParser
from collections import Counter
from operator import itemgetter
import re


def checkArguments(argv=None):
    """Parse the command-line options.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what the script
            entry point relies on.

    Returns:
        The ``argparse.Namespace`` with ``filename``, ``bigram`` and
        ``trigram`` attributes.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    # Without a filename the script cannot do anything; let argparse report
    # that instead of failing later inside open(None).
    parser.add_argument('-f', '--filename', help='Text to analyze',
                        required=True)
    parser.add_argument('-b', '--bigram', help='Get bigram',
                        action="store_true")
    parser.add_argument('-t', '--trigram', help='Get trigram',
                        action="store_true")
    return parser.parse_args(argv)


def readFile(filename):
    """Read the file at *filename* and return its whole content as a string."""
    with open(filename, 'r') as f:
        return f.read()


def getLetters(text, pos):
    """Count every run of *pos* consecutive characters in *text*.

    Args:
        text: The string to analyze.
        pos: Length of each n-gram (2 for bigrams, 3 for trigrams).

    Returns:
        A ``Counter`` mapping each n-gram to its number of occurrences.
        It is empty when *text* is shorter than *pos*.

    Raises:
        ValueError: If *pos* is smaller than 1.
    """
    if pos < 1:
        raise ValueError("pos must be a positive integer")
    # The last full window starts at len(text) - pos.  The bound used to be
    # len(text) - 1 for every size, which counted a truncated fragment at
    # the end of the text for trigrams and skipped the last character for
    # single letters.
    return Counter(text[idx: idx + pos] for idx in range(len(text) - pos + 1))


def selectTopGrams(data, size, limit=10):
    """Return the most frequent purely alphabetic n-grams.

    Args:
        data: Mapping of n-gram to count, e.g. the result of ``getLetters``.
        size: Length of the n-grams to keep.
        limit: Maximum number of entries to return.

    Returns:
        A list of ``(ngram, count)`` tuples sorted by ascending count, so
        the most frequent n-gram comes last.
    """
    if limit <= 0:
        return []
    # Keep only n-grams made of exactly `size` ASCII letters; this drops
    # anything containing spaces, punctuation or newlines.
    pattern = re.compile(r"[A-Za-z]{%d}" % size)
    letterGrams = [
        (gram, count) for gram, count in data.items()
        if pattern.fullmatch(gram)
    ]
    # list.sort is stable, so n-grams with equal counts keep the order in
    # which they were first seen in the text.
    letterGrams.sort(key=itemgetter(1))
    return letterGrams[-limit:]


def plotGrams(grams, barWidth=0.5):
    """Show a bar chart with one bar per ``(ngram, count)`` pair of *grams*."""
    # Imported here so the counting helpers above stay importable on
    # machines without matplotlib or without a display.
    import matplotlib.pyplot as plt

    x = [gram for gram, _ in grams]
    y = [count for _, count in grams]
    plt.bar(x, y, barWidth)
    plt.show()


def main():
    """Entry point: parse the options, count the n-grams and plot the top 10."""
    args = checkArguments()

    # When both flags are given the trigram wins, as in the original script.
    if args.trigram:
        size = 3
    elif args.bigram:
        size = 2
    else:
        # The script used to die with a NameError when no size was chosen.
        raise SystemExit(
            "Choose an n-gram size with -b/--bigram or -t/--trigram")

    text = readFile(args.filename)
    top = selectTopGrams(getLetters(text, size), size)
    if not top:
        raise SystemExit("No alphabetic n-gram found in the input text")
    plotGrams(top)


if __name__ == "__main__":
    main()