cryptanalysis/getgrams.py
2022-06-30 12:11:59 +02:00

67 lines
1.6 KiB
Python

# coding: utf-8
from collections import Counter
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import re
def checkArguments():
    """
    Parse the command-line options of the script.

    Options:
        -f / --filename: path of the text to analyze (required).
        -b / --bigram: work on two-character sequences.
        -t / --trigram: work on three-character sequences.

    A missing filename, or a call with neither --bigram nor --trigram, makes
    argparse print a usage error and exit with status 2.

    Returns the argparse namespace with the attributes ``filename``,
    ``bigram`` and ``trigram``.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    # Without a filename there is nothing to read, so let argparse reject the
    # call up front instead of handing None to open() later on.
    parser.add_argument('-f', '--filename', help='Text to analyze', required=True)
    parser.add_argument('-b', '--bigram', help='Get bigram', action="store_true")
    parser.add_argument('-t', '--trigram', help='Get trigram', action="store_true")
    parsed = parser.parse_args()
    # At least one mode is needed to know which n-gram size to count; giving
    # both flags together is still accepted.
    if not (parsed.bigram or parsed.trigram):
        parser.error('one of -b/--bigram or -t/--trigram is required')
    return parsed
def readFile(filename):
    """
    Read the file passed in argument and return its whole content as a
    single string.

    The file is opened in text mode without an explicit encoding, so the
    platform default encoding is used. Line breaks are kept in the result.
    """
    with open(filename, 'r') as f:
        # read() returns the same text as concatenating every line, without
        # rebuilding the string once per line.
        return f.read()
def getLetters(text, pos):
    """
    Count every run of ``pos`` consecutive characters found in ``text``.

    text: the string to scan.
    pos: length of the character runs to count (2 for bigrams, 3 for
    trigrams).

    Returns a Counter mapping each run to its number of occurrences. The
    Counter is empty when ``text`` is shorter than ``pos``.
    """
    # The last complete window starts at len(text) - pos; stopping there keeps
    # truncated pieces from the end of the text out of the counts.
    return Counter(text[idx: idx + pos] for idx in range(len(text) - pos + 1))
if __name__ == "__main__":
    # Script entry point: count the bigrams or trigrams of the given file and
    # show the ten most frequent letters-only ones as a bar chart.
    args = checkArguments()
    # Size of the character runs to count; when both flags are given the
    # trigram one wins.
    if args.trigram:
        gramSize = 3
    elif args.bigram:
        gramSize = 2
    else:
        # Neither flag was given: there is nothing to count, so stop with a
        # clear message instead of failing on an unbound variable.
        raise SystemExit("Nothing to do: pass -b/--bigram or -t/--trigram")
    text = readFile(args.filename)
    data = getLetters(text, gramSize)
    # Only entries made of exactly gramSize ASCII letters are kept; anything
    # containing spaces, digits, punctuation or line breaks is dropped.
    lettersOnly = re.compile("[A-Za-z]{%d}" % gramSize)
    # sorted() is stable: counts go in ascending order and entries with equal
    # counts stay in the order in which they were first seen.
    newDataSorted = [
        (gram, count)
        for gram, count in sorted(data.items(), key=lambda item: item[1])
        if lettersOnly.fullmatch(gram)
    ]
    x = []
    y = []
    # The list is in ascending order, so its last ten entries are the most
    # frequent ones (the whole list when it holds fewer than ten).
    for gram, count in newDataSorted[-10:]:
        x.append(gram)
        y.append(count)
    largeur = 0.5  # width of the bars
    plt.bar(x, y, largeur)
    plt.show()