Merge
This commit is contained in:
commit
cedf3a37f9
74
getgrams.py
74
getgrams.py
@ -2,41 +2,65 @@
|
|||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def checkArguments(argv=None):
    """Parse the command-line options of the n-gram frequency script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding ``filename`` (str), ``bigram``
        (bool) and ``trigram`` (bool).

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid or
            ``--filename`` is missing.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    # Without a filename the script can only fail later inside open(None),
    # so let argparse reject the call with a usage message instead.
    parser.add_argument('-f', '--filename', required=True,
                        help='Text to analyze')
    parser.add_argument('-b', '--bigram', action="store_true",
                        help='Get bigram')
    parser.add_argument('-t', '--trigram', action="store_true",
                        help='Get trigram')
    return parser.parse_args(argv)
|
||||||
|
|
||||||
def readFile(filename):
    """Return the whole content of a text file as a single string.

    Args:
        filename: Path of the file to read.

    Returns:
        The text of the file, newlines included.
    """
    # read() yields the same string as concatenating every entry of
    # readlines(), without the intermediate list or repeated string copies.
    with open(filename, 'r') as f:
        return f.read()
|
||||||
|
|
||||||
def getLetters(text, pos):
    """Count every n-gram of length ``pos`` occurring in ``text``.

    A window of ``pos`` characters slides over ``text`` one character at a
    time; each full window is counted once per occurrence.

    Args:
        text: The string to analyze.
        pos: Length of the n-grams (2 for bigrams, 3 for trigrams).

    Returns:
        A ``Counter`` mapping each n-gram to its number of occurrences. It is
        empty when ``text`` is shorter than ``pos``.

    Raises:
        ValueError: If ``pos`` is smaller than 1.
    """
    if pos < 1:
        raise ValueError("pos must be a positive integer")
    # The last full window starts at len(text) - pos. Stopping at
    # len(text) - 1 (correct only for pos == 2) would also count truncated
    # slices at the end of the text as if they were n-grams.
    return Counter(text[idx: idx + pos] for idx in range(len(text) - pos + 1))
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: read the text, count its bigrams or trigrams and
    # plot the ten most frequent purely alphabetic ones as a bar chart.
    args = checkArguments()
    text = readFile(args.filename)

    # --trigram wins when both flags are given (it used to overwrite the
    # bigram result). With neither flag there is nothing to count, so stop
    # with a clear message instead of hitting a NameError further down.
    if args.trigram:
        size = 3
    elif args.bigram:
        size = 2
    else:
        raise SystemExit("Nothing to do: pass -b/--bigram or -t/--trigram")

    counts = getLetters(text, size)

    # Keep only n-grams made of ASCII letters, ordered by ascending count.
    # sorted() is stable, so equal counts stay in first-seen order.
    letters_only = re.compile(r"[A-Za-z]{%d}" % size)
    ranked = [
        (ngram, count)
        for ngram, count in sorted(counts.items(), key=lambda item: item[1])
        if letters_only.match(ngram)
    ]

    # The ten most frequent entries are the tail of the ascending list; a
    # shorter list is plotted in full.
    top = ranked[-10:]
    x = [ngram for ngram, _ in top]
    y = [count for _, count in top]

    bar_width = 0.5
    plt.bar(x, y, bar_width)
    plt.show()
|
||||||
|
Loading…
Reference in New Issue
Block a user