52 lines
1.0 KiB
Python
52 lines
1.0 KiB
Python
# coding: utf-8
|
|
|
|
from argparse import ArgumentParser
|
|
from unidecode import unidecode
|
|
from re import sub
|
|
|
|
|
|
def checkArguments():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``filename``
        attribute holds the path given with ``-f`` / ``--filename``.

    Raises:
        SystemExit: raised by argparse (after printing a usage message) when
            the filename option is missing or the arguments are malformed.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    # The option is mandatory: without it ``filename`` would be None and the
    # caller's open() would fail with an unhelpful TypeError.
    parser.add_argument(
        '-f', '--filename', required=True, help='Text to analyze'
    )
    return parser.parse_args()
|
|
|
|
def _index_of_coincidence(text):
    """Return the index of coincidence of the letters a-z found in *text*.

    The value is sum(n_i * (n_i - 1)) / (N * (N - 1)), where n_i is the
    number of occurrences of the i-th letter and N the total number of
    letters. Characters other than lowercase a-z are ignored.

    Raises:
        ValueError: if *text* contains fewer than two letters, because the
            denominator N * (N - 1) would be zero.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    counts = [text.count(letter) for letter in alphabet]
    total_letters = sum(counts)
    if total_letters < 2:
        raise ValueError(
            'at least two letters are needed to compute the index of '
            'coincidence'
        )
    numerator = sum(n * (n - 1) for n in counts)
    return numerator / (total_letters * (total_letters - 1))


def main():
    """Print the index of coincidence of the file named on the command line.

    The file is read as UTF-8, lowercased, transliterated to ASCII with
    ``unidecode`` and stripped of everything except the letters a-z before
    the index is computed.

    Raises:
        SystemExit: with an explanatory message when the file holds fewer
            than two letters.
    """
    args = checkArguments()

    # Read file. The encoding is explicit so the result does not depend on
    # the platform's default locale.
    with open(args.filename, 'r', encoding='utf-8') as f:
        # unidecode's second parameter is an error policy, not an encoding,
        # so only the text is passed; the default policy drops characters
        # that have no ASCII transliteration.
        letters = ''.join(
            sub(r'[^a-z]', '', unidecode(line.lower())) for line in f
        )

    try:
        ic = _index_of_coincidence(letters)
    except ValueError as err:
        raise SystemExit(f"Cannot analyze {args.filename}: {err}") from err

    print(f"Index of coincidence: {ic}")
|
|
|
|
# Run the analysis only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|