# coding: utf-8
"""Compute the index of coincidence of the letters found in a text file."""
from argparse import ArgumentParser
from collections import Counter
from re import sub

from unidecode import unidecode


def checkArguments():
    """Parse the command-line arguments.

    Returns:
        The parsed argparse namespace; ``filename`` is the path of the text
        to analyze.
    """
    parser = ArgumentParser(description='Cryptanalyst')
    # Required: without a path there is nothing to open, and argparse then
    # reports a usage error instead of open() failing on None.
    parser.add_argument('-f', '--filename', required=True,
                        help='Text to analyze')
    return parser.parse_args()


def _normalize(text):
    """Return only the letters a-z of *text*, transliterated and lower-cased."""
    # Transliterate first and lower-case afterwards: transliteration can
    # itself produce capital letters, which the a-z filter would drop.
    return sub(r'[^a-z]', '', unidecode(text).lower())


def _index_of_coincidence(letters):
    """Return the index of coincidence of the string *letters*.

    The value is sum(n * (n - 1)) over the count n of each distinct
    character, divided by N * (N - 1), where N is the total length.

    Raises:
        ValueError: if *letters* holds fewer than two characters, because
            the denominator would be zero.
    """
    total = len(letters)
    if total < 2:
        raise ValueError(
            'At least two letters are needed to compute the index of '
            'coincidence'
        )
    matching_pairs = sum(n * (n - 1) for n in Counter(letters).values())
    return matching_pairs / (total * (total - 1))


def main():
    """Read the file named on the command line and print its index of coincidence.

    Exits with an error message when the file holds fewer than two letters.
    """
    args = checkArguments()

    # Decode explicitly so the result does not depend on the platform's
    # default encoding.
    with open(args.filename, 'r', encoding='utf-8') as f:
        letters = ''.join(_normalize(line) for line in f)

    try:
        ic = _index_of_coincidence(letters)
    except ValueError as err:
        # SystemExit with a string prints it to stderr and exits with 1.
        raise SystemExit(str(err))

    print(f"Index of coincidence: {ic}")


if __name__ == "__main__":
    main()