Another programming challenge from work to solve Ceaser ciphered sentences and return the correct shift value. Automatic solving of the cypher is easy enough but the hard part comes to automatically detecting if the resulting shifted sentence is English. I have posted before about detecting English using ngrams and used a similar process here.
Here is the code which contains the four test cases.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
#!/bin/python class Ngram: # Sources # https://en.wikipedia.org/wiki/Letter_frequency # https://en.wikipedia.org/wiki/Bigram # https://en.wikipedia.org/wiki/Trigram def __init__(self): self.ngrams = ['the', 'and', 'tha', 'ent', 'ing', 'ion', 'tio', 'for', 'nde', 'has', 'nce', 'edt', 'tis', 'oft', 'sth', 'men', 'th', 'he', 'ed', 'of', 'in', 'to', 'er', 'as', 'her', 'ng', 'of', 'an', 'in', 'ld', 'de', 'ra', 'de', 'se', 'ur', 'ea', 'hi', 'or', 'to', 'st', 'nt', 'll', 'ee', 'ss', 'oo', 'tt', 'ff', 'rr', 'nn', 'pp', 'cc'] self.probability = 0.0 def parse_text(self, text): count = 0.0 self.probability = 0.0 for ngram in self.ngrams: count += text.count(ngram) self.probability = (count / len(self.ngrams)) * 100 return self.probability def get_probability(self): return self.probability class CeaserCipher: def __init__(self): self.ngram = Ngram() self.alphabet = list('abcdefghijklmnopqrstuvwxyz') def shiftWord(self, text, shift): l_word = list(text) for i, c in enumerate(l_word): if c in self.alphabet: p = (self.alphabet.index(c) + shift) % 26 l_word[i] = self.alphabet[p] return "".join(l_word) def decrypt(self, text): highest_probability = 0 best_phrase = '' best_shift = 0 for shift in range(-13, 14): shifted = self.shiftWord(text.lower(), shift) self.ngram.parse_text(shifted) if self.ngram.get_probability() > highest_probability: highest_probability = self.ngram.get_probability() best_phrase = shifted best_shift = shift #print "{0} : SHIFT - {1} IS ENGLISH? - {2}".format(shifted, shift, self.ngram.parse_text(shifted)) print "PHRASE: {0}\nSHIFT: {1}\n".format(best_phrase, best_shift) if __name__ == "__main__": phrases = ["BYFFI QILFX", "DRO OKQVO RKC VKXNON", "MCI AIGH IBQCJSF HVS HFIHV HC IBQCJSF HVS ZWS", "GOOD WORK"] ceaser = CeaserCipher() for phrase in phrases: ceaser.decrypt(phrase) |
Questions? Comments?