3 from itertools import combinations
5 from ocesavac import ocesat
7 from koincidence import index_koincidence
# The 26 uppercase letters A-Z.
ABECEDA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# The same letters preceded by a single space character.
MABECEDA = ' ' + ABECEDA
def nova_tabulka(tabulka, abc=MABECEDA):
    """Scale a nested count table in place by its grand total.

    Args:
        tabulka: dict of dicts; ``tabulka[i][j]`` holds a numeric count.
        abc: iterable of the keys visited on both levels of the table.

    Returns:
        The same ``tabulka`` object with every visited entry divided by the
        sum of all entries. When that sum is zero the table is returned
        unchanged instead of raising ``ZeroDivisionError``.
    """
    celkem = float(sum(sum(radek.values()) for radek in tabulka.values()))
    if not celkem:
        # Empty statistics: dividing is impossible, leave the table as is.
        return tabulka
    # NOTE(review): the loop headers and the return statement were not
    # visible in the reviewed listing. Iterating abc x abc and returning
    # the table are reconstructed from the i/j indices and from the caller
    # that stores the result -- confirm against the full file.
    for i in abc:
        radek = tabulka[i]
        for j in abc:
            radek[j] /= celkem
    return tabulka
def poradi_dle_frekvence(freq):
    """Return the keys of *freq* ordered from the highest value to the lowest.

    Keys with equal values keep the order in which the mapping yields them.
    """
    klice = list(freq)
    klice.sort(key=lambda klic: freq.get(klic), reverse=True)
    return klice
def __init__(self, tref, komentar=''):
    """Initialise the instance from the statistics object *tref*.

    Args:
        tref: object providing ``abeceda``, ``frekvence`` and ``tabulka``.
        komentar: value stored unchanged as ``self.komentar``.
    """
    abeceda = tref.abeceda
    self.abeceda = abeceda
    # Keys of the frequency mapping, highest count first.
    self.poradi = poradi_dle_frekvence(tref.frekvence)
    # Result of nova_tabulka applied to the reference table.
    self.tabulka = nova_tabulka(tref.tabulka, abeceda)
    self.komentar = komentar
def __init__(self, mezery=True):
    """Create empty single-character and character-pair counters.

    Args:
        mezery: when true the alphabet is ``MABECEDA`` (which contains the
            space character), otherwise ``ABECEDA``. The flag is kept as
            ``self.mezery`` because ``pridej`` passes it on to ``ocesat``.
    """
    # NOTE(review): the conditional around the two alphabet assignments and
    # the initialisation of self.mezery / self.soubory were not visible in
    # the reviewed listing. They are restored here because other code in
    # this file reads self.mezery and appends to self.soubory -- confirm
    # against the full file.
    self.mezery = mezery
    if mezery:
        self.abeceda = MABECEDA
    else:
        self.abeceda = ABECEDA

    # Every counter starts at zero so that pridej can use "+= 1" directly.
    self.frekvence = dict.fromkeys(self.abeceda, 0)
    self.tabulka = dict(
        (i, dict.fromkeys(self.abeceda, 0)) for i in self.abeceda
    )
    # Paths of the files added so far.
    self.soubory = []
def pridej(self, soubor):
    """Add the character and character-pair counts of one text file.

    The file is read as UTF-8 and passed through ``ocesat`` together with
    ``self.mezery``; the resulting text is then counted.

    Args:
        soubor: path of the file to read; appended to ``self.soubory``
            once its text has been counted.
    """
    # The context manager closes the file even if reading fails.
    with codecs.open(soubor, encoding='UTF-8') as f:
        text = ocesat(f.read(), self.mezery)

    # Count every character. Iterating the text directly also covers texts
    # of length 0 and 1, where an index loop over pairs never runs.
    for znak in text:
        self.frekvence[znak] += 1
    # Count each pair of adjacent characters.
    for prvni, druhy in zip(text, text[1:]):
        self.tabulka[prvni][druhy] += 1

    self.soubory.append(soubor)
64 return pickle.load(open(odkud))
def prumerny_index(tref):
    """Return the mean ``index_koincidence`` over the files of *tref*.

    Each path in ``tref.soubory`` is read as UTF-8 and passed through
    ``ocesat`` with ``tref.mezery``; the text is split at its midpoint and
    ``index_koincidence`` is evaluated on the two halves.

    Args:
        tref: object providing ``soubory`` (file paths) and ``mezery``.

    Returns:
        The sum of the per-file results divided by the number of files,
        as a float.

    Raises:
        ZeroDivisionError: if ``tref.soubory`` is empty.
    """
    suma = 0
    # Iterate the parameter itself; the name ``ref`` is not defined here.
    for soubor in tref.soubory:
        with codecs.open(soubor, encoding='UTF-8') as f:
            text = ocesat(f.read(), tref.mezery)
        # Floor division keeps the slice index an integer under both
        # classic and true division.
        pulka = len(text) // 2
        suma += index_koincidence(text[:pulka], text[pulka:])
    return float(suma) / len(tref.soubory)