3 from itertools import combinations
5 from ocesavac import ocesat
# The 26 uppercase ASCII letters.
8 ABECEDA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Same letters preceded by a space character (27 symbols in total).
9 MABECEDA = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Divides entries of a two-level table by the grand total of all its values.
# NOTE(review): the lines between the total and the division (presumably the
# loops that bind i and j, and the return) are not visible in this excerpt —
# confirm against the full file before relying on this description.
11 def nova_tabulka(tabulka, abc=MABECEDA):
# Grand total: the sum of every value across all inner dictionaries.
12 celkem = sum(sum(tab2.values()) for tab2 in tabulka.values())
# float() makes this a floating-point division even when the stored values
# are integers.
16 tabulka[i][j] /= float(celkem)
def poradi_dle_frekvence(freq):
    """Return the keys of *freq* ordered from the highest value to the lowest.

    Keys whose values are equal keep the order in which the mapping
    iterates over them (the sort is stable).
    """
    ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
    return [key for key, _ in ranked]
def __init__(self, tref, komentar=''):
    """Initialise this object from the statistics held by *tref*.

    Stores tref's alphabet, the result of
    poradi_dle_frekvence(tref.frekvence), and the result of
    nova_tabulka(tref.tabulka, tref.abeceda).  *komentar* is stored
    unchanged.
    """
    zdroj_abeceda = tref.abeceda
    self.abeceda = zdroj_abeceda
    self.poradi = poradi_dle_frekvence(tref.frekvence)
    self.tabulka = nova_tabulka(tref.tabulka, zdroj_abeceda)
    self.komentar = komentar
# Constructor: picks an alphabet and sets up the count structures.
# NOTE(review): several lines of this method are not visible in this excerpt
# (presumably the branch on `mezery`, the body of the first loop and the
# creation of self.tabulka) — confirm against the full file.
30 def __init__(self, mezery=True):
# Alphabet that includes the space character.
32 self.abeceda = MABECEDA
# Alphabet of letters only.
34 self.abeceda = ABECEDA
# Dictionary of per-symbol values; the loop body that fills it is not
# visible here.
36 self.frekvence = dict()
37 for c in self.abeceda:
# Two-level table indexed as tabulka[i][j] over all symbol pairs of the
# alphabet, with every cell starting at 0.
40 for i in self.abeceda:
41 self.tabulka[i] = dict()
42 for j in self.abeceda:
43 self.tabulka[i][j] = 0
# Reads the file `soubor` and adds its symbol and adjacent-pair counts to
# this object's tables.
# NOTE(review): two lines of this method are not visible in this excerpt, so
# whether the file handle is closed cannot be determined from here.
46 def pridej(self, soubor):
# Open the file decoded as UTF-8.
47 f = codecs.open(soubor, encoding='UTF-8')
# ocesat comes from the ocesavac module; its behaviour is not shown here —
# presumably it reduces the text to symbols of self.abeceda; confirm.
48 text = ocesat(f.read(), self.mezery)
# Walk over every adjacent pair (text[i], text[i+1]): bump the pair's cell
# and the count of the pair's first symbol.
50 for i in range(len(text) - 1):
51 self.tabulka[text[i]][text[i+1]] += 1
52 self.frekvence[text[i]] += 1
# Counts text[i+1]. NOTE(review): presumably this runs once after the loop to
# count the final symbol, which the loop never counts — the original
# indentation and the preceding line are not visible; confirm.
54 self.frekvence[text[i+1]] += 1
# Record the file name in the list of processed files.
55 self.soubory.append(soubor)
63 return pickle.load(open(odkud))