3 from itertools import combinations
5 from ocesavac import ocesat
7 from koincidence import index_koincidence
8 from spolecne import ABECEDA, MABECEDA
def nova_tabulka(tabulka, abc=MABECEDA):
    """Normalize a bigram count table to fractions of its total, in place.

    The total is taken over every value stored in ``tabulka``; the entries
    ``tabulka[i][j]`` with ``i`` and ``j`` in ``abc`` are then divided by it.

    Args:
        tabulka: Two-level mapping ``tabulka[first][second] -> count``.
        abc: Iterable of symbols whose rows and columns are normalized.

    Returns:
        The same ``tabulka`` object after the division. A table whose total
        is zero is returned unchanged.

    Raises:
        KeyError: If a symbol of ``abc`` is missing from ``tabulka``.
    """
    celkem = sum(sum(tab2.values()) for tab2 in tabulka.values())
    if not celkem:
        # Nothing has been counted; dividing would raise ZeroDivisionError.
        return tabulka
    celkem = float(celkem)  # force true division under Python 2 as well

    # NOTE(review): the iteration over `abc` and the returned value are
    # reconstructed from the `tabulka[i][j]` update and from callers that
    # assign the result -- confirm against the complete file.
    for i in abc:
        radek = tabulka[i]
        for j in abc:
            radek[j] /= celkem
    return tabulka
def poradi_dle_frekvence(freq):
    """Return the keys of ``freq`` ordered from highest to lowest value.

    Keys with equal values keep the relative order in which ``freq``
    iterates over them.
    """
    poradi = list(freq)
    poradi.sort(key=freq.get, reverse=True)
    return poradi
def __init__(self, tref, komentar=''):
    """Derive the stored statistics from the text reference ``tref``.

    Args:
        tref: Object providing ``abeceda``, ``frekvence`` and ``tabulka``.
        komentar: Free-form note kept alongside the derived data.
    """
    abeceda = tref.abeceda
    self.abeceda = abeceda
    # Symbols ordered by descending frequency.
    self.poradi = poradi_dle_frekvence(tref.frekvence)
    # Table as returned by nova_tabulka for this alphabet.
    self.tabulka = nova_tabulka(tref.tabulka, abeceda)
    self.komentar = komentar
def __init__(self, mezery=True):
    """Create an empty set of symbol and bigram counters.

    Args:
        mezery: Selects the alphabet (``MABECEDA`` when true, ``ABECEDA``
            otherwise) and is stored for later calls to ``ocesat``.
    """
    # NOTE(review): the branch on `mezery` and the initial values of
    # `self.mezery` / `self.soubory` are reconstructed from how `pridej`
    # uses them -- confirm against the complete file.
    self.mezery = mezery
    self.abeceda = MABECEDA if mezery else ABECEDA
    self.soubory = []  # paths of the files counted so far

    # One zeroed counter per symbol ...
    self.frekvence = dict.fromkeys(self.abeceda, 0)
    # ... and one zeroed counter per ordered pair of symbols.
    self.tabulka = {}
    for i in self.abeceda:
        self.tabulka[i] = dict.fromkeys(self.abeceda, 0)
def pridej(self, soubor):
    """Add the symbol and bigram counts of one text file to the counters.

    The file is decoded as UTF-8 and passed through ``ocesat`` together
    with ``self.mezery``; the resulting text is what gets counted.

    Args:
        soubor: Path of the file to read; appended to ``self.soubory``
            once it has been counted.

    Raises:
        KeyError: If the cleaned text contains a symbol that has no
            counter in ``self.frekvence`` / ``self.tabulka``.
    """
    # Context manager so the handle is closed even when reading fails.
    with codecs.open(soubor, encoding='UTF-8') as f:
        text = ocesat(f.read(), self.mezery)

    # Adjacent pairs; yields nothing for texts shorter than two symbols.
    for prvni, druhy in zip(text, text[1:]):
        self.tabulka[prvni][druhy] += 1

    # Count every symbol directly instead of patching in the last one via
    # the leaked loop index, which is unbound when the text is too short.
    for znak in text:
        self.frekvence[znak] += 1

    self.soubory.append(soubor)
62 return pickle.load(open(odkud))
def prumerny_index(tref):
    """Average ``index_koincidence2`` of the two halves of each file.

    Every file listed in ``tref.soubory`` is decoded as UTF-8, cleaned by
    ``ocesat`` with ``tref.mezery``, split in the middle, and the two
    halves are passed to ``index_koincidence2``.

    Args:
        tref: Object providing ``soubory`` (file paths) and ``mezery``.

    Returns:
        The mean of the per-file results as a float.

    Raises:
        ZeroDivisionError: If ``tref.soubory`` is empty.
    """
    # NOTE(review): `index_koincidence2` is not among the imports visible
    # here (only `index_koincidence` is) -- confirm where it is defined.
    suma = 0
    # Iterate the argument's files; the old code read an unrelated name `ref`.
    for soubor in tref.soubory:
        with codecs.open(soubor, encoding='UTF-8') as f:
            text = ocesat(f.read(), tref.mezery)
        # Floor division keeps the slice index an int on Python 3 too.
        pul = len(text) // 2
        suma += index_koincidence2(text[:pul], text[pul:])
    return float(suma) / len(tref.soubory)