3 """Modul pro praci s referencnimi tabulkami."""
6 from ocesavac import ocesat
8 from koincidence import index_koincidence2
9 from spolecne import ABECEDA, MABECEDA
def nova_tabulka(tabulka, abc=MABECEDA):
    """Convert a table of occurrence counts into relative frequencies.

    Args:
        tabulka: Nested dict where ``tabulka[i][j]`` is the number of
            occurrences recorded for the character pair ``(i, j)``.
        abc: Iterable of characters; only rows and columns for these
            characters are placed in the result.

    Returns:
        A new nested dict in which every ``[i][j]`` entry (``i``, ``j`` taken
        from *abc*) equals the original count divided by the grand total of
        all counts stored in *tabulka*.  If the grand total is zero, every
        entry is ``0.0``.

    The input table is never modified, so the raw counts stay available to
    the caller and the function can safely be called more than once on the
    same data.
    """
    celkem = float(sum(sum(radek.values()) for radek in tabulka.values()))
    if not celkem:
        # Nothing has been counted yet: avoid dividing by zero.
        return {i: {j: 0.0 for j in abc} for i in abc}
    return {i: {j: tabulka[i][j] / celkem for j in abc} for i in abc}
def poradi_dle_frekvence(freq):
    """Return the keys of *freq* ordered from the highest value to the lowest.

    Args:
        freq: Mapping from a key to a comparable value (e.g. a count).

    Returns:
        A new list of the keys of *freq*, sorted by their mapped value in
        descending order.
    """
    serazene = list(freq)
    serazene.sort(key=lambda klic: freq[klic], reverse=True)
    return serazene
def __init__(self, tref, komentar=''):
    """Build the derived table from a table of raw counts.

    Args:
        tref: Object exposing ``abeceda``, ``frekvence`` and ``tabulka``.
        komentar: Free-form comment stored with the table.

    Sets ``self.abeceda`` (copied from *tref*), ``self.poradi`` (the result
    of ``poradi_dle_frekvence`` on ``tref.frekvence``), ``self.tabulka`` (the
    result of ``nova_tabulka`` on ``tref.tabulka``) and ``self.komentar``.
    """
    abeceda = tref.abeceda
    self.komentar = komentar
    self.abeceda = abeceda
    self.poradi = poradi_dle_frekvence(tref.frekvence)
    self.tabulka = nova_tabulka(tref.tabulka, abeceda)
def __init__(self, mezery=True):
    """Create an empty table of counts with every counter set to zero.

    Args:
        mezery: Selects the alphabet and is stored as ``self.mezery`` so
            that the same flag can later be handed to ``ocesat``.

    Attributes initialised:
        abeceda: The alphabet in use.
        frekvence: Dict mapping every character of the alphabet to ``0``.
        tabulka: Nested dict ``tabulka[i][j] == 0`` for every pair of
            characters of the alphabet.
        soubory: Empty list; ``pridej`` appends each processed file to it.
    """
    self.mezery = mezery
    # NOTE(review): MABECEDA is assumed to be the variant used when
    # ``mezery`` is true (it is assigned first in the original) -- confirm.
    self.abeceda = MABECEDA if mezery else ABECEDA
    self.frekvence = dict.fromkeys(self.abeceda, 0)
    # One independent row dict per character; rows must not be shared.
    self.tabulka = {i: dict.fromkeys(self.abeceda, 0) for i in self.abeceda}
    self.soubory = []
def pridej(self, soubor):
    """Add the character statistics of one text file to the counters.

    The file is read as UTF-8 and passed through ``ocesat`` together with
    ``self.mezery``.  Every character of the resulting text increments
    ``self.frekvence`` once, and every pair of adjacent characters
    increments ``self.tabulka[first][second]`` once.  Finally the file is
    appended to ``self.soubory``.

    Args:
        soubor: Path of the file to process.

    Raises:
        KeyError: If the cleaned text contains a character that has no
            entry in the counters.
    """
    # The context manager closes the file even if reading or cleaning fails.
    with codecs.open(soubor, encoding='UTF-8') as f:
        text = ocesat(f.read(), self.mezery)
    # Counting characters and pairs separately keeps texts of length 0 or 1
    # working; they have no adjacent pair at all.
    for znak in text:
        self.frekvence[znak] += 1
    for prvni, druhy in zip(text, text[1:]):
        self.tabulka[prvni][druhy] += 1
    self.soubory.append(soubor)
# Pickle ``co`` into the file ``kam``.  The ``with`` block guarantees the
# handle is flushed and closed even when pickling raises.  The text mode is
# kept on purpose: a pickle written with the default ASCII protocol has to
# be read back in the same mode it was written in.
with open(kam, 'w') as soubor:
    pickle.dump(co, soubor)
64 return pickle.load(open(odkud))
# Letter -> frequency lookup; ``r.frekvence`` is paired with ``r.abeceda``
# position by position.
slovnik = dict(zip(r.abeceda, r.frekvence))
# One row per alphabet position: the letter in alphabet order with its
# frequency, next to the letter at the same position of ``r.poradi`` with
# its frequency.
# NOTE(review): the format string has four placeholders; the first three
# arguments were reconstructed from ``slovnik`` -- confirm against the
# intended output.
for pismeno, caste in zip(r.abeceda, r.poradi):
    print("{0} {1:>6.2%} {2} {3:>6.2%}".format(
        pismeno, slovnik[pismeno], caste, slovnik[caste]))

# A single parenthesised argument prints identically under the Python 2
# print statement and the Python 3 print function.
print("Index koincidence: {0:>5.2%}".format(r.index_koinc))

# Pair table: one line per first character, values scaled by 1000.
for char in r.abeceda:
    print(char + " " + " ".join(
        "{:0>5.2f}".format(r.tabulka[char][ch2] * 1000)
        for ch2 in r.abeceda))
def prumerny_index(tref):
    """Return the mean index of coincidence over the files of *tref*.

    Every file listed in ``tref.soubory`` is read as UTF-8, cleaned with
    ``ocesat`` (using ``tref.mezery``) and cut into two halves, which are
    passed to ``index_koincidence2``.  The results are averaged over the
    number of files.

    Args:
        tref: Object exposing ``soubory`` (a sequence of file paths) and
            ``mezery``.

    Returns:
        The average as a float, or ``0.0`` when ``tref.soubory`` is empty.
    """
    if not tref.soubory:
        # No files: there is nothing to average, avoid dividing by zero.
        return 0.0
    suma = 0
    for soubor in tref.soubory:
        with codecs.open(soubor, encoding='UTF-8') as f:
            text = ocesat(f.read(), tref.mezery)
        # Floor division keeps the slice index an integer on every Python
        # version.
        pulka = len(text) // 2
        suma += index_koincidence2(text[:pulka], text[pulka:])
    return float(suma) / len(tref.soubory)