referencni.py

   1 #!/usr/bin/env python
   2
   3 from itertools import combinations
   4 import pickle
   5 from ocesavac import ocesat
   6 import codecs
   7 from koincidence import index_koincidence
   8
   9 ABECEDA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  10 MABECEDA = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  11
  12 def nova_tabulka(tabulka, abc=MABECEDA):
  13     celkem = sum(sum(tab2.values()) for tab2 in tabulka.values())
  14     if celkem != 0:
  15         for i in abc:
  16             for j in abc:
  17                 tabulka[i][j] /= float(celkem)
  18     return tabulka
  19
  20 def poradi_dle_frekvence(freq):
  21     return sorted(freq.keys(), key=freq.get, reverse=True)
  22
  23 class reference:
  24     def __init__(self, tref, komentar=''):
  25         self.abeceda = tref.abeceda
  26         self.poradi = poradi_dle_frekvence(tref.frekvence)
  27         self.tabulka = nova_tabulka(tref.tabulka, tref.abeceda)
  28         self.komentar = komentar
  29
  30 class treference:
  31     def __init__(self, mezery=True):
  32         if mezery:
  33             self.abeceda = MABECEDA
  34         else:
  35             self.abeceda = ABECEDA
  36         self.mezery = mezery
  37         self.frekvence = dict()
  38         for c in self.abeceda:
  39             self.frekvence[c] = 0
  40         self.tabulka = dict()
  41         for i in self.abeceda:
  42             self.tabulka[i] = dict()
  43             for j in self.abeceda:
  44                 self.tabulka[i][j] = 0
  45         self.soubory = []
  46
  47     def pridej(self, soubor):
  48         f = codecs.open(soubor, encoding='UTF-8')
  49         text = ocesat(f.read(), self.mezery)
  50         f.close()
  51         for i in range(len(text) - 1):
  52             self.tabulka[text[i]][text[i+1]] += 1
  53             self.frekvence[text[i]] += 1
  54         else:
  55             self.frekvence[text[i+1]] += 1
  56         self.soubory.append(soubor)
  57
  58 def uloz(co, kam):
  59     f = open(kam, 'w')
  60     pickle.dump(co, f)
  61     f.close()
  62
  63 def nacti(odkud):
  64     return pickle.load(open(odkud))
  65
  66 def prumerny_index(tref):
  67     suma = 0
  68     for soubor in ref.soubory:
  69         f = codecs.open(soubor, encoding='UTF-8')
  70         text = ocesat(f.read(), tref.mezery)
  71         f.close()
  72         suma += index_koincidence(text[:len(text)/2], text[len(text)/2:])
  73     return float(suma)/len(tref.soubory)