X-Git-Url: http://git.tomasm.cz/krypto.git/blobdiff_plain/09f076927cd6294ae6c3db9268496dc3a8fc87f4..e3f91bea0ff143bca677d5d14151990a3f08108e:/ocesavac.py?ds=sidebyside diff --git a/ocesavac.py b/ocesavac.py index 31e49fb..f762333 100644 --- a/ocesavac.py +++ b/ocesavac.py @@ -1,26 +1,41 @@ -import unicodedata +"""Modul pro odstranovani diakritiky a zvlastnich znaku.""" -ABECEDA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +import unicodedata +from spolecne import ABECEDA def deaccent(unistr): + """Vrati text bez akcentu. Pochybna metoda pouzivajici unicodedata.""" return ''.join(aChar for aChar in unicodedata.normalize('NFD', unistr) if not unicodedata.combining(aChar)) def ocesat(text, mezery=True): + """Odstrani z textu akcenty, zvlastni znaky nahradi mezerami, posloupnost + mezer jednou mezerou a prevede vsechna pismena na velka.""" text = deaccent(unicode(text)).upper() if mezery: pole = [' '] - lastWh = True - for c in text: - if c in ABECEDA: - pole.append(c) - lastWh = False - elif not lastWh: + last_white = True + for char in text: + if char in ABECEDA: + pole.append(char) + last_white = False + elif not last_white: pole.append(' ') - lastWh = True - if not lastWh: + last_white = True + if not last_white: pole.append(' ') return ''.join(pole) else: # bez mezer return ''.join([c for c in text if c in ABECEDA]) + +def interactive(): + import sys + import codecs + sys.stdin = codecs.getreader('utf-8')(sys.stdin) + sys.stdout = codecs.getwriter('utf-8')(sys.stdout) + for line in sys.stdin.readlines(): + print " ".join(deaccent(line).lower().split()) + +if __name__ == '__main__': + interactive()