diff --git a/pynizelib.py b/pynizelib.py index 47fee38..51567a2 100644 --- a/pynizelib.py +++ b/pynizelib.py @@ -3,9 +3,11 @@ import sys import os import re import copy +import pprint +from collections import OrderedDict """ -Library for PinYin-izing a either a file or a block of text +Library for PinYin-izing a either a file or a block of text. Sadly, python > 2.7 is required... """ NOT = 0 @@ -25,12 +27,8 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F Will pinyin-ize either a string or a file (open(foo,"X")) given as "hanzitf". Pinyinization can be controlled by the mode parameter. The following are possible: - PINYIN: - PINYIN_TRANSL - PINYIN_HANZI - PINYIN_TRANSL_HANZI - TRANSL - TRANSL_HANZI + + With the defined return-value semantics. The pinyinization can be controlled via the simplified, traditional @@ -46,13 +44,13 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F is copied to output. """ - if(type(hanzitf)=="file"): + if type(hanzitf)==file: hanzitf.seek(0) hanzi = hanzitf.read() - else if(type(hanzitf)=="str"): + elif type(hanzitf)==str: hanzi = hanzitf else: - print "hanzitf was neither text nor file" + print "hanzitf was neither text nor file, was: %s"%type(hanzitf) return hanzi.strip().lower() @@ -64,7 +62,8 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F slconn = sqlite3.connect(sqlitef) hzwork = list(hanzi.decode("UTF-8")) - finres = "" + finres = OrderedDict() + while hzwork: print "\nhzwork still %s"%hzwork tmpstr = "" @@ -74,46 +73,55 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F tmpstr += hzwork[0] print "checking for %s (%s)"%(tmpstr,tmpstr.__repr__()) - sqlstr_s = 'SELECT pinyin FROM entries WHERE simplified="%s";'%(tmpstr,tmpstr) - sqlstr_t = 'SELECT pinyin FROM entries WHERE traditional="%s";'%(tmpstr,tmpstr) + sqlstr_s = 'SELECT pinyin, dict FROM entries WHERE simplified="%s";'%tmpstr + sqlstr_t = 'SELECT pinyin, dict FROM entries WHERE traditional="%s";'%tmpstr res_s = None res_t = None res = None - if simplified =! NOT: + if simplified != NOT: res_s = slconn.execute(sqlstr_s).fetchall() - if traditional =! NOT: + if traditional != NOT: res_t = slconn.execute(sqlstr_t).fetchall() if simplified == PRIMARY: res = res_s - if not res_s && traditional == FALLBACK: + if not res_s and traditional == FALLBACK: res = res_t - else if traditional == PRIMARY: + elif traditional == PRIMARY: res = res_t - if not res_t && simplified == FALLBACK: + if not res_t and simplified == FALLBACK: res = res_s print "res are: %s"%res # now for the result work... if res: # sequence of chars not found, using last result - lastres = res[0][0] + print res[0] + lastres = [res[0]] hzwork.pop(0) print "found and consumed %s"%res[0][0] elif not res and not lastres: # first char not found, using fallback barf if original: - lastres = '%s'%tmpstr + lastres = [('%s', 'no translation / string found, using input')]%tmpstr hzwork.pop(0) print "%s not found, adding raw and breaking"%tmpstr break else: print "got empty result, breaking" - lastres += " " + lastres.append( (" ","") ) break - finres += lastres + finres[tmpstr] = lastres print "appending and deleting lastres %s, finres now: %s"%(lastres,finres) - print '\nfinished with:\n """\n%s\n"""\n'%finres + pprint.pprint(finres) + print "\n\n\n" + for k,v in finres.iteritems(): + print k + for i in v: + print v + print v[0] + print v[1] + print "" return finres