diff --git a/pinyinize.py b/pinyinize.py index 6d801a4..d4d905c 100644 --- a/pinyinize.py +++ b/pinyinize.py @@ -2,6 +2,7 @@ import sqlite3 import sys import os import re +import copy if len(sys.argv) != 3: print "Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]" @@ -30,15 +31,20 @@ while hzwork: print "\nhzwork still %s"%hzwork tmpstr = "" lastres = "" - for c in hzwork: + + for c in copy.deepcopy(hzwork): tmpstr += hzwork[0] + print "checking for %s (%s)"%(tmpstr,tmpstr.__repr__()) - res = slconn.execute('SELECT pinyin FROM entries WHERE simplified="%s"'%tmpstr).fetchall() - if res != []: # sequence of chars not found, using last result + res = slconn.execute('SELECT pinyin FROM entries WHERE simplified="%s" OR traditional="%s";'%(tmpstr,tmpstr)).fetchall() + print "res are: %s"%res + + # now for the result work... + if res: # sequence of chars not found, using last result lastres = res[0][0] hzwork.pop(0) print "found and consumed %s"%res[0][0] - elif res == [] and not lastres: # first char not found, using fallback barf + elif not res and not lastres: # first char not found, using fallback barf lastres = '[%s]'%tmpstr hzwork.pop(0) print "%s not found, adding raw and breaking"%tmpstr @@ -46,6 +52,7 @@ while hzwork: else: print "got empty result, breaking" break + finres += lastres+" " print "appending and deleting lastres %s, finres now: %s"%(lastres,finres)