"""Transcribe Chinese text to pinyin using an SQLite dictionary.

Command line:
    <script> [hanzi-text or file] [sqlite-file.sqlite]

The first argument is either literal text or the path of a UTF-8 text file.
The second argument is an existing SQLite database containing an ``entries``
table with ``simplified``, ``traditional`` and ``pinyin`` columns.

The text is segmented greedily: starting at the current character, the
candidate word is grown one character at a time for as long as the candidate
is found in the dictionary.  The pinyin of the longest candidate found is
emitted; a character that is not found at all is copied through unchanged.

The code runs unmodified on Python 2 and Python 3.
"""
import copy  # unused here; kept because the original file imported it
import os
import re  # unused here; kept because the original file imported it
import sqlite3
import sys

USAGE = "Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]"

# Bound parameters instead of string interpolation: input containing a
# double quote used to break the statement, and SQLite may interpret a
# double-quoted token as a column name rather than as a string literal.
_LOOKUP_SQL = "SELECT pinyin FROM entries WHERE simplified=? OR traditional=?;"


def hanzi_to_pinyin(conn, hanzi, verbose=True):
    """Return the pinyin transcription of *hanzi*.

    Args:
        conn: an open ``sqlite3`` connection whose database has an
            ``entries`` table with ``simplified``, ``traditional`` and
            ``pinyin`` columns.
        hanzi: the text to transcribe, as a unicode string.
        verbose: when true (the default), print the progress messages the
            script has always printed while it works.

    Returns:
        The concatenated result.  A dictionary match contributes the pinyin
        of its first result row, followed by one space when the match was
        ended by a failed lookup (no space when the text simply ran out).
        A character with no dictionary entry is copied through as-is.
    """
    chars = list(hanzi)
    total = len(chars)
    result = ""
    pos = 0  # index of the first character not yet consumed

    while pos < total:
        if verbose:
            print("\nhzwork still %s" % chars[pos:])
        candidate = ""
        piece = ""
        # Explicit flag rather than the truthiness of ``piece``: an entry
        # whose pinyin is an empty string must still count as a match.
        matched = False

        while pos < total:
            candidate += chars[pos]
            if verbose:
                print("checking for %s (%s)" % (candidate, repr(candidate)))
            rows = conn.execute(_LOOKUP_SQL, (candidate, candidate)).fetchall()
            if verbose:
                print("res are: %s" % rows)

            if rows:
                # Candidate is a known word: remember it, try a longer one.
                piece = rows[0][0]
                matched = True
                pos += 1
                if verbose:
                    print("found and consumed %s" % rows[0][0])
            elif not matched:
                # Not even the first character is known: pass it through.
                piece = candidate
                pos += 1
                if verbose:
                    print("%s not found, adding raw and breaking" % candidate)
                break
            else:
                # The longer candidate failed; keep the previous match.  The
                # character that caused the miss is not consumed and starts
                # the next word.
                if verbose:
                    print("got empty result, breaking")
                piece += " "
                break

        result += piece
        if verbose:
            print("appending and deleting lastres %s, finres now: %s"
                  % (piece, result))

    return result


def _read_hanzi(source):
    """Return the text named by *source* as a stripped unicode string.

    *source* is read as a UTF-8 file when it names an existing file;
    otherwise it is taken to be the text itself.
    """
    if os.path.isfile(source):
        with open(source, "rb") as handle:
            text = handle.read()
    else:
        text = source
    if isinstance(text, bytes):
        # Python 2 command-line arguments and all file contents are bytes.
        text = text.decode("UTF-8")
    # The original computed ``strip().lower()`` and threw the result away.
    # Only the strip is applied: lowercasing would also change Latin text
    # that is copied through to the output unchanged.
    return text.strip()


def main(argv=None):
    """Run the command-line tool and return its exit status.

    Args:
        argv: the full argument vector including the program name;
            defaults to ``sys.argv``.

    Returns:
        0 on success, 1 on a usage error or when the database file is
        missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print(USAGE)
        # The original wrote a bare ``exit`` (never called) and carried on.
        return 1

    hanzi = _read_hanzi(args[1])
    sqlite_path = args[2]

    if not os.path.exists(sqlite_path):
        print("sqlite database file not found")
        return 1

    conn = sqlite3.connect(sqlite_path)
    try:
        result = hanzi_to_pinyin(conn, hanzi)
    finally:
        conn.close()

    print('\nfinished with:\n """\n%s\n"""\n' % result)
    return 0


if __name__ == "__main__":
    sys.exit(main())