"""Convert a run of hanzi into pinyin using a SQLite dictionary table.

Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]

The first argument is either literal text or the path of a UTF-8 text
file.  The second argument is an existing SQLite database containing a
table ``entries`` with (at least) the columns ``simplified`` and
``pinyin``.
"""
import sqlite3
import sys
import os
import re


def hanzi_to_pinyin(conn, text, verbose=False):
    """Greedily segment *text* and return the pinyin of each segment.

    Starting at the current position, the candidate word is extended one
    character at a time for as long as the candidate exists in the
    ``simplified`` column of ``entries``; the pinyin of the longest
    candidate found that way is emitted.  A character that does not even
    match on its own is emitted verbatim as ``[c]``.

    Args:
        conn: an open ``sqlite3`` connection (anything with a compatible
            ``execute`` method works).
        text: the unicode text to convert.
        verbose: when true, print the step-by-step trace to stdout.

    Returns:
        The emitted pieces, each followed by a single space (so a
        non-empty result ends with a trailing space); ``""`` for empty
        input.
    """
    chars = list(text)
    total = len(chars)
    pieces = []
    pos = 0
    while pos < total:
        if verbose:
            print("\nhzwork still %s" % chars[pos:])
        candidate = ""
        best = ""
        end = pos
        # Walk an explicit index instead of iterating a list that is being
        # popped from: the old loop ran out of iterations after roughly half
        # of the remaining characters and so missed words near the end.
        while end < total:
            candidate += chars[end]
            if verbose:
                print("checking for %s (%s)" % (candidate, repr(candidate)))
            # Bound parameter: the text may contain quotes, and a
            # double-quoted literal can be taken for a column name by SQLite.
            row = conn.execute(
                "SELECT pinyin FROM entries WHERE simplified=?",
                (candidate,),
            ).fetchone()
            if row is not None:
                # Candidate is a known entry: remember it and try to extend.
                best = row[0]
                end += 1
                if verbose:
                    print("found and consumed %s" % row[0])
            elif end == pos:
                # Not even the first character is known: emit it raw.
                best = "[%s]" % candidate
                end += 1
                if verbose:
                    print("%s not found, adding raw and breaking" % candidate)
                break
            else:
                # Longer candidate is unknown: keep the last match.
                if verbose:
                    print("got empty result, breaking")
                break
        pos = end
        pieces.append(best + " ")
        if verbose:
            print("appending and deleting lastres %s, finres now: %s"
                  % (best, "".join(pieces)))
    return "".join(pieces)


def main(argv):
    """Run the command-line tool; return the process exit status.

    Args:
        argv: the full argument vector, program name included.

    Returns:
        0 on success, 1 on a usage error or when the database file does
        not exist.
    """
    if len(argv) != 3:
        print("Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]")
        return 1

    hanzitf = argv[1]
    sqlitef = argv[2]

    if os.path.exists(hanzitf):
        # Read raw bytes and decode once, closing the file deterministically.
        with open(hanzitf, "rb") as handle:
            hanzi = handle.read()
    else:
        hanzi = hanzitf
    if isinstance(hanzi, bytes):
        hanzi = hanzi.decode("UTF-8")
    # Drop surrounding whitespace (e.g. a file's trailing newline) so it is
    # not emitted as a bogus "[\n]" token.
    # NOTE(review): the old code also called .lower() but threw the result
    # away; case folding is left off because it would change which rows
    # match -- confirm against the contents of the entries table.
    hanzi = hanzi.strip()

    if not os.path.exists(sqlitef):
        print("sqlite database file not found")
        return 1

    slconn = sqlite3.connect(sqlitef)
    try:
        finres = hanzi_to_pinyin(slconn, hanzi, verbose=True)
    finally:
        slconn.close()

    print("\nfinished with: %s\n" % finres)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))