|
|
|
|
import sqlite3
|
|
|
|
|
import sys
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import copy
|
|
|
|
|
|
|
|
|
|
# Command-line handling: exactly two arguments are required -- the hanzi text
# (or the path of a file containing it) and the SQLite database file.
if len(sys.argv) != 3:
    print("Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]")
    # A bare `exit` is only a name lookup and never terminates the script,
    # which then crashed with IndexError on the argv accesses below.
    # Exit explicitly, with a non-zero status to signal the usage error.
    sys.exit(1)

# First argument: literal hanzi text, or the path of a file holding the text.
hanzitf = sys.argv[1]
# Second argument: path of the SQLite database queried for pinyin.
sqlitef = sys.argv[2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The first argument is treated as a file path when such a path exists;
# otherwise the argument itself is the text to convert.
if os.path.exists(hanzitf):
    # Context manager so the file handle is closed once the text is read.
    with open(hanzitf, "r") as hanzi_file:
        hanzi = hanzi_file.read()
else:
    hanzi = hanzitf

# String methods return new strings, so the result has to be rebound; the
# original call discarded it and the text was never normalised. Stripping
# removes surrounding whitespace (e.g. the trailing newline of a file) and
# lower() folds Latin letters to lower case.
hanzi = hanzi.strip().lower()
|
|
|
|
|
|
|
|
|
|
# Check for the database up front: sqlite3.connect() on a missing path would
# create a new, empty database instead of reporting the mistake.
if not os.path.exists(sqlitef):
    print("sqlite database file not found")
    # Non-zero status so callers can tell the run failed (a bare sys.exit()
    # reports success).
    sys.exit(1)

# Connection used by the lookup loop further down.
slconn = sqlite3.connect(sqlitef)
|
|
|
|
|
|
|
|
|
|
# Work on a list of characters. A byte string (what Python 2 provides from
# argv or a file opened in "r" mode) is decoded from UTF-8 first; text that
# is already unicode is used unchanged.
text = hanzi.decode("UTF-8") if isinstance(hanzi, bytes) else hanzi
hzwork = list(text)
finres = ""

# Longest-match conversion: starting from the front of hzwork, keep extending
# the candidate string by one character while the database still has an entry
# for it, and emit the pinyin of the longest candidate that matched.
while hzwork:
    print("\nhzwork still %s" % hzwork)
    tmpstr = ""
    lastres = ""

    # Each pass consumes at most one character, so the length of hzwork on
    # entry bounds the number of passes and hzwork[0] always exists here.
    for _ in range(len(hzwork)):
        tmpstr += hzwork[0]

        print("checking for %s (%s)" % (tmpstr, repr(tmpstr)))
        # Bound parameters instead of string interpolation: a quote in the
        # input can no longer break or alter the statement, and the value is
        # always compared as a string rather than being read as an identifier.
        res = slconn.execute(
            "SELECT pinyin FROM entries WHERE simplified=? OR traditional=?;",
            (tmpstr, tmpstr),
        ).fetchall()
        print("res are: %s" % res)

        if res:
            # The candidate is a known entry: remember its pinyin, consume
            # the character just added and try to extend the match further.
            lastres = res[0][0]
            hzwork.pop(0)
            print("found and consumed %s" % res[0][0])
        elif not lastres:
            # Not even the first character is known: pass it through raw.
            lastres = tmpstr
            hzwork.pop(0)
            print("%s not found, adding raw and breaking" % tmpstr)
            break
        else:
            # The longer candidate is unknown: keep the previous match. The
            # character just tried stays in hzwork for the next round.
            print("got empty result, breaking")
            lastres += " "
            break

    finres += lastres
    print("appending and deleting lastres %s, finres now: %s" % (lastres, finres))

# All lookups are done; release the database connection.
slconn.close()

print('\nfinished with:\n """\n%s\n"""\n' % finres)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|