You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.4 KiB

import sqlite3
import sys
import os
import re
def _load_hanzi(source):
    """Return the text to convert as a unicode string.

    *source* is treated as a file path when such a path exists; otherwise
    it is taken to be the text itself. Bytes are decoded as UTF-8 and
    surrounding whitespace is removed.
    """
    if os.path.exists(source):
        # Read raw bytes and decode explicitly so the result does not depend
        # on the interpreter's default text encoding.
        with open(source, "rb") as handle:
            raw = handle.read()
    else:
        raw = source
    if isinstance(raw, bytes):
        raw = raw.decode("UTF-8")
    # The original computed strip().lower() and threw the result away, so
    # e.g. a file's trailing newline ended up as a bogus "[\n]" token.
    # NOTE(review): lower() is deliberately not applied -- headwords in the
    # "simplified" column may be case-sensitive; confirm against the database.
    return raw.strip()


def hanzi_to_pinyin(chars, conn):
    """Convert a sequence of characters to pinyin using the ``entries`` table.

    Starting at the current position the candidate string is extended one
    character at a time for as long as it is found in the ``simplified``
    column; the ``pinyin`` of the longest candidate found is emitted. A
    character that starts no known entry is emitted raw as ``[c]``.

    Args:
        chars: unicode string (or list of characters) to convert.
        conn: open ``sqlite3`` connection exposing
            ``entries(simplified, pinyin)``.

    Returns:
        The emitted tokens, each followed by a single space (so a non-empty
        result ends with a space; empty input yields ``""``).
    """
    chars = list(chars)
    total = len(chars)
    finres = ""
    pos = 0
    while pos < total:
        print("\nhzwork still %s" % chars[pos:])
        candidate = ""
        matched = ""
        # `end` is one past the last consumed character. Walking by index
        # (instead of popping from the list being iterated) keeps the scan
        # from skipping positions and cutting words short.
        end = pos
        while end < total:
            candidate += chars[end]
            print("checking for %s (%s)" % (candidate, repr(candidate)))
            # Bound parameter: input containing quotes can neither break nor
            # alter the statement.
            row = conn.execute(
                "SELECT pinyin FROM entries WHERE simplified=?",
                (candidate,),
            ).fetchone()
            if row is not None:
                matched = row[0]
                end += 1
                print("found and consumed %s" % row[0])
            elif end == pos:
                # Not even the first character is known: emit it raw.
                matched = "[%s]" % candidate
                end += 1
                print("%s not found, adding raw and breaking" % candidate)
                break
            else:
                # Longer candidate unknown: keep the last successful match and
                # leave the current character for the next round.
                print("got empty result, breaking")
                break
        finres += matched + " "
        print(
            "appending and deleting lastres %s, finres now: %s"
            % (matched, finres)
        )
        pos = end
    return finres


def main(argv):
    """Run the command-line tool; return the process exit status.

    Args:
        argv: full argument vector: program name, Hanzi text or path to a
            file containing it, path to the sqlite database.

    Returns:
        0 on success, 1 on a usage error or when the database file is
        missing.
    """
    if len(argv) != 3:
        print("Usage: cedict2sqlite [hanzi-text or file] [sqlite-file.sqlite]")
        # The original had a bare `exit` here (a no-op), so execution fell
        # through and crashed on the missing arguments.
        return 1
    hanzitf = argv[1]
    sqlitef = argv[2]
    hanzi = _load_hanzi(hanzitf)
    # Checked explicitly because sqlite3.connect() would silently create an
    # empty database at a non-existent path.
    if not os.path.exists(sqlitef):
        print("sqlite database file not found")
        return 1
    slconn = sqlite3.connect(sqlitef)
    try:
        finres = hanzi_to_pinyin(hanzi, slconn)
    finally:
        slconn.close()
    print("\nfinished with: %s\n" % finres)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))