"""Convert Chinese (hanzi) text to pinyin using a dictionary stored in SQLite.

Usage:
    pinyinize.py [hanzi-text or file] [sqlite-file.sqlite]

The first argument is either literal hanzi text or the path of a UTF-8 text
file containing it.  The second argument is an SQLite database that must
provide a table ``entries`` with the columns ``simplified`` and ``pinyin``
(presumably produced by a companion "cedict2sqlite" tool -- TODO confirm).

The code is written to run unchanged on Python 2 and Python 3: ``print`` is
only ever called with a single parenthesised argument and text is decoded
explicitly at the input boundary.

Provenance: pinyinize.py as introduced by commit f9685346ce17 ("starting to
write pinyinizer").
"""

import contextlib
import io
import os
import re  # not used by this script yet; retained from the original imports
import sqlite3
import sys

_USAGE = "Usage: pinyinize.py [hanzi-text or file] [sqlite-file.sqlite]"

# Parameterised on purpose: the looked-up word comes straight from user input,
# so it must never be interpolated into the SQL text.
_LOOKUP_SQL = "SELECT pinyin FROM entries WHERE simplified = ?"


def _to_text(value):
    """Return *value* as text, decoding UTF-8 bytes (Python 2 argv) if needed."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def _lookup_pinyin(conn, word):
    """Return the pinyin of the first ``entries`` row matching *word*, or None."""
    row = conn.execute(_LOOKUP_SQL, (word,)).fetchone()
    return None if row is None else row[0]


def _trace(enabled, message):
    """Print *message* when tracing is enabled."""
    if enabled:
        print(message)


def pinyinize(conn, text, verbose=False):
    """Transliterate *text* into pinyin by greedy dictionary matching.

    Starting at the current position the candidate word is extended one
    character at a time for as long as the dictionary has an exact entry for
    it; the pinyin of the longest such candidate is emitted and the matched
    characters are consumed.  Note that extension stops at the first prefix
    without an entry, so a longer word whose shorter prefix is missing from
    the dictionary is not found.

    A character with no entry at all is copied to the output unchanged and
    consumed, which guarantees progress on punctuation, whitespace, Latin
    text or unknown hanzi.

    Args:
        conn: An open ``sqlite3`` connection exposing the ``entries`` table.
        text: The (already decoded) text to convert.
        verbose: When true, print a trace of every lookup to stdout.

    Returns:
        The pinyin segments joined by single spaces; ``""`` for empty input.
    """
    segments = []
    total = len(text)
    start = 0
    while start < total:
        _trace(verbose, "\nhzwork still %s" % text[start:])
        best = None
        end = start
        while end < total:
            candidate = text[start:end + 1]
            _trace(verbose, "checking for %s (%r)" % (candidate, candidate))
            found = _lookup_pinyin(conn, candidate)
            if found is None:
                _trace(verbose, "got empty result, breaking")
                break
            best = found
            end += 1
            _trace(verbose, "found and consumed %s" % found)
        if best is None:
            # Nothing matched, not even the single character: pass it through
            # and move on instead of retrying the same position forever.
            best = text[start]
            end = start + 1
        segments.append(best)
        start = end
        _trace(
            verbose,
            "appending and deleting lastres %s, finres now: %s"
            % (best, " ".join(segments)),
        )
    return " ".join(segments)


def main(argv=None):
    """Run the command line tool.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when the
        database file does not exist.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print(_USAGE)
        return 1

    source, db_path = args[1], args[2]

    # The first argument doubles as "path or literal text".
    if os.path.isfile(source):
        with io.open(source, "r", encoding="utf-8") as handle:
            hanzi = handle.read()
    else:
        hanzi = _to_text(source)

    # Strings are immutable, so the normalised value has to be kept.
    hanzi = hanzi.strip().lower()

    if not os.path.exists(db_path):
        print("sqlite database file not found")
        return 1

    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        result = pinyinize(conn, hanzi, verbose=True)

    print("\nfinished with: %s\n" % result)
    return 0


if __name__ == "__main__":
    sys.exit(main())