working commit

master
Dario Ernst 14 years ago
parent b36d6bd288
commit 2dc4fe66c2

@ -3,9 +3,11 @@ import sys
import os
import re
import copy
import pprint
from collections import OrderedDict
"""
Library for PinYin-izing either a file or a block of text
Library for PinYin-izing either a file or a block of text. Sadly, Python 2.7 is required (collections.OrderedDict is used)...
"""
NOT = 0
@ -25,12 +27,8 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F
Will pinyin-ize either a string or a file (open(foo,"X")) given as "hanzitf".
Pinyinization can be controlled by the mode parameter. The following
are possible:
PINYIN:
PINYIN_TRANSL
PINYIN_HANZI
PINYIN_TRANSL_HANZI
TRANSL
TRANSL_HANZI
With the defined return-value semantics.
The pinyinization can be controlled via the simplified, traditional
@ -46,13 +44,13 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F
is copied to output.
"""
if(type(hanzitf)=="file"):
if type(hanzitf)==file:
hanzitf.seek(0)
hanzi = hanzitf.read()
else if(type(hanzitf)=="str"):
elif type(hanzitf)==str:
hanzi = hanzitf
else:
print "hanzitf was neither text nor file"
print "hanzitf was neither text nor file, was: %s"%type(hanzitf)
return
hanzi.strip().lower()
@ -64,7 +62,8 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F
slconn = sqlite3.connect(sqlitef)
hzwork = list(hanzi.decode("UTF-8"))
finres = ""
finres = OrderedDict()
while hzwork:
print "\nhzwork still %s"%hzwork
tmpstr = ""
@ -74,46 +73,55 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F
tmpstr += hzwork[0]
print "checking for %s (%s)"%(tmpstr,tmpstr.__repr__())
sqlstr_s = 'SELECT pinyin FROM entries WHERE simplified="%s";'%(tmpstr,tmpstr)
sqlstr_t = 'SELECT pinyin FROM entries WHERE traditional="%s";'%(tmpstr,tmpstr)
sqlstr_s = 'SELECT pinyin, dict FROM entries WHERE simplified="%s";'%tmpstr
sqlstr_t = 'SELECT pinyin, dict FROM entries WHERE traditional="%s";'%tmpstr
res_s = None
res_t = None
res = None
if simplified =! NOT:
if simplified != NOT:
res_s = slconn.execute(sqlstr_s).fetchall()
if traditional =! NOT:
if traditional != NOT:
res_t = slconn.execute(sqlstr_t).fetchall()
if simplified == PRIMARY:
res = res_s
if not res_s && traditional == FALLBACK:
if not res_s and traditional == FALLBACK:
res = res_t
else if traditional == PRIMARY:
elif traditional == PRIMARY:
res = res_t
if not res_t && simplified == FALLBACK:
if not res_t and simplified == FALLBACK:
res = res_s
print "res are: %s"%res
# now for the result work...
if res: # sequence of chars found, remembering it as the last result
lastres = res[0][0]
print res[0]
lastres = [res[0]]
hzwork.pop(0)
print "found and consumed %s"%res[0][0]
elif not res and not lastres: # first char not found, using fallback barf
if original:
lastres = '%s'%tmpstr
lastres = [('%s', 'no translation / string found, using input')]%tmpstr
hzwork.pop(0)
print "%s not found, adding raw and breaking"%tmpstr
break
else:
print "got empty result, breaking"
lastres += " "
lastres.append( (" ","") )
break
finres += lastres
finres[tmpstr] = lastres
print "appending and deleting lastres %s, finres now: %s"%(lastres,finres)
print '\nfinished with:\n """\n%s\n"""\n'%finres
pprint.pprint(finres)
print "\n\n\n"
for k,v in finres.iteritems():
print k
for i in v:
print v
print v[0]
print v[1]
print ""
return finres

Loading…
Cancel
Save