diff --git a/pynizelib.py b/pynizelib.py index 51567a2..04be42d 100644 --- a/pynizelib.py +++ b/pynizelib.py @@ -3,8 +3,8 @@ import sys import os import re import copy -import pprint from collections import OrderedDict +from json import JSONEncoder """ Library for PinYin-izing a either a file or a block of text. Sadly, python > 2.7 is required... @@ -22,14 +22,9 @@ TRANS = 4 TRANSL_HANZI = 5 -def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=FALLBACK, original = True): +def pinyinize(hanzitf, sqlitef, simplified=PRIMARY, traditional=FALLBACK, original = True, json = False): """ Will pinyin-ize either a string or a file (open(foo,"X")) given as "hanzitf". - Pinyinization can be controlled by the mode parameter. The following - are possible: - - - With the defined return-value semantics. The pinyinization can be controlled via the simplified, traditional and original paramters. simplified and traditional can be set on @@ -42,6 +37,18 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F something as simplified, then traditional is attempt. If original is set to True instead of False, then the original input hanzi is copied to output. + + The json parameter makes the function return its data JSON-encoded for use in web systems + + The return value is an OrderedDict: the recognized input sequences + are the keys, the values are lists of lists. The structure is: + {'input_str[n:m]': [ + [ 'pinyin', 'translation' ], + [ 'pinyin', 'translation' ], + ... + [ ' ', '' ] # space at the end so you can nicely print these + ], + ...
""" if type(hanzitf)==file: @@ -50,13 +57,13 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F elif type(hanzitf)==str: hanzi = hanzitf else: - print "hanzitf was neither text nor file, was: %s"%type(hanzitf) + #print "hanzitf was neither text nor file, was: %s"%type(hanzitf) return hanzi.strip().lower() if not os.path.exists(sqlitef): - print "sqlite database file not found" + #print "sqlite database file not found" return None slconn = sqlite3.connect(sqlitef) @@ -65,14 +72,14 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F finres = OrderedDict() while hzwork: - print "\nhzwork still %s"%hzwork + #print "\nhzwork still %s"%hzwork tmpstr = "" lastres = "" for c in copy.deepcopy(hzwork): tmpstr += hzwork[0] - print "checking for %s (%s)"%(tmpstr,tmpstr.__repr__()) + #print "checking for %s (%s)"%(tmpstr,tmpstr.__repr__()) sqlstr_s = 'SELECT pinyin, dict FROM entries WHERE simplified="%s";'%tmpstr sqlstr_t = 'SELECT pinyin, dict FROM entries WHERE traditional="%s";'%tmpstr res_s = None @@ -93,35 +100,30 @@ def pinyinize(hanzitf, sqlitef, mode = PINYIN, simplified=PRIMARY, traditional=F if not res_t and simplified == FALLBACK: res = res_s - print "res are: %s"%res + #print "res are: %s"%res # now for the result work... 
if res: # sequence of chars not found, using last result - print res[0] - lastres = [res[0]] + #print res[0] + lastres = [ list(res[0]) ] hzwork.pop(0) - print "found and consumed %s"%res[0][0] + #print "found and consumed %s"%res[0][0] elif not res and not lastres: # first char not found, using fallback barf if original: - lastres = [('%s', 'no translation / string found, using input')]%tmpstr + lastres = [['%s', 'no translation / string found, using input']]%tmpstr hzwork.pop(0) - print "%s not found, adding raw and breaking"%tmpstr + #print "%s not found, adding raw and breaking"%tmpstr break else: - print "got empty result, breaking" - lastres.append( (" ","") ) + #print "got empty result, breaking" + lastres.append( [" ",""] ) break finres[tmpstr] = lastres - print "appending and deleting lastres %s, finres now: %s"%(lastres,finres) - - pprint.pprint(finres) - print "\n\n\n" - for k,v in finres.iteritems(): - print k - for i in v: - print v - print v[0] - print v[1] - print "" - return finres + #print "appending and deleting lastres %s, finres now: %s"%(lastres,finres) + + if not json: + return finres + else: + enc = JSONEncoder() + return enc.encode( finres )