# Provenance: cedict2sqlite.py, introduced by commit
# 6b73baf72add58306ae012015feab94bf454fdcf ("initial ci"),
# Dario Ernst, Fri, 13 Jan 2012 21:52:01 +0100.
"""Convert a CC-CEDICT dictionary file into an SQLite database.

Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]

The target database must not exist yet.  A single table is created:

    entries (simplified text, traditional text, pinyin text, dict text)

The ``dict`` column holds every definition of an entry, each one followed
by the separator ``|||`` (so the value also ends with ``|||``).
"""
import sqlite3
import sys
import os
import re
import io

USAGE = "Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]"

# Appended after every single definition inside the ``dict`` column.
DEFINITION_SEPARATOR = "|||"

# One dictionary entry, e.g.:  TRAD SIMPL [pin1 yin1] /def one/def two/
# NOTE(review): the group order (traditional first) follows the CC-CEDICT
# file format -- confirm it matches what earlier revisions stored.
# The pinyin group stops at the first "]" so that brackets occurring inside
# the definitions can never be swallowed into it.
ENTRY_RE = re.compile(
    r"(?P<trad>\S+) (?P<simpl>\S+) \[(?P<pinyin>[^\]]*)\] /(?P<dict>.*)/",
    re.UNICODE,
)


def parse_line(line):
    """Parse one line of a CC-CEDICT file.

    Returns a tuple ``(simplified, traditional, pinyin, definitions)`` in the
    column order of the ``entries`` table, or ``None`` when the line is a
    ``#`` comment or does not look like a dictionary entry.
    """
    if line.startswith("#"):
        return None
    match = ENTRY_RE.match(line)
    if match is None:
        return None
    definitions = "".join(
        part + DEFINITION_SEPARATOR for part in match.group("dict").split("/")
    )
    return (
        match.group("simpl"),
        match.group("trad"),
        match.group("pinyin"),
        definitions,
    )


def convert(cedict_path, sqlite_path):
    """Create the ``entries`` table in *sqlite_path* and fill it.

    Lines of *cedict_path* are streamed one at a time.  Comment and blank
    lines are skipped silently; any other line that cannot be parsed is
    reported on stderr and skipped.  Returns the number of inserted rows.
    """
    conn = sqlite3.connect(sqlite_path)
    try:
        conn.execute(
            "CREATE TABLE entries "
            "(simplified text, traditional text, pinyin text, dict text)"
        )
        inserted = 0
        # io.open decodes to text on both Python 2 and Python 3.
        with io.open(cedict_path, "r", encoding="utf-8") as source:
            for lineno, line in enumerate(source, 1):
                if line.startswith("#") or not line.strip():
                    continue
                entry = parse_line(line)
                if entry is None:
                    sys.stderr.write(
                        "Warning: skipping malformed line %d\n" % lineno
                    )
                    continue
                # Placeholders let sqlite do the quoting, so entries may
                # contain any character, including double quotes.
                conn.execute("INSERT INTO entries VALUES (?, ?, ?, ?)", entry)
                inserted += 1
        conn.commit()
    finally:
        conn.close()
    return inserted


def main(argv=None):
    """Command line entry point; returns the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print(USAGE)
        return 1

    cedictf = argv[1]
    sqlitef = argv[2]

    if not os.path.exists(cedictf):
        print("Error: cedict does not exist, aborting.")
        return 1

    if os.path.exists(sqlitef):
        print("Error: sqlite database already exists. Will NOT overwrite, please remove it or specify a new database file")
        return 1

    print("finished setup, now inserting")
    convert(cedictf, sqlitef)
    print("finished inserting, writing back sqlite db")
    return 0


if __name__ == "__main__":
    sys.exit(main())