"""Convert a CC-CEDICT dictionary file into an SQLite database.

Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]

The output database contains one table,
``entries(simplified, traditional, pinyin, dict)``, with one row per
dictionary entry.  The ``dict`` column holds every definition of the entry,
each one followed by the ``|||`` separator.
"""
import contextlib
import io
import os
import re
import sqlite3
import sys

# One CC-CEDICT entry has the shape
#     TRADITIONAL SIMPLIFIED [pin1 yin1] /definition 1/definition 2/
# re.VERBOSE ignores unescaped whitespace in the pattern, which is why the
# literal spaces between the fields are written as "\ ".
ENTRY_RE = re.compile(
    r"(?P<trad>.+?)\ (?P<simpl>.+?)\ \[(?P<pinyin>.+)\]\ /(?P<dict>.*)/",
    re.IGNORECASE | re.UNICODE | re.VERBOSE,
)

# Terminator written after every definition inside the ``dict`` column.
DEFINITION_SEPARATOR = "|||"

CREATE_TABLE_SQL = (
    "CREATE TABLE entries "
    "(simplified text, traditional text, pinyin text, dict text)"
)
INSERT_SQL = "INSERT INTO entries VALUES (?, ?, ?, ?)"


def parse_line(line):
    """Parse one line of a CC-CEDICT file.

    Args:
        line: A decoded text line, with or without its trailing newline.

    Returns:
        A ``(simplified, traditional, pinyin, definitions)`` tuple, or
        ``None`` when the line is a ``#`` comment or does not look like a
        dictionary entry.  ``definitions`` is every definition followed by
        ``DEFINITION_SEPARATOR``.
    """
    if line.startswith("#"):
        return None
    match = ENTRY_RE.search(line)
    if match is None:
        return None
    definitions = "".join(
        part + DEFINITION_SEPARATOR for part in match.group("dict").split("/")
    )
    return (
        match.group("simpl"),
        match.group("trad"),
        match.group("pinyin"),
        definitions,
    )


def _iter_rows(lines):
    """Yield a row tuple for every parsable entry in *lines*.

    Comment lines and blank lines are skipped silently; any other line that
    cannot be parsed is reported on stderr by line number and skipped, so a
    single malformed entry does not abort the whole conversion.
    """
    for lineno, line in enumerate(lines, 1):
        row = parse_line(line)
        if row is not None:
            yield row
        elif line.strip() and not line.startswith("#"):
            sys.stderr.write(
                "Warning: skipping malformed line %d\n" % lineno
            )


def main(argv=None):
    """Run the conversion.

    Args:
        argv: Argument vector laid out like ``sys.argv`` (program name, input
            path, output path).  Defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 on success, 1 when the arguments are
        invalid, the input file is missing, or the output file already
        exists.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 3:
        sys.stderr.write(
            "Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]\n"
        )
        return 1

    cedictf = argv[1]
    sqlitef = argv[2]

    if not os.path.exists(cedictf):
        sys.stderr.write("Error: cedict does not exist, aborting.\n")
        return 1

    if os.path.exists(sqlitef):
        sys.stderr.write(
            "Error: sqlite database already exists. Will NOT overwrite, "
            "please remove it or specify a new database file\n"
        )
        return 1

    # Open the input before connecting: sqlite3.connect() creates the output
    # file, so an unreadable input must fail before that happens.
    # "utf-8-sig" also accepts files that start with a byte-order mark.
    with io.open(cedictf, "r", encoding="utf-8-sig") as source:
        with contextlib.closing(sqlite3.connect(sqlitef)) as slconn:
            # The connection context manager commits on success and rolls
            # back if anything inside raises.
            with slconn:
                slconn.execute(CREATE_TABLE_SQL)
                print("finished setup, now inserting")
                # Bound parameters instead of string-built SQL: quotes or
                # other special characters in an entry cannot break or alter
                # the statement, and values are stored unmodified.
                slconn.executemany(INSERT_SQL, _iter_rows(source))
                print("finished inserting, writing back sqlite db")
    return 0


if __name__ == "__main__":
    sys.exit(main())