You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

47 lines
1.3 KiB

import io
import os
import re
import sqlite3
import sys
# cedict2sqlite: load a CEDICT-style dictionary text file into a new SQLite
# database containing a single table:
#     entries(simplified, traditional, pinyin, dict)
# The definitions of one entry are stored in the "dict" column as a single
# string, joined with "|||".

# One dictionary line: two space-separated headwords, a bracketed pinyin
# field and a slash-delimited list of definitions. Compiled with re.VERBOSE,
# so unescaped spaces in the pattern are ignored and "\ " is a literal space.
# NOTE(review): the first headword is stored as "simplified" and the second
# as "traditional". CC-CEDICT lists the traditional form first -- confirm the
# column order against the input file actually used.
_ENTRY_RE = re.compile(
    r"(?P<simpl> .+?)\ (?P<trad> .+?)\ \[(?P<pinyin> .+)\]\ /(?P<dict> .*)/",
    re.IGNORECASE | re.UNICODE | re.VERBOSE,
)

_INSERT_SQL = "INSERT INTO entries VALUES (?, ?, ?, ?)"


def _parse_entry(line):
    """Return (simpl, trad, pinyin, definitions) for one line, or None.

    None is returned when the line does not look like a dictionary entry.
    """
    match = _ENTRY_RE.search(line)
    if match is None:
        return None
    # Double quotes are still turned into single quotes so the stored text
    # matches what earlier versions of this script wrote; the parameterized
    # INSERT itself no longer needs this.
    definitions = "|||".join(
        part.replace('"', "'") for part in match.group("dict").split("/")
    )
    return (
        match.group("simpl"),
        match.group("trad"),
        match.group("pinyin"),
        definitions,
    )


def _iter_entries(lines):
    """Yield one row tuple per entry line, skipping comments and blank lines."""
    for lineno, line in enumerate(lines, 1):
        if line.startswith("#") or not line.strip():
            continue
        row = _parse_entry(line)
        if row is None:
            # Report and skip instead of crashing on a single bad line.
            sys.stderr.write(
                "Warning: skipping malformed line %d\n" % lineno
            )
            continue
        yield row


def main(argv):
    """Convert the dictionary named in argv[1] into the database argv[2].

    argv has the shape of sys.argv: program name, input file, output file.
    Returns 0 on success and 1 when the arguments are unusable (wrong count,
    missing input file, or an output file that already exists).
    """
    if len(argv) != 3:
        print("Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]")
        return 1
    cedictf = argv[1]
    sqlitef = argv[2]
    if not os.path.exists(cedictf):
        print("Error: cedict does not exist, aborting.")
        return 1
    if os.path.exists(sqlitef):
        print("Error: sqlite database already exists. Will NOT overwrite, please remove it or specify a new database file")
        return 1

    slconn = sqlite3.connect(sqlitef)
    try:
        slconn.execute("CREATE TABLE entries (simplified text, traditional text, pinyin text, dict text)")
        print("finished setup, now inserting")
        # Assumes the input is UTF-8 encoded (as CC-CEDICT is distributed);
        # io.open yields text on both Python 2 and Python 3.
        with io.open(cedictf, "r", encoding="utf-8") as f:
            # Bound parameters keep quotes in the data from breaking (or
            # altering) the statement; the generator streams line by line.
            slconn.executemany(_INSERT_SQL, _iter_entries(f))
        print("finished inserting, writing back sqlite db")
        slconn.commit()
    finally:
        slconn.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))