You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import sqlite3
|
|
|
|
|
import sys
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
import io  # unicode-aware open() that behaves the same on Python 2 and 3

# Matches one dictionary line of the form
#     WORD1 WORD2 [pinyin] /definition 1/definition 2/
# The pattern is compiled with re.VERBOSE, so plain spaces inside it are
# ignored and "\ " stands for a literal space.
# NOTE(review): CC-CEDICT documents its layout as
# "Traditional Simplified [pinyin] /defs/", so the first two group names look
# swapped relative to that layout. They are kept unchanged so that column
# contents match databases produced by earlier runs -- confirm before fixing.
ENTRY_RE = re.compile(
    r"(?P<simpl> .+?)\ (?P<trad> .+?)\ \[(?P<pinyin> .+)\]\ /(?P<dict> .*)/",
    re.IGNORECASE | re.UNICODE | re.VERBOSE,
)


def _parse_entry(line):
    """Turn one dictionary line into a row for the ``entries`` table.

    Returns a ``(simplified, traditional, pinyin, definitions)`` tuple, or
    ``None`` when the line does not match the entry pattern.  The individual
    definitions are joined with ``|||``; double quotes inside them are turned
    into single quotes, exactly as the stored data has always looked.
    """
    match = ENTRY_RE.search(line)
    if match is None:
        return None
    senses = match.group("dict").split("/")
    definitions = "|||".join(sense.replace('"', "'") for sense in senses)
    return (
        match.group("simpl"),
        match.group("trad"),
        match.group("pinyin"),
        definitions,
    )


def convert(cedict_path, sqlite_path):
    """Create the ``entries`` table in *sqlite_path* and fill it from *cedict_path*.

    Lines starting with ``#`` and blank lines are ignored.  Lines that do not
    look like a dictionary entry are skipped instead of aborting the run.

    Returns an ``(inserted, skipped)`` pair of line counts.
    Raises ``sqlite3.Error`` if the table cannot be created or a row cannot be
    inserted; the connection is closed in either case.
    """
    conn = sqlite3.connect(sqlite_path)
    try:
        conn.execute(
            "CREATE TABLE entries "
            "(simplified text, traditional text, pinyin text, dict text)"
        )
        print("finished setup, now inserting")

        inserted = 0
        skipped = 0
        # assumes the dictionary file is UTF-8 (optionally with a BOM), which
        # is how CC-CEDICT is distributed -- TODO confirm for other sources.
        with io.open(cedict_path, "r", encoding="utf-8-sig") as source:
            # Iterate lazily; the dictionary has many lines and there is no
            # need to hold them all in memory at once.
            for line in source:
                if line.startswith("#") or not line.strip():
                    continue
                row = _parse_entry(line)
                if row is None:
                    skipped += 1
                    continue
                # Bound parameters: quotes in the data can neither break the
                # statement nor inject SQL.
                conn.execute("INSERT INTO entries VALUES (?, ?, ?, ?)", row)
                inserted += 1

        print("finished inserting, writing back sqlite db")
        conn.commit()
    finally:
        conn.close()
    return inserted, skipped


def main(argv=None):
    """Command-line entry point.

    *argv* defaults to ``sys.argv`` and must be
    ``[program, cedict-file, sqlite-file]``.

    Returns the process exit status: 0 on success, 2 on wrong usage, 1 when
    the input file is missing or the output database already exists.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 3:
        print("Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]")
        return 2

    cedictf = argv[1]
    sqlitef = argv[2]

    if not os.path.exists(cedictf):
        print("Error: cedict does not exist, aborting.")
        return 1

    if os.path.exists(sqlitef):
        print("Error: sqlite database already exists. Will NOT overwrite, please remove it or specify a new database file")
        return 1

    _, skipped = convert(cedictf, sqlitef)
    if skipped:
        sys.stderr.write(
            "Warning: skipped %d line(s) that do not look like dictionary entries\n"
            % skipped
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())
|