commit
6b73baf72a
@ -0,0 +1,44 @@
|
||||
import io
import os
import re
import sqlite3
import sys

# cedict2sqlite: convert a CEDICT text dictionary into a one-table SQLite
# database.
#
# Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]
#
# The code is written to run unchanged on Python 2.6+ and Python 3.

# One CEDICT entry per line:
#     Traditional Simplified [pin1 yin1] /gloss 1/gloss 2/
# The first headword is the traditional form and the second the simplified
# form, so the groups are named accordingly.
_ENTRY_RE = re.compile(
    r"(?P<trad>\S+) (?P<simpl>\S+) \[(?P<pinyin>[^\]]+)\] /(?P<defs>.*)/",
    re.UNICODE,
)

# Written after every gloss (including the last) in the "dict" column.
_GLOSS_SEP = "|||"


def _parse_entry(line):
    """Turn one CEDICT line into a row for the ``entries`` table.

    Returns a ``(simplified, traditional, pinyin, dict)`` tuple, or ``None``
    when the line is not a dictionary entry (blank or malformed).
    """
    found = _ENTRY_RE.match(line)
    if found is None:
        return None
    glosses = found.group("defs").split("/")
    packed = "".join(gloss + _GLOSS_SEP for gloss in glosses)
    return (
        found.group("simpl"),
        found.group("trad"),
        found.group("pinyin"),
        packed,
    )


def _convert(cedict_path, sqlite_path):
    """Create the ``entries`` table in *sqlite_path* and fill it from *cedict_path*.

    Lines starting with ``#`` are comments and blank lines are ignored.
    Returns the number of non-blank lines that were skipped because they do
    not look like a CEDICT entry.
    """
    skipped = 0
    conn = sqlite3.connect(sqlite_path)
    try:
        conn.execute(
            "CREATE TABLE entries "
            "(simplified text, traditional text, pinyin text, dict text)"
        )
        print("finished setup, now inserting")
        # io.open decodes to text on both Python 2 and 3; iterating the file
        # streams it instead of loading the whole dictionary at once.
        with io.open(cedict_path, "r", encoding="utf-8") as source:
            for line in source:
                if line.startswith("#"):
                    continue
                row = _parse_entry(line)
                if row is None:
                    if line.strip():
                        skipped += 1
                    continue
                # Bound parameters: the text is stored verbatim and quotes in
                # the dictionary can no longer break or alter the statement.
                conn.execute("INSERT INTO entries VALUES (?, ?, ?, ?)", row)
        print("finished inserting, writing back sqlite db")
        conn.commit()
    finally:
        conn.close()
    return skipped


def main(argv):
    """Run the converter with *argv* (``sys.argv`` layout).

    Returns the process exit status: 0 on success, 1 on a usage error, a
    missing input file, or an already existing output database.
    """
    if len(argv) != 3:
        print("Usage: cedict2sqlite [cedict-file] [sqlite-file.sqlite]")
        return 1

    cedictf = argv[1]
    sqlitef = argv[2]

    if not os.path.exists(cedictf):
        print("Error: cedict does not exist, aborting.")
        return 1

    if os.path.exists(sqlitef):
        print("Error: sqlite database already exists. Will NOT overwrite, please remove it or specify a new database file")
        return 1

    try:
        skipped = _convert(cedictf, sqlitef)
    except Exception:
        # The database did not exist before this run; remove the partial file
        # so that a retry is not refused by the "already exists" check.
        if os.path.exists(sqlitef):
            os.remove(sqlitef)
        raise

    if skipped:
        sys.stderr.write(
            "Warning: skipped %d line(s) that are not valid CEDICT entries\n"
            % skipped
        )
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
Loading…
Reference in new issue