#!/usr/bin/env python3
"""Build an SQLite database from definition files."""

import gzip
import lzma
import os
import sqlite3
import sys
from pathlib import Path


def main() -> None:
    """Build ``tlfi.db`` in the current directory from the TLFI_ROOT tree.

    Reads the ``TLFI_ROOT`` environment variable and expects to find
    ``lexical_forms.txt`` and a ``definitions`` directory under it.

    Raises:
        SystemExit: if ``TLFI_ROOT`` is unset or empty.
    """
    tlfi_root = os.environ.get("TLFI_ROOT")
    if not tlfi_root:
        sys.exit("No TLFI_ROOT environment variable.")

    tlfi_root_path = Path(tlfi_root)
    lexical_forms_path = tlfi_root_path / "lexical_forms.txt"
    definitions_path = tlfi_root_path / "definitions"
    db_path = Path.cwd() / "tlfi.db"

    conn = sqlite3.connect(db_path)
    try:
        create_database(conn)
        fill_database(conn, lexical_forms_path, definitions_path)
        # Single commit at the end: a failed run leaves no partial inserts.
        conn.commit()
    finally:
        # Always release the database file handle, even when filling fails.
        conn.close()


def create_database(conn: sqlite3.Connection) -> None:
    """Create the ``definitions`` table and its lexical-form index.

    Both statements use ``if not exists``, so calling this on an already
    initialised database is a no-op.
    """
    cursor = conn.cursor()
    cursor.execute(
        "create table if not exists definitions ("
        "    id integer primary key,"
        "    lexical_form text,"
        "    definition blob not null"
        ");"
    )
    cursor.execute(
        "create index if not exists idx_forms on definitions(lexical_form);"
    )


def fill_database(
    conn: sqlite3.Connection,
    lexical_forms_path: Path,
    definitions_path: Path
) -> None:
    """Insert every definition found under *definitions_path* into *conn*.

    The expected layout is ``<definitions_path>/<letter>/<form>/<file>``.
    Entries that are not directories at the letter or form level are
    skipped. Directories are walked in sorted order so that row ids are
    reproducible from one run to the next.

    The lexical forms file is only read to report how many forms it lists;
    it must exist and be readable. The caller is responsible for committing.
    """
    # Explicit encoding so the count does not depend on the locale.
    with open(lexical_forms_path, "rt", encoding="utf-8") as lexical_forms_file:
        forms = [line.rstrip() for line in lexical_forms_file]
    print(f"{len(forms)} forms in the lexical forms file.")

    cursor = conn.cursor()
    for letter in sorted(definitions_path.iterdir()):
        if not letter.is_dir():
            continue
        for form_dir in sorted(letter.iterdir()):
            # A stray file here would make insert_forms fail on iterdir().
            if not form_dir.is_dir():
                continue
            form = form_dir.name
            print(form)
            insert_forms(cursor, form, form_dir)


def insert_forms(cursor: sqlite3.Cursor, form: str, form_dir: Path) -> None:
    """Insert one row per gzip file found directly inside *form_dir*.

    Each file is decompressed, decoded as text, re-encoded as UTF-8 and
    stored LZMA-compressed in the ``definition`` column with *form* as its
    ``lexical_form``. Non-file entries are skipped; files are processed in
    sorted order.

    Raises:
        gzip.BadGzipFile: if a file in *form_dir* is not valid gzip data.
        UnicodeDecodeError: if a file's content is not valid UTF-8.
    """
    for gz_file in sorted(form_dir.iterdir()):
        if not gz_file.is_file():
            continue
        # Decode explicitly as UTF-8 so the stored bytes do not depend on
        # the locale. Text mode also normalises line endings, as before.
        # NOTE(review): assumes the archives hold UTF-8 HTML; confirm.
        with gzip.open(gz_file, "rt", encoding="utf-8") as form_file:
            html = form_file.read()
        def_data = lzma.compress(html.encode("utf-8"))
        cursor.execute(
            "insert into definitions (lexical_form, definition) values (?, ?)",
            (form, def_data)
        )


if __name__ == "__main__":
    main()