Spaces:
Sleeping
Sleeping
| import sqlite3 | |
| import json | |
| import re | |
| from pathlib import Path | |
| DB_PATH = Path(__file__).parent / "nl_qa.db" | |
| SEED_DIR = Path(__file__).parent | |
| def get_conn() -> sqlite3.Connection: | |
| # timeout ์ํฅ + WAL ๋ชจ๋๋ก ๋ค์ค ์ ์์ ํ๊ฒฝ ๋์์ฑ ๋ณด๊ฐ | |
| conn = sqlite3.connect(DB_PATH, timeout=30.0, check_same_thread=False) | |
| conn.row_factory = sqlite3.Row | |
| try: | |
| conn.execute("PRAGMA journal_mode=WAL") | |
| conn.execute("PRAGMA synchronous=NORMAL") | |
| conn.execute("PRAGMA busy_timeout=30000") | |
| except Exception: | |
| pass | |
| return conn | |
| # ๋ต๋ณ ๋ณธ๋ฌธ์์ ISBN(10/13์๋ฆฌ) ๋ฐ ์ฒญ๊ตฌ๊ธฐํธ ํจํด ์ถ์ถ์ฉ ์ ๊ท์ | |
| _ISBN_RE = re.compile(r"(?<!\d)(97[89][\-\s]?\d{1,5}[\-\s]?\d{1,7}[\-\s]?\d{1,7}[\-\s]?\d|\d{10}|\d{9}X)(?!\d)") | |
| _CALLNO_RE = re.compile(r"\b\d{3}(?:\.\d{1,4})?\s?[๊ฐ-ํฃA-Za-z]\d{1,5}(?:[๊ฐ-ํฃA-Za-z])?\b") | |
| def extract_metadata(answer_text: str) -> dict: | |
| """๋ต๋ณ ๋ณธ๋ฌธ์์ ISBNยท์ฒญ๊ตฌ๊ธฐํธ๋ฅผ ์ถ์ถํ์ฌ dict๋ก ๋ฐํ.""" | |
| if not answer_text: | |
| return {"isbns": [], "call_numbers": []} | |
| isbns = list({m.replace(" ", "").replace("-", "") for m in _ISBN_RE.findall(answer_text)}) | |
| callnos = list({m.strip() for m in _CALLNO_RE.findall(answer_text)}) | |
| return {"isbns": isbns[:10], "call_numbers": callnos[:10]} | |
| def init_db() -> None: | |
| with get_conn() as conn: | |
| conn.execute(""" | |
| CREATE TABLE IF NOT EXISTS qa_items ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| rec_key TEXT UNIQUE NOT NULL, | |
| question TEXT NOT NULL DEFAULT '', | |
| answer TEXT NOT NULL DEFAULT '', | |
| subject TEXT DEFAULT '', | |
| reg_date TEXT DEFAULT '', | |
| answer_date TEXT DEFAULT '', | |
| answer_lib TEXT DEFAULT '', | |
| isbns TEXT DEFAULT '', | |
| call_numbers TEXT DEFAULT '', | |
| updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, | |
| created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP | |
| )""") | |
| # ๊ธฐ์กด DB ํธํ: ์ปฌ๋ผ์ด ์์ผ๋ฉด ์ถ๊ฐ | |
| for ddl in ( | |
| "ALTER TABLE qa_items ADD COLUMN isbns TEXT DEFAULT ''", | |
| "ALTER TABLE qa_items ADD COLUMN call_numbers TEXT DEFAULT ''", | |
| "ALTER TABLE qa_items ADD COLUMN updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP", | |
| ): | |
| try: | |
| conn.execute(ddl) | |
| except sqlite3.OperationalError: | |
| pass | |
| conn.execute(""" | |
| CREATE VIRTUAL TABLE IF NOT EXISTS qa_fts USING fts5( | |
| question, | |
| answer, | |
| subject, | |
| content = qa_items, | |
| content_rowid = id, | |
| tokenize = 'unicode61' | |
| )""") | |
| conn.execute(""" | |
| CREATE TRIGGER IF NOT EXISTS qa_ai AFTER INSERT ON qa_items BEGIN | |
| INSERT INTO qa_fts(rowid, question, answer, subject) | |
| VALUES (new.id, new.question, new.answer, new.subject); | |
| END""") | |
| conn.execute(""" | |
| CREATE TRIGGER IF NOT EXISTS qa_au AFTER UPDATE ON qa_items BEGIN | |
| INSERT INTO qa_fts(qa_fts, rowid, question, answer, subject) | |
| VALUES ('delete', old.id, old.question, old.answer, old.subject); | |
| INSERT INTO qa_fts(rowid, question, answer, subject) | |
| VALUES (new.id, new.question, new.answer, new.subject); | |
| END""") | |
| # ์ด์ฉ์ ํผ๋๋ฐฑ ํ ์ด๋ธ (์๊ณ๊ฐ ์๋ ํ๋ ๋ฐ์ดํฐ ์์ง์ฉ) | |
| conn.execute(""" | |
| CREATE TABLE IF NOT EXISTS feedback ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| query TEXT NOT NULL DEFAULT '', | |
| response_tier TEXT NOT NULL DEFAULT '', | |
| top_score REAL DEFAULT 0, | |
| norm_score REAL DEFAULT 0, | |
| helpful INTEGER NOT NULL DEFAULT 0, | |
| created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP | |
| )""") | |
| conn.commit() | |
| def seed_from_json() -> int: | |
| """DB๊ฐ ๋น์ด์์ผ๋ฉด seed JSON ํ์ผ๋ค์์ ๋ฐ์ดํฐ๋ฅผ ์ผ๊ด ์ฝ์ . ์ฝ์ ๊ฑด์ ๋ฐํ.""" | |
| if count_items() > 0: | |
| return 0 | |
| seed_files = sorted(SEED_DIR.glob("nl_qa_seed_*.json")) | |
| if not seed_files: | |
| return 0 | |
| total = 0 | |
| with get_conn() as conn: | |
| for path in seed_files: | |
| with open(path, "r", encoding="utf-8") as f: | |
| data = json.load(f) | |
| conn.executemany(""" | |
| INSERT OR IGNORE INTO qa_items | |
| (rec_key, question, answer, subject, answer_date, answer_lib) | |
| VALUES (:rec_key, :question, :answer, :subject, :answer_date, :answer_lib) | |
| """, data) | |
| total += len(data) | |
| conn.commit() | |
| return total | |
| def upsert_item(rec_key: str, question: str, answer: str, subject: str, | |
| reg_date: str = "", answer_date: str = "", answer_lib: str = "") -> bool: | |
| """Insert or update a Q&A item. ISBNยท์ฒญ๊ตฌ๊ธฐํธ๋ ํจ๊ป ์ถ์ถ ์ ์ฅ. Returns True if newly inserted.""" | |
| meta = extract_metadata(answer) | |
| isbns_str = ",".join(meta["isbns"]) | |
| callno_str = ",".join(meta["call_numbers"]) | |
| with get_conn() as conn: | |
| existing = conn.execute( | |
| "SELECT id FROM qa_items WHERE rec_key = ?", (rec_key,) | |
| ).fetchone() | |
| if existing: | |
| conn.execute(""" | |
| UPDATE qa_items SET question=?, answer=?, subject=?, | |
| reg_date=?, answer_date=?, answer_lib=?, | |
| isbns=?, call_numbers=?, updated_at=CURRENT_TIMESTAMP | |
| WHERE rec_key=? | |
| """, (question, answer, subject, reg_date, answer_date, answer_lib, | |
| isbns_str, callno_str, rec_key)) | |
| conn.commit() | |
| return False | |
| else: | |
| conn.execute(""" | |
| INSERT INTO qa_items | |
| (rec_key, question, answer, subject, reg_date, answer_date, answer_lib, | |
| isbns, call_numbers) | |
| VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) | |
| """, (rec_key, question, answer, subject, reg_date, answer_date, answer_lib, | |
| isbns_str, callno_str)) | |
| conn.commit() | |
| return True | |
| def get_known_keys() -> set: | |
| with get_conn() as conn: | |
| rows = conn.execute("SELECT rec_key FROM qa_items").fetchall() | |
| return {r["rec_key"] for r in rows} | |
| def count_items() -> int: | |
| with get_conn() as conn: | |
| return conn.execute("SELECT COUNT(*) FROM qa_items").fetchone()[0] | |
| def get_items_for_index() -> list: | |
| with get_conn() as conn: | |
| rows = conn.execute( | |
| "SELECT id, rec_key, question, answer, subject FROM qa_items" | |
| ).fetchall() | |
| return [dict(r) for r in rows] | |