mbtibooktalk / server /database.py
dongle0516's picture
feat: 3๋‹จ๊ณ„ ์‹ ๋ขฐ๋„ ์ž„๊ณ„๊ฐ’ ์ฒด๊ณ„ ์ ์šฉ (5์ฐจ ๊ฐœ์„  โ€” ์ž„๊ณ„๊ฐ’ ์žฌ์„ค๊ณ„์•ˆ)
a7db5be
import sqlite3
import json
import re
from pathlib import Path
DB_PATH = Path(__file__).parent / "nl_qa.db"
SEED_DIR = Path(__file__).parent
def get_conn() -> sqlite3.Connection:
# timeout ์ƒํ–ฅ + WAL ๋ชจ๋“œ๋กœ ๋‹ค์ค‘ ์ ‘์†์ž ํ™˜๊ฒฝ ๋™์‹œ์„ฑ ๋ณด๊ฐ•
conn = sqlite3.connect(DB_PATH, timeout=30.0, check_same_thread=False)
conn.row_factory = sqlite3.Row
try:
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA busy_timeout=30000")
except Exception:
pass
return conn
# ๋‹ต๋ณ€ ๋ณธ๋ฌธ์—์„œ ISBN(10/13์ž๋ฆฌ) ๋ฐ ์ฒญ๊ตฌ๊ธฐํ˜ธ ํŒจํ„ด ์ถ”์ถœ์šฉ ์ •๊ทœ์‹
_ISBN_RE = re.compile(r"(?<!\d)(97[89][\-\s]?\d{1,5}[\-\s]?\d{1,7}[\-\s]?\d{1,7}[\-\s]?\d|\d{10}|\d{9}X)(?!\d)")
_CALLNO_RE = re.compile(r"\b\d{3}(?:\.\d{1,4})?\s?[๊ฐ€-ํžฃA-Za-z]\d{1,5}(?:[๊ฐ€-ํžฃA-Za-z])?\b")
def extract_metadata(answer_text: str) -> dict:
"""๋‹ต๋ณ€ ๋ณธ๋ฌธ์—์„œ ISBNยท์ฒญ๊ตฌ๊ธฐํ˜ธ๋ฅผ ์ถ”์ถœํ•˜์—ฌ dict๋กœ ๋ฐ˜ํ™˜."""
if not answer_text:
return {"isbns": [], "call_numbers": []}
isbns = list({m.replace(" ", "").replace("-", "") for m in _ISBN_RE.findall(answer_text)})
callnos = list({m.strip() for m in _CALLNO_RE.findall(answer_text)})
return {"isbns": isbns[:10], "call_numbers": callnos[:10]}
def init_db() -> None:
with get_conn() as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS qa_items (
id INTEGER PRIMARY KEY AUTOINCREMENT,
rec_key TEXT UNIQUE NOT NULL,
question TEXT NOT NULL DEFAULT '',
answer TEXT NOT NULL DEFAULT '',
subject TEXT DEFAULT '',
reg_date TEXT DEFAULT '',
answer_date TEXT DEFAULT '',
answer_lib TEXT DEFAULT '',
isbns TEXT DEFAULT '',
call_numbers TEXT DEFAULT '',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)""")
# ๊ธฐ์กด DB ํ˜ธํ™˜: ์ปฌ๋Ÿผ์ด ์—†์œผ๋ฉด ์ถ”๊ฐ€
for ddl in (
"ALTER TABLE qa_items ADD COLUMN isbns TEXT DEFAULT ''",
"ALTER TABLE qa_items ADD COLUMN call_numbers TEXT DEFAULT ''",
"ALTER TABLE qa_items ADD COLUMN updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
):
try:
conn.execute(ddl)
except sqlite3.OperationalError:
pass
conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS qa_fts USING fts5(
question,
answer,
subject,
content = qa_items,
content_rowid = id,
tokenize = 'unicode61'
)""")
conn.execute("""
CREATE TRIGGER IF NOT EXISTS qa_ai AFTER INSERT ON qa_items BEGIN
INSERT INTO qa_fts(rowid, question, answer, subject)
VALUES (new.id, new.question, new.answer, new.subject);
END""")
conn.execute("""
CREATE TRIGGER IF NOT EXISTS qa_au AFTER UPDATE ON qa_items BEGIN
INSERT INTO qa_fts(qa_fts, rowid, question, answer, subject)
VALUES ('delete', old.id, old.question, old.answer, old.subject);
INSERT INTO qa_fts(rowid, question, answer, subject)
VALUES (new.id, new.question, new.answer, new.subject);
END""")
# ์ด์šฉ์ž ํ”ผ๋“œ๋ฐฑ ํ…Œ์ด๋ธ” (์ž„๊ณ„๊ฐ’ ์ž๋™ ํŠœ๋‹ ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘์šฉ)
conn.execute("""
CREATE TABLE IF NOT EXISTS feedback (
id INTEGER PRIMARY KEY AUTOINCREMENT,
query TEXT NOT NULL DEFAULT '',
response_tier TEXT NOT NULL DEFAULT '',
top_score REAL DEFAULT 0,
norm_score REAL DEFAULT 0,
helpful INTEGER NOT NULL DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)""")
conn.commit()
def seed_from_json() -> int:
"""DB๊ฐ€ ๋น„์–ด์žˆ์œผ๋ฉด seed JSON ํŒŒ์ผ๋“ค์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ์ผ๊ด„ ์‚ฝ์ž…. ์‚ฝ์ž… ๊ฑด์ˆ˜ ๋ฐ˜ํ™˜."""
if count_items() > 0:
return 0
seed_files = sorted(SEED_DIR.glob("nl_qa_seed_*.json"))
if not seed_files:
return 0
total = 0
with get_conn() as conn:
for path in seed_files:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
conn.executemany("""
INSERT OR IGNORE INTO qa_items
(rec_key, question, answer, subject, answer_date, answer_lib)
VALUES (:rec_key, :question, :answer, :subject, :answer_date, :answer_lib)
""", data)
total += len(data)
conn.commit()
return total
def upsert_item(rec_key: str, question: str, answer: str, subject: str,
reg_date: str = "", answer_date: str = "", answer_lib: str = "") -> bool:
"""Insert or update a Q&A item. ISBNยท์ฒญ๊ตฌ๊ธฐํ˜ธ๋„ ํ•จ๊ป˜ ์ถ”์ถœ ์ €์žฅ. Returns True if newly inserted."""
meta = extract_metadata(answer)
isbns_str = ",".join(meta["isbns"])
callno_str = ",".join(meta["call_numbers"])
with get_conn() as conn:
existing = conn.execute(
"SELECT id FROM qa_items WHERE rec_key = ?", (rec_key,)
).fetchone()
if existing:
conn.execute("""
UPDATE qa_items SET question=?, answer=?, subject=?,
reg_date=?, answer_date=?, answer_lib=?,
isbns=?, call_numbers=?, updated_at=CURRENT_TIMESTAMP
WHERE rec_key=?
""", (question, answer, subject, reg_date, answer_date, answer_lib,
isbns_str, callno_str, rec_key))
conn.commit()
return False
else:
conn.execute("""
INSERT INTO qa_items
(rec_key, question, answer, subject, reg_date, answer_date, answer_lib,
isbns, call_numbers)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (rec_key, question, answer, subject, reg_date, answer_date, answer_lib,
isbns_str, callno_str))
conn.commit()
return True
def get_known_keys() -> set:
with get_conn() as conn:
rows = conn.execute("SELECT rec_key FROM qa_items").fetchall()
return {r["rec_key"] for r in rows}
def count_items() -> int:
with get_conn() as conn:
return conn.execute("SELECT COUNT(*) FROM qa_items").fetchone()[0]
def get_items_for_index() -> list:
with get_conn() as conn:
rows = conn.execute(
"SELECT id, rec_key, question, answer, subject FROM qa_items"
).fetchall()
return [dict(r) for r in rows]