File size: 6,740 Bytes
66a735d
925ee37
1fa3750
66a735d
 
4dd66d2
 
66a735d
 
 
1fa3750
 
66a735d
1fa3750
 
 
 
 
 
66a735d
 
 
1fa3750
 
 
 
 
 
 
 
 
 
 
 
 
 
66a735d
 
 
 
 
 
 
 
 
 
 
 
1fa3750
 
 
66a735d
 
1fa3750
 
 
 
 
 
 
 
 
 
66a735d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7db5be
 
 
 
 
 
 
 
 
 
 
 
66a735d
 
 
925ee37
4dd66d2
 
925ee37
4dd66d2
 
 
 
925ee37
4dd66d2
 
 
 
 
 
 
 
 
925ee37
4dd66d2
925ee37
 
66a735d
 
1fa3750
 
 
 
66a735d
 
 
 
 
 
 
 
1fa3750
 
66a735d
1fa3750
 
66a735d
 
 
 
1fa3750
 
 
 
 
 
66a735d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import sqlite3
import json
import re
from pathlib import Path

DB_PATH    = Path(__file__).parent / "nl_qa.db"
SEED_DIR   = Path(__file__).parent


def get_conn() -> sqlite3.Connection:
    # timeout 상향 + WAL 모드로 다중 접속자 환경 동시성 보강
    conn = sqlite3.connect(DB_PATH, timeout=30.0, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA busy_timeout=30000")
    except Exception:
        pass
    return conn


# 답변 본문에서 ISBN(10/13자리) 및 청구기호 패턴 추출용 정규식
_ISBN_RE = re.compile(r"(?<!\d)(97[89][\-\s]?\d{1,5}[\-\s]?\d{1,7}[\-\s]?\d{1,7}[\-\s]?\d|\d{10}|\d{9}X)(?!\d)")
_CALLNO_RE = re.compile(r"\b\d{3}(?:\.\d{1,4})?\s?[가-힣A-Za-z]\d{1,5}(?:[가-힣A-Za-z])?\b")


def extract_metadata(answer_text: str) -> dict:
    """답변 본문에서 ISBN·청구기호를 추출하여 dict로 반환."""
    if not answer_text:
        return {"isbns": [], "call_numbers": []}
    isbns = list({m.replace(" ", "").replace("-", "") for m in _ISBN_RE.findall(answer_text)})
    callnos = list({m.strip() for m in _CALLNO_RE.findall(answer_text)})
    return {"isbns": isbns[:10], "call_numbers": callnos[:10]}


def init_db() -> None:
    with get_conn() as conn:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS qa_items (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            rec_key     TEXT    UNIQUE NOT NULL,
            question    TEXT    NOT NULL DEFAULT '',
            answer      TEXT    NOT NULL DEFAULT '',
            subject     TEXT    DEFAULT '',
            reg_date    TEXT    DEFAULT '',
            answer_date TEXT    DEFAULT '',
            answer_lib  TEXT    DEFAULT '',
            isbns       TEXT    DEFAULT '',
            call_numbers TEXT   DEFAULT '',
            updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )""")
        # 기존 DB 호환: 컬럼이 없으면 추가
        for ddl in (
            "ALTER TABLE qa_items ADD COLUMN isbns TEXT DEFAULT ''",
            "ALTER TABLE qa_items ADD COLUMN call_numbers TEXT DEFAULT ''",
            "ALTER TABLE qa_items ADD COLUMN updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
        ):
            try:
                conn.execute(ddl)
            except sqlite3.OperationalError:
                pass

        conn.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS qa_fts USING fts5(
            question,
            answer,
            subject,
            content     = qa_items,
            content_rowid = id,
            tokenize    = 'unicode61'
        )""")

        conn.execute("""
        CREATE TRIGGER IF NOT EXISTS qa_ai AFTER INSERT ON qa_items BEGIN
            INSERT INTO qa_fts(rowid, question, answer, subject)
            VALUES (new.id, new.question, new.answer, new.subject);
        END""")

        conn.execute("""
        CREATE TRIGGER IF NOT EXISTS qa_au AFTER UPDATE ON qa_items BEGIN
            INSERT INTO qa_fts(qa_fts, rowid, question, answer, subject)
            VALUES ('delete', old.id, old.question, old.answer, old.subject);
            INSERT INTO qa_fts(rowid, question, answer, subject)
            VALUES (new.id, new.question, new.answer, new.subject);
        END""")

        # 이용자 피드백 테이블 (임계값 자동 튜닝 데이터 수집용)
        conn.execute("""
        CREATE TABLE IF NOT EXISTS feedback (
            id           INTEGER PRIMARY KEY AUTOINCREMENT,
            query        TEXT    NOT NULL DEFAULT '',
            response_tier TEXT   NOT NULL DEFAULT '',
            top_score    REAL    DEFAULT 0,
            norm_score   REAL    DEFAULT 0,
            helpful      INTEGER NOT NULL DEFAULT 0,
            created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )""")

        conn.commit()


def seed_from_json() -> int:
    """DB가 비어있으면 seed JSON 파일들에서 데이터를 일괄 삽입. 삽입 건수 반환."""
    if count_items() > 0:
        return 0
    seed_files = sorted(SEED_DIR.glob("nl_qa_seed_*.json"))
    if not seed_files:
        return 0
    total = 0
    with get_conn() as conn:
        for path in seed_files:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            conn.executemany("""
                INSERT OR IGNORE INTO qa_items
                    (rec_key, question, answer, subject, answer_date, answer_lib)
                VALUES (:rec_key, :question, :answer, :subject, :answer_date, :answer_lib)
            """, data)
            total += len(data)
        conn.commit()
    return total


def upsert_item(rec_key: str, question: str, answer: str, subject: str,
                reg_date: str = "", answer_date: str = "", answer_lib: str = "") -> bool:
    """Insert or update a Q&A item. ISBN·청구기호도 함께 추출 저장. Returns True if newly inserted."""
    meta = extract_metadata(answer)
    isbns_str = ",".join(meta["isbns"])
    callno_str = ",".join(meta["call_numbers"])
    with get_conn() as conn:
        existing = conn.execute(
            "SELECT id FROM qa_items WHERE rec_key = ?", (rec_key,)
        ).fetchone()

        if existing:
            conn.execute("""
            UPDATE qa_items SET question=?, answer=?, subject=?,
                reg_date=?, answer_date=?, answer_lib=?,
                isbns=?, call_numbers=?, updated_at=CURRENT_TIMESTAMP
            WHERE rec_key=?
            """, (question, answer, subject, reg_date, answer_date, answer_lib,
                  isbns_str, callno_str, rec_key))
            conn.commit()
            return False
        else:
            conn.execute("""
            INSERT INTO qa_items
                (rec_key, question, answer, subject, reg_date, answer_date, answer_lib,
                 isbns, call_numbers)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (rec_key, question, answer, subject, reg_date, answer_date, answer_lib,
                  isbns_str, callno_str))
            conn.commit()
            return True


def get_known_keys() -> set:
    with get_conn() as conn:
        rows = conn.execute("SELECT rec_key FROM qa_items").fetchall()
    return {r["rec_key"] for r in rows}


def count_items() -> int:
    with get_conn() as conn:
        return conn.execute("SELECT COUNT(*) FROM qa_items").fetchone()[0]


def get_items_for_index() -> list:
    with get_conn() as conn:
        rows = conn.execute(
            "SELECT id, rec_key, question, answer, subject FROM qa_items"
        ).fetchall()
    return [dict(r) for r in rows]