Heng2004's picture
Update data/qa_index.py
7629442 verified
raw
history blame
1.44 kB
# data/qa_index.py – QA index from dataset + manual answers
from typing import Dict, Optional
from .loader import ENTRIES
from .text_utils import normalize_question
from .manual_answers import MANUAL_ANSWERS
# Maps a normalized question (output of normalize_question) to its answer text
# with surrounding whitespace removed. Filled by _build_qa_index(), which this
# module calls once at import time.
QA_INDEX: Dict[str, str] = {}
def _build_qa_index():
    """Fill the module-level ``QA_INDEX`` from the "qa" pairs found in ``ENTRIES``.

    Every entry is read through ``.get("qa")`` and every pair through
    ``.get("q")`` / ``.get("a")``, so both are assumed to be dict-like.
    The question is passed through ``normalize_question`` and used as the
    key; the answer is stored with surrounding whitespace removed.

    Pairs are skipped when the question or the answer is missing or empty,
    or when the answer consists of whitespace only. If two pairs normalize
    to the same key, the one seen later overwrites the earlier one.

    Returns:
        None. ``QA_INDEX`` is mutated in place.
    """
    for entry in ENTRIES:
        # `or []` also covers an entry whose "qa" key is present but null;
        # iterating None would raise TypeError while the module is imported.
        for pair in entry.get("qa") or []:
            question = pair.get("q")
            answer = pair.get("a")
            if not (question and answer):
                continue

            # Strip before storing and re-check: a whitespace-only answer
            # must not end up in the index as an empty string.
            answer = answer.strip()
            if not answer:
                continue

            QA_INDEX[normalize_question(question)] = answer
# Populate QA_INDEX once, as a side effect of importing this module.
_build_qa_index()
def answer_from_qa(question: str) -> Optional[str]:
    """Look up an answer for *question*, trying three sources in order.

    1. ``MANUAL_ANSWERS`` - checked first, so a manual answer always wins.
    2. ``QA_INDEX`` - exact hit on the normalized question.
    3. ``QA_INDEX`` - character-overlap fallback: every stored question is
       scored by how many characters of the normalized query (counted with
       repetition) occur anywhere in it; the answer belonging to the first
       top-scoring stored question is returned.

    Args:
        question: Raw question text. It is passed through
            ``normalize_question`` before any lookup.

    Returns:
        The selected answer. ``None`` when neither exact lookup hits and
        either ``QA_INDEX`` is empty or no stored question shares a single
        character with the normalized query.
    """
    query = normalize_question(question)

    # Steps 1 and 2: exact lookups, manual answers before the dataset index.
    for table in (MANUAL_ANSWERS, QA_INDEX):
        if query in table:
            return table[query]

    # Step 3: nothing to compare against when the dataset index is empty.
    if not QA_INDEX:
        return None

    def _overlap(stored_q: str) -> int:
        # Number of query characters that appear somewhere in stored_q.
        # The truthiness guard on `ch` is a no-op for characters of a str.
        return sum(1 for ch in query if ch and ch in stored_q)

    # max() returns the first maximal item, so on a tie the entry that comes
    # first in QA_INDEX iteration order is the one that wins.
    # NOTE(review): one shared character is enough to produce an answer -
    # confirm that such a permissive threshold is intended.
    top_score, top_answer = max(
        ((_overlap(stored_q), answer) for stored_q, answer in QA_INDEX.items()),
        key=lambda scored: scored[0],
    )
    return top_answer if top_score > 0 else None