File size: 1,292 Bytes
7171447 7502f70 7171447 7502f70 7171447 7502f70 7171447 076631b 7502f70 b4ddaff 076631b 7ced8a6 b4ddaff 7ced8a6 7171447 7502f70 7171447 7502f70 7171447 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# qa_store.py
from typing import List, Dict, Any
import re
# Shared module-level state, declared here with placeholder values.
# NOTE(review): presumably populated by loader code in other modules — confirm.
# Textbook chunks
ENTRIES: List[Dict[str, Any]] = []
# NOTE(review): presumably the raw (unchunked) textbook text — confirm.
RAW_KNOWLEDGE: str = ""
# QA from textbook JSONL (auto-generated from textbook)
AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []
# Manual QA managed by teacher (manual_qa.jsonl)
MANUAL_QA_LIST: List[Dict[str, Any]] = []
# NOTE(review): string-keyed lookup into the manual QA items; what the key is
# (item id vs. normalized question) is not visible here — confirm.
MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}
# Combined index for fast lookup (auto + manual)
# NOTE(review): str -> str; presumably normalized question -> answer — confirm.
QA_INDEX: Dict[str, str] = {}
ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []
# Counter for new manual IDs (starts at 1)
NEXT_MANUAL_ID: int = 1
# Embeddings for textbook entries (one vector per ENTRIES item)
# Will be set to a torch.Tensor by _build_entry_embeddings() in model_utils.py
TEXT_EMBEDDINGS = None # torch.Tensor once built; None until then
# Glossary state
GLOSSARY: List[Dict[str, Any]] = [] # list of glossary items (dicts)
GLOSSARY_EMBEDDINGS = None # numpy.ndarray once built; None until then
# Punctuation that is replaced by a space before matching. The non-ASCII
# members are written as escapes so the pattern cannot be corrupted by an
# encoding round-trip of this file:
#   \uFF1F \uFF01  fullwidth question mark / exclamation mark
#   \u201C \u201D  left / right curly double quote
#   \u2018 \u2019  left / right curly single quote
_PUNCTUATION_RE = re.compile(
    r"[?!\uFF1F\uFF01.,:;\"\u201C\u201D'\u2018\u2019]"
)
_WHITESPACE_RE = re.compile(r"\s+")


def normalize_question(q: str) -> str:
    """Normalize Lao/English question text for matching.

    The text is lowercased, common ASCII, fullwidth and curly-quote
    punctuation is replaced by spaces, runs of whitespace are collapsed to a
    single space, and leading/trailing whitespace is removed.

    Args:
        q: The question text. ``None`` or an empty string is treated as "".

    Returns:
        The normalized question; an empty string when nothing remains.
    """
    text = (q or "").lower()
    # Replace with a space (not "") so words separated only by punctuation
    # do not get glued together, e.g. "a,b" -> "a b".
    text = _PUNCTUATION_RE.sub(" ", text)
    return _WHITESPACE_RE.sub(" ", text).strip()
|