# qa_store.py
"""Shared in-memory state for textbook chunks and QA pairs, plus text helpers."""

from typing import List, Dict, Any
import re

# Textbook chunks
ENTRIES: List[Dict[str, Any]] = []
RAW_KNOWLEDGE: str = ""

# QA from textbook JSONL (auto-generated from textbook)
AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []

# Manual QA managed by teacher (manual_qa.jsonl)
MANUAL_QA_LIST: List[Dict[str, Any]] = []
MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}

# Combined index for fast lookup (auto + manual)
QA_INDEX: Dict[str, str] = {}
ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []

# Counter for new manual IDs
NEXT_MANUAL_ID: int = 1

# Embeddings for textbook entries (one vector per ENTRIES item).
# Will be set to a torch.Tensor by _build_entry_embeddings() in model_utils.py
TEXT_EMBEDDINGS = None

# Punctuation replaced by a space before matching: question/exclamation marks,
# period, comma, colon, semicolon, and straight/curly single and double quotes.
# NOTE(review): the previous pattern listed "?!" twice; possibly full-width
# variants were intended for one pair -- confirm against the original file.
_PUNCTUATION_RE = re.compile(r"[?!.,:;\"“”'‘’]")

# Any run of whitespace (spaces, tabs, newlines).
_WHITESPACE_RE = re.compile(r"\s+")


def normalize_question(q: str) -> str:
    """Normalize Lao/English question text for matching.

    The text is lowercased, common punctuation is replaced by spaces, runs of
    whitespace are collapsed to a single space, and the result is stripped.

    Args:
        q: Raw question text. Falsy values (``None`` or ``""``) are treated
            as an empty string.

    Returns:
        The normalized question; ``""`` when nothing but punctuation and
        whitespace was supplied.
    """
    text = (q or "").lower()
    # Replace punctuation with a space (not ""), so words separated only by
    # punctuation such as "a,b" do not get glued together.
    text = _PUNCTUATION_RE.sub(" ", text)
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip()