|
|
|
|
|
from typing import List, Dict, Any |
|
|
import re |
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Module-level state.
#
# Every container below starts empty (or None) at import time; nothing in this
# section populates them.
# NOTE(review): the code that fills these is not visible here -- the semantic
# notes below are inferred from the names and should be confirmed.
# ---------------------------------------------------------------------------

# List of entry records (str-keyed dicts).
ENTRIES: List[Dict[str, Any]] = []

# Knowledge text held as a single string.
RAW_KNOWLEDGE: str = ""

# QA records (str-keyed dicts).
# NOTE(review): presumably the automatically generated QA pairs -- confirm.
AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []

# QA records (str-keyed dicts).
# NOTE(review): presumably the manually authored QA pairs -- confirm.
MANUAL_QA_LIST: List[Dict[str, Any]] = []

# Lookup from a string key to one QA record.
# NOTE(review): key is presumably a normalized question or an id -- confirm.
MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}

# Lookup from a string key to a string value.
# NOTE(review): presumably normalized question -> answer text -- confirm.
QA_INDEX: Dict[str, str] = {}

# QA records (str-keyed dicts).
# NOTE(review): presumably the auto and manual QA lists combined -- confirm.
ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []

# Integer counter, starting at 1.
# NOTE(review): presumably the id handed to the next manual QA record -- confirm.
NEXT_MANUAL_ID: int = 1

# Unset (None) until assigned elsewhere.
# NOTE(review): presumably embeddings for the knowledge text -- type and shape
# are not visible here; confirm against the code that assigns it.
TEXT_EMBEDDINGS = None

# List of glossary records (str-keyed dicts).
GLOSSARY: List[Dict[str, Any]] = []

# Unset (None) until assigned elsewhere.
# NOTE(review): presumably embeddings aligned with GLOSSARY -- confirm.
GLOSSARY_EMBEDDINGS = None
|
|
|
|
|
|
|
|
def normalize_question(q: str) -> str:
    """
    Normalize Lao/English question text for matching.

    Lowercases the text, replaces punctuation with spaces, collapses every
    run of whitespace into a single space, and trims both ends.

    Args:
        q: Question text. ``None`` or ``""`` is treated as empty input.

    Returns:
        The normalized string; ``""`` when the input is empty or consists
        only of punctuation/whitespace.
    """
    q = (q or "").lower()

    # Punctuation is replaced by a space (not deleted) so that words joined
    # only by punctuation, e.g. "a,b", stay separate tokens.
    # The set is ASCII ? ! . , : ; " ' plus full-width question/exclamation
    # marks (U+FF1F, U+FF01) and curly double/single quotes (U+201C, U+201D,
    # U+2018, U+2019). The non-ASCII members are written as \uXXXX escapes so
    # that saving or reading the file in the wrong encoding cannot corrupt them.
    q = re.sub(r"[?!\uff1f\uff01.,:;\"\u201c\u201d'\u2018\u2019]", " ", q)

    # Collapse the gaps left by the substitution (and any original whitespace
    # runs, including tabs/newlines) into single spaces.
    q = re.sub(r"\s+", " ", q)

    return q.strip()
|
|
|