Spaces:

Heng2004
/

Laos-Natural-Science-Chatbot

Running

Laos-Natural-Science-Chatbot / qa_store.py

Update qa_store.py

7502f70 verified 5 days ago

1.12 kB

	# qa_store.py
	from typing import List, Dict, Any
	import re

	# Shared in-memory stores for the chatbot. Each one is declared here at its
	# empty/initial value only.
	# NOTE(review): presumably filled in and rebound by loader code in other
	# modules — confirm against the callers.

	# Textbook chunks: one dict per chunk.
	# NOTE(review): the dict schema is not visible in this file — confirm against the loader.
	ENTRIES: List[Dict[str, Any]] = []
	# NOTE(review): presumably the raw textbook text kept as one string — confirm.
	RAW_KNOWLEDGE: str = ""

	# QA pairs from the textbook JSONL (auto-generated from the textbook)
	AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []

	# Manual QA pairs managed by the teacher (manual_qa.jsonl)
	MANUAL_QA_LIST: List[Dict[str, Any]] = []
	# Lookup over the manual QA records.
	# NOTE(review): the key is presumably the normalized question text — verify against the code that fills it.
	MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}

	# Combined index for fast lookup (auto + manual).
	# NOTE(review): presumably maps normalized question -> answer text — confirm.
	QA_INDEX: Dict[str, str] = {}
	# NOTE(review): presumably the auto and manual QA records in one list — confirm.
	ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []

	# Counter for new manual QA IDs; starts at 1
	NEXT_MANUAL_ID: int = 1

	# Embeddings for textbook entries (one vector per ENTRIES item).
	# None until _build_entry_embeddings() in model_utils.py sets it to a torch.Tensor.
	TEXT_EMBEDDINGS = None


	def normalize_question(q: str) -> str:
	    """
	    Normalize Lao/English question text for matching.

	    Steps, in order: lowercase, replace common punctuation with a space,
	    collapse every whitespace run to a single space, trim both ends.

	    Args:
	        q: Raw question text. Falsy input (``None`` or ``""``) is accepted
	            and treated as an empty string.

	    Returns:
	        The normalized question. ``""`` when the input was empty or held
	        nothing but the stripped punctuation and whitespace.
	    """
	    # `or ""` keeps None / "" from raising on .lower()
	    q = (q or "").lower()
	    # Punctuation is replaced by a space instead of being deleted, so words
	    # separated only by punctuation ("a,b") stay separate ("a b").
	    # Covers ASCII and full-width ?/!, plus straight and curly quotes.
	    q = re.sub(r"[?!？！\.\,\:\;\"“”'‘’]", " ", q)
	    # collapse whitespace runs, including the spaces inserted just above
	    q = re.sub(r"\s+", " ", q)
	    # NOTE(review): U+200B (zero-width space) is not matched by \s and passes
	    # through unchanged — confirm whether Lao input can contain it.
	    return q.strip()