Heng2004 committed on
Commit
7502f70
·
verified ·
1 Parent(s): bcbd5e8

Update qa_store.py

Browse files
Files changed (1) hide show
  1. qa_store.py +7 -5
qa_store.py CHANGED
@@ -6,14 +6,14 @@ import re
6
  ENTRIES: List[Dict[str, Any]] = []
7
  RAW_KNOWLEDGE: str = ""
8
 
9
- # QA from textbook JSONL
10
  AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []
11
 
12
- # Manual QA managed by teacher
13
  MANUAL_QA_LIST: List[Dict[str, Any]] = []
14
  MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}
15
 
16
- # Combined index for fast lookup
17
  QA_INDEX: Dict[str, str] = {}
18
  ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []
19
 
@@ -21,8 +21,8 @@ ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []
21
  NEXT_MANUAL_ID: int = 1
22
 
23
  # Embeddings for textbook entries (one vector per ENTRIES item)
24
- TEXT_EMBEDDINGS = None # will be a torch.Tensor or None
25
-
26
 
27
 
28
  def normalize_question(q: str) -> str:
@@ -31,6 +31,8 @@ def normalize_question(q: str) -> str:
31
  Lowercase + remove punctuation + collapse spaces.
32
  """
33
  q = (q or "").lower()
 
34
  q = re.sub(r"[?!?!\.\,\:\;\"“”'‘’]", " ", q)
 
35
  q = re.sub(r"\s+", " ", q)
36
  return q.strip()
 
6
  ENTRIES: List[Dict[str, Any]] = []
7
  RAW_KNOWLEDGE: str = ""
8
 
9
+ # QA from textbook JSONL (auto-generated from textbook)
10
  AUTO_QA_KNOWLEDGE: List[Dict[str, Any]] = []
11
 
12
+ # Manual QA managed by teacher (manual_qa.jsonl)
13
  MANUAL_QA_LIST: List[Dict[str, Any]] = []
14
  MANUAL_QA_INDEX: Dict[str, Dict[str, Any]] = {}
15
 
16
+ # Combined index for fast lookup (auto + manual)
17
  QA_INDEX: Dict[str, str] = {}
18
  ALL_QA_KNOWLEDGE: List[Dict[str, Any]] = []
19
 
 
21
  NEXT_MANUAL_ID: int = 1
22
 
23
  # Embeddings for textbook entries (one vector per ENTRIES item)
24
+ # Will be set to a torch.Tensor by _build_entry_embeddings() in model_utils.py
25
+ TEXT_EMBEDDINGS = None
26
 
27
 
28
  def normalize_question(q: str) -> str:
 
31
  Lowercase + remove punctuation + collapse spaces.
32
  """
33
  q = (q or "").lower()
34
+ # remove common punctuation (including Lao/English quotes)
35
  q = re.sub(r"[?!?!\.\,\:\;\"“”'‘’]", " ", q)
36
+ # collapse multiple spaces
37
  q = re.sub(r"\s+", " ", q)
38
  return q.strip()