Heng2004 committed on
Commit
1b5b80c
·
verified ·
1 Parent(s): 7ced8a6

Update loader.py

Browse files
Files changed (1) hide show
  1. loader.py +24 -1
loader.py CHANGED
@@ -12,7 +12,7 @@ DATA_DIR = os.path.join(BASE_DIR, "data")
12
  CURRICULUM_PATH = os.path.join(DATA_DIR, "M_1_U_1.jsonl")
13
  MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
14
 
15
-
16
 
17
  def load_curriculum() -> None:
18
  """
@@ -66,6 +66,29 @@ def load_curriculum() -> None:
66
  qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def load_manual_qa() -> None:
70
  """
71
  Load manual_qa.jsonl into qa_store.MANUAL_QA_LIST and MANUAL_QA_INDEX.
 
12
  CURRICULUM_PATH = os.path.join(DATA_DIR, "M_1_U_1.jsonl")
13
  MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
14
 
15
+ GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
16
 
17
  def load_curriculum() -> None:
18
  """
 
66
  qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."
67
 
68
 
69
def load_glossary() -> None:
    """
    Rebuild qa_store.GLOSSARY from the JSONL file at GLOSSARY_PATH.

    The existing list is emptied first. If the file does not exist, a warning
    is printed and the list is left empty. Otherwise every non-blank line is
    parsed as JSON and appended; lines that fail to parse are reported and
    skipped. A summary count is printed after the file has been read.
    """
    glossary = qa_store.GLOSSARY
    glossary.clear()

    if not os.path.exists(GLOSSARY_PATH):
        print(f"[WARN] Glossary file not found: {GLOSSARY_PATH}")
        return

    with open(GLOSSARY_PATH, "r", encoding="utf-8") as handle:
        for raw in handle:
            text = raw.strip()
            if text:
                try:
                    entry = json.loads(text)
                except json.JSONDecodeError:
                    # Best-effort load: one malformed line must not abort the rest.
                    print("[WARN] Skipping invalid glossary JSON line")
                else:
                    glossary.append(entry)

    print(f"[INFO] Loaded {len(glossary)} glossary terms.")
90
+
91
+
92
  def load_manual_qa() -> None:
93
  """
94
  Load manual_qa.jsonl into qa_store.MANUAL_QA_LIST and MANUAL_QA_INDEX.