Update loader.py
Browse files
loader.py
CHANGED
|
@@ -12,7 +12,7 @@ DATA_DIR = os.path.join(BASE_DIR, "data")
|
|
| 12 |
CURRICULUM_PATH = os.path.join(DATA_DIR, "M_1_U_1.jsonl")
|
| 13 |
MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
def load_curriculum() -> None:
|
| 18 |
"""
|
|
@@ -66,6 +66,29 @@ def load_curriculum() -> None:
|
|
| 66 |
qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."
|
| 67 |
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def load_manual_qa() -> None:
|
| 70 |
"""
|
| 71 |
Load manual_qa.jsonl into qa_store.MANUAL_QA_LIST and MANUAL_QA_INDEX.
|
|
|
|
| 12 |
CURRICULUM_PATH = os.path.join(DATA_DIR, "M_1_U_1.jsonl")
|
| 13 |
MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
|
| 14 |
|
| 15 |
+
GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
|
| 16 |
|
| 17 |
def load_curriculum() -> None:
|
| 18 |
"""
|
|
|
|
| 66 |
qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."
|
| 67 |
|
| 68 |
|
| 69 |
+
def load_glossary() -> None:
    """Load glossary entries into qa_store.GLOSSARY.

    The existing contents of ``qa_store.GLOSSARY`` are cleared first, then
    ``GLOSSARY_PATH`` is read as UTF-8 JSON Lines (one JSON value per line).
    Blank lines are ignored, and lines that are not valid JSON are skipped
    with a warning that names the offending line number. If the file does
    not exist, a warning is printed and the glossary is left empty.
    """
    qa_store.GLOSSARY.clear()

    # Open directly and handle the missing-file case instead of checking
    # os.path.exists() first: the file could disappear between the check
    # and the open, which would turn the intended warning into a crash.
    try:
        f = open(GLOSSARY_PATH, "r", encoding="utf-8")
    except FileNotFoundError:
        print(f"[WARN] Glossary file not found: {GLOSSARY_PATH}")
        return

    with f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError as exc:
                # Report where the bad entry is so the data file can be fixed.
                print(
                    "[WARN] Skipping invalid glossary JSON line "
                    f"{line_no}: {exc.msg}"
                )
                continue
            qa_store.GLOSSARY.append(obj)

    print(f"[INFO] Loaded {len(qa_store.GLOSSARY)} glossary terms.")
|
| 90 |
+
|
| 91 |
+
|
| 92 |
def load_manual_qa() -> None:
|
| 93 |
"""
|
| 94 |
Load manual_qa.jsonl into qa_store.MANUAL_QA_LIST and MANUAL_QA_INDEX.
|