Update loader.py
Browse files
loader.py
CHANGED
|
@@ -5,8 +5,13 @@ from typing import List, Dict, Any
|
|
| 5 |
|
| 6 |
import qa_store
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def load_curriculum() -> None:
|
|
@@ -17,9 +22,11 @@ def load_curriculum() -> None:
|
|
| 17 |
qa_store.AUTO_QA_KNOWLEDGE.clear()
|
| 18 |
|
| 19 |
if not os.path.exists(CURRICULUM_PATH):
|
|
|
|
| 20 |
qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດຖືກໂຫຼດ."
|
| 21 |
return
|
| 22 |
|
|
|
|
| 23 |
with open(CURRICULUM_PATH, "r", encoding="utf-8") as f:
|
| 24 |
for line in f:
|
| 25 |
line = line.strip()
|
|
@@ -28,8 +35,10 @@ def load_curriculum() -> None:
|
|
| 28 |
try:
|
| 29 |
obj: Dict[str, Any] = json.loads(line)
|
| 30 |
except json.JSONDecodeError:
|
|
|
|
| 31 |
continue
|
| 32 |
|
|
|
|
| 33 |
if "text" not in obj:
|
| 34 |
continue
|
| 35 |
|
|
@@ -66,9 +75,11 @@ def load_manual_qa() -> None:
|
|
| 66 |
max_num = 0
|
| 67 |
|
| 68 |
if not os.path.exists(MANUAL_QA_PATH):
|
|
|
|
| 69 |
qa_store.NEXT_MANUAL_ID = 1
|
| 70 |
return
|
| 71 |
|
|
|
|
| 72 |
with open(MANUAL_QA_PATH, "r", encoding="utf-8") as f:
|
| 73 |
for line in f:
|
| 74 |
line = line.strip()
|
|
@@ -77,8 +88,10 @@ def load_manual_qa() -> None:
|
|
| 77 |
try:
|
| 78 |
obj = json.loads(line)
|
| 79 |
except json.JSONDecodeError:
|
|
|
|
| 80 |
continue
|
| 81 |
|
|
|
|
| 82 |
q = (obj.get("q") or "").strip()
|
| 83 |
a = (obj.get("a") or "").strip()
|
| 84 |
if not q or not a:
|
|
|
|
| 5 |
|
| 6 |
import qa_store
|
| 7 |
|
| 8 |
+
# Base paths (make them relative to this file)
|
| 9 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 10 |
+
DATA_DIR = os.path.join(BASE_DIR, "data")
|
| 11 |
+
|
| 12 |
+
CURRICULUM_PATH = os.path.join(DATA_DIR, "1_Year_U_1.jsonl")
|
| 13 |
+
MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
|
| 14 |
+
|
| 15 |
|
| 16 |
|
| 17 |
def load_curriculum() -> None:
|
|
|
|
| 22 |
qa_store.AUTO_QA_KNOWLEDGE.clear()
|
| 23 |
|
| 24 |
if not os.path.exists(CURRICULUM_PATH):
|
| 25 |
+
print(f"[WARN] Curriculum file not found: {CURRICULUM_PATH}")
|
| 26 |
qa_store.RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດຖືກໂຫຼດ."
|
| 27 |
return
|
| 28 |
|
| 29 |
+
|
| 30 |
with open(CURRICULUM_PATH, "r", encoding="utf-8") as f:
|
| 31 |
for line in f:
|
| 32 |
line = line.strip()
|
|
|
|
| 35 |
try:
|
| 36 |
obj: Dict[str, Any] = json.loads(line)
|
| 37 |
except json.JSONDecodeError:
|
| 38 |
+
print("[WARN] Skipping invalid JSON line in curriculum file.")
|
| 39 |
continue
|
| 40 |
|
| 41 |
+
|
| 42 |
if "text" not in obj:
|
| 43 |
continue
|
| 44 |
|
|
|
|
| 75 |
max_num = 0
|
| 76 |
|
| 77 |
if not os.path.exists(MANUAL_QA_PATH):
|
| 78 |
+
print(f"[WARN] Manual QA file not found: {MANUAL_QA_PATH}")
|
| 79 |
qa_store.NEXT_MANUAL_ID = 1
|
| 80 |
return
|
| 81 |
|
| 82 |
+
|
| 83 |
with open(MANUAL_QA_PATH, "r", encoding="utf-8") as f:
|
| 84 |
for line in f:
|
| 85 |
line = line.strip()
|
|
|
|
| 88 |
try:
|
| 89 |
obj = json.loads(line)
|
| 90 |
except json.JSONDecodeError:
|
| 91 |
+
print("[WARN] Skipping invalid JSON line in manual QA file.")
|
| 92 |
continue
|
| 93 |
|
| 94 |
+
|
| 95 |
q = (obj.get("q") or "").strip()
|
| 96 |
a = (obj.get("a") or "").strip()
|
| 97 |
if not q or not a:
|