Heng2004's picture
Update data/loader.py
c99a2e2 verified
raw
history blame
923 Bytes
# data/loader.py – load 1_Year_U_1.jsonl
import json
import os
from typing import Any, Dict, Iterable, List
DATA_PATH = "data/1_Year_U_1.jsonl"

# Fallback texts (Lao) exposed through RAW_KNOWLEDGE when nothing usable is
# available. Roughly: "no history data has been loaded yet" (file could not be
# opened) and "no readable history data yet" (file opened, no usable record).
_MSG_NOT_LOADED = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດຖືກໂຫຼດ."
_MSG_NOT_READABLE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."


def parse_entries(lines: Iterable[str]) -> List[Dict[str, Any]]:
    """Parse JSONL lines and keep the records that carry a string ``text``.

    Args:
        lines: Any iterable of text lines (an open text file works), one JSON
            document per line. Surrounding whitespace is ignored.

    Returns:
        The decoded JSON objects, in input order, whose ``"text"`` value is a
        ``str``. Blank lines, lines that are not valid JSON, JSON values that
        are not objects, and objects without a string ``"text"`` are skipped.
    """
    entries: List[Dict[str, Any]] = []
    for raw in lines:
        raw = raw.strip()
        if not raw:
            continue
        try:
            obj = json.loads(raw)
        except json.JSONDecodeError:
            # Best-effort loading: one malformed line must not abort the rest.
            continue
        # Valid JSON may be a number, string or list; only an object with a
        # string "text" can be indexed and joined safely later on.
        if isinstance(obj, dict) and isinstance(obj.get("text"), str):
            entries.append(obj)
    return entries


# Records loaded from DATA_PATH; stays empty when the file is missing/unusable.
ENTRIES: List[Dict[str, Any]] = []
# All entry texts joined by blank lines, or one of the fallback messages.
RAW_KNOWLEDGE: str = ""

try:
    # "utf-8-sig" transparently drops a leading BOM, which json.loads would
    # otherwise reject and thereby lose the first record.
    with open(DATA_PATH, "r", encoding="utf-8-sig") as f:
        ENTRIES = parse_entries(f)
except OSError:
    # Missing file, directory, or permission problem: nothing was loaded.
    RAW_KNOWLEDGE = _MSG_NOT_LOADED
else:
    if ENTRIES:
        RAW_KNOWLEDGE = "\n\n".join(e["text"] for e in ENTRIES)
    else:
        RAW_KNOWLEDGE = _MSG_NOT_READABLE