|
|
|
|
|
|
|
|
import os |
|
|
import json |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
# Location of the JSON Lines knowledge file, relative to the current
# working directory of the process that imports this module.
DATA_PATH = "data/1_Year_U_1.jsonl"

# Records parsed from DATA_PATH: JSON objects that carry a string "text" field.
ENTRIES: List[Dict[str, Any]] = []

# Every entry's "text" joined with blank lines, or a Lao fallback message
# when nothing could be loaded (assigned below).
RAW_KNOWLEDGE: str = ""


def _read_text_entries(path: str) -> List[Dict[str, Any]]:
    """Return the JSON objects in the JSONL file *path* that have a string "text".

    Blank lines, lines that are not valid JSON, JSON values that are not
    objects, and objects whose "text" is missing or not a string are skipped.

    Raises:
        OSError: if *path* cannot be opened or read.
    """
    records: List[Dict[str, Any]] = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            try:
                parsed = json.loads(stripped)
            except json.JSONDecodeError:
                # Best effort: one malformed line must not discard the file.
                continue
            # json.loads can return any JSON value, not only an object.
            # `"text" in 5` raises TypeError, and a str/list that merely
            # contains "text" would later break `entry["text"]`, so only
            # dicts with a string "text" are kept.
            if isinstance(parsed, dict) and isinstance(parsed.get("text"), str):
                records.append(parsed)
    return records


# True only when DATA_PATH existed and was read to the end without an OS error.
_data_loaded = False
if os.path.exists(DATA_PATH):
    try:
        ENTRIES.extend(_read_text_entries(DATA_PATH))
    except OSError:
        # The path exists but is not a readable file (directory, permission
        # problem, ...): fall through to the "not loaded" message instead of
        # failing the import.
        _data_loaded = False
    else:
        _data_loaded = True

if not _data_loaded:
    # Roughly: "No history data has been loaded yet."
    RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດຖືກໂຫຼດ."
elif ENTRIES:
    RAW_KNOWLEDGE = "\n\n".join(entry["text"] for entry in ENTRIES)
else:
    # Roughly: "There is no readable history data yet."
    RAW_KNOWLEDGE = "ຍັງບໍ່ມີຂໍ້ມູນປະຫວັດສາດທີ່ອ່ານໄດ້."
|
|
|