import json
import pathlib

from .data_schemas import Doc


def load_jsonl(path: str, text_fields=("question", "answer")) -> "list[Doc]":
    """Load a JSON Lines file into a list of ``Doc`` objects.

    Each non-blank line is parsed as one JSON object (a "row"). The
    document text is the row's ``"text"`` value when it is present and
    truthy; otherwise it is the values of ``text_fields`` joined with a
    single space and stripped of surrounding whitespace.

    Args:
        path: Location of the JSONL file; read as UTF-8.
        text_fields: Row keys to join into the text when the row has no
            usable ``"text"`` value. Missing or ``null`` fields contribute
            an empty string; non-string values are converted with ``str``.

    Returns:
        One ``Doc`` per non-blank line, in file order. ``id`` is the row's
        ``"id"`` converted to ``str``, or ``"<file stem>:<line index>"``
        when the row has no ``"id"`` key. ``title`` falls back from
        ``"title"`` to ``"category"`` to ``""``. ``meta`` is the full
        parsed row.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    p = pathlib.Path(path)
    docs = []
    with p.open(encoding="utf-8") as f:
        # Enumerate physical lines so fallback ids stay tied to the line
        # position in the file, even when blank lines are skipped.
        for i, line in enumerate(f):
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) carry no record
                # and would otherwise make json.loads raise.
                continue
            row = json.loads(line)

            # Collect fields; allow either "text" or joined fields
            if "text" in row and row["text"]:
                combined = row["text"]
            else:
                parts = []
                for tf in text_fields:
                    value = row.get(tf, "")
                    # A null or non-string field must not break the join;
                    # absent/null fields still count as "" so spacing for
                    # ordinary string fields is unchanged.
                    parts.append("" if value is None else str(value))
                combined = " ".join(parts).strip()

            title = row.get("title") or row.get("category") or ""
            docs.append(
                Doc(
                    id=str(row.get("id", f"{p.stem}:{i}")),
                    text=combined,
                    title=title,
                    meta=row,
                )
            )
    return docs