import logging
from pathlib import Path
from typing import Any, Dict, List, Optional


def extract_text_from_files(
    paths: Optional[List[str]],
    chunk_size: int = 1500,
) -> Dict[str, List[Any]]:
    """Read plain-text files and split their contents into chunks for RAG.

    Only files whose extension is ``.txt`` or ``.md`` (compared
    case-insensitively) are read; every other path is skipped. Files are
    decoded as UTF-8 and undecodable bytes are dropped.

    Args:
        paths: File paths to process. ``None`` or an empty list yields an
            empty result.
        chunk_size: Maximum number of characters per chunk. Must be a
            positive integer.

    Returns:
        A dict with two keys:

        * ``"chunks"``: list of text chunks, in input order.
        * ``"artifacts"``: one ``{"path": ..., "type": ...}`` dict per file
          that was read successfully, where ``type`` is the lower-cased
          extension.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    chunks: List[str] = []
    artifacts: List[Dict[str, str]] = []

    for path in paths or []:
        ext = Path(path).suffix.lower()
        if ext not in {".txt", ".md"}:
            # (PDF/Docx parsing can be added later.)
            continue

        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
        except (OSError, ValueError) as exc:
            # Best-effort: an unreadable file is skipped, but the failure is
            # logged instead of being silently discarded. ValueError covers
            # paths containing an embedded null byte.
            logging.getLogger(__name__).warning(
                "Skipping unreadable file %s: %s", path, exc
            )
            continue

        chunks.extend(
            text[start:start + chunk_size]
            for start in range(0, len(text), chunk_size)
        )
        # Recorded only after a successful read; an empty file still counts.
        artifacts.append({"path": path, "type": ext})

    return {"chunks": chunks, "artifacts": artifacts}