Medica_DecisionSupportAI / upload_ingest.py
Rajan Sharma
Update upload_ingest.py
521ffa1 verified
raw
history blame
822 Bytes
from typing import List, Dict, Optional
from pathlib import Path
def extract_text_from_files(
    paths: Optional[List[str]], *, chunk_size: int = 1500
) -> Dict[str, List]:
    """Read plain-text uploads and split them into fixed-size chunks for RAG.

    Only paths whose suffix is ``.txt`` or ``.md`` (case-insensitive) are
    read; any other path is skipped. Files are decoded as UTF-8 and
    undecodable bytes are dropped. A file that cannot be opened or read is
    logged and skipped, so one bad upload never aborts the whole batch.

    Args:
        paths: File paths to ingest. ``None`` or an empty list yields empty
            results.
        chunk_size: Maximum number of characters per chunk. Defaults to 1500.

    Returns:
        A dict with two keys: ``"chunks"`` is the list of text slices in
        input order, and ``"artifacts"`` holds one
        ``{"path": ..., "type": ...}`` dict per file that was read
        successfully (``type`` is the lower-cased suffix).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    import logging

    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    log = logging.getLogger(__name__)
    chunks: List[str] = []
    artifacts: List[Dict] = []

    for p in paths or []:
        ext = Path(p).suffix.lower()
        # (PDF/Docx parsing can be added later.)
        if ext not in {".txt", ".md"}:
            continue

        # Keep the try body to the I/O only: OSError covers missing files,
        # permission problems and directories; ValueError covers paths with
        # an embedded NUL byte.
        try:
            with open(p, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
        except (OSError, ValueError) as exc:
            log.warning("Skipping unreadable upload %s: %s", p, exc)
            continue

        chunks.extend(
            text[start:start + chunk_size]
            for start in range(0, len(text), chunk_size)
        )
        # Recorded even for an empty file, which contributes no chunks.
        artifacts.append({"path": p, "type": ext})

    return {"chunks": chunks, "artifacts": artifacts}