from pathlib import Path

# PyPDF2 and python-docx are each needed for one format only, so a missing
# package must not stop the module from importing or from reading plain text.
# The names stay defined at module level either way.
try:
    import PyPDF2
except ImportError:
    PyPDF2 = None

try:
    import docx
except ImportError:
    docx = None


def load_text(path: str) -> str:
    """Load text from a TXT, PDF, or DOCX file.

    The format is selected from the file extension, compared
    case-insensitively.

    Args:
        path: Location of the file to read.

    Returns:
        The extracted text as a single string:

        * ``.txt``: the file content decoded as UTF-8, with a leading
          byte-order mark removed if one is present.
        * ``.pdf``: the text of every page that yields any text, each
          followed by a newline.
        * ``.docx``: the text of every paragraph, joined by newlines.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the extension is not ``.txt``, ``.pdf`` or ``.docx``.
        ImportError: If the package needed for a PDF or DOCX file is not
            installed.
    """
    path_obj = Path(path)
    if not path_obj.exists():
        raise FileNotFoundError(f"{path} does not exist.")

    suffix = path_obj.suffix.lower()

    if suffix == ".txt":
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise surface as U+FEFF at the start of the text.
        return path_obj.read_text(encoding="utf-8-sig")

    if suffix == ".pdf":
        if PyPDF2 is None:
            raise ImportError("PyPDF2 is required to read PDF files.")
        with open(path_obj, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            page_texts = (page.extract_text() for page in reader.pages)
            # Joined inside the ``with`` block: the pages are extracted
            # lazily and need the file to still be open.
            return "".join(
                f"{page_text}\n" for page_text in page_texts if page_text
            )

    if suffix == ".docx":
        if docx is None:
            raise ImportError("python-docx is required to read DOCX files.")
        # Passed as a string, the path form the python-docx API documents.
        doc = docx.Document(str(path_obj))
        return "\n".join(p.text for p in doc.paragraphs)

    raise ValueError(f"Unsupported file type: {path_obj.suffix}")