import PyPDF2
import docx


def load_txt(file_path) -> str:
    """Return the full contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's contents as a single string.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


def load_pdf(file_path) -> str:
    """Return the extracted text of every page of a PDF, concatenated.

    Page texts are joined directly, with no separator inserted between
    pages. Pages for which ``extract_text()`` yields nothing (``None`` or
    an empty string) contribute nothing to the result.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated page text; an empty string when no page yields text.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Extract each page exactly once, and do it while the file is still
        # open because the reader works from the open file handle.
        page_texts = [page.extract_text() for page in reader.pages]
    return "".join(text for text in page_texts if text)


def load_docx(file_path) -> str:
    """Return the text of a .docx document, one paragraph per line.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The ``text`` of each entry in ``doc.paragraphs``, joined with
        newline characters.
    """
    doc = docx.Document(file_path)
    return "\n".join(p.text for p in doc.paragraphs)