"""Helpers for extracting plain text from uploaded PDF and DOCX files."""

import PyPDF2
import docx


def extract_text_from_file(uploaded_file):
    """Return the text of an uploaded file, dispatching on its extension.

    Args:
        uploaded_file: Object with a ``name`` attribute holding the file
            name. It is passed unchanged to the format-specific extractor,
            so it must be something ``PyPDF2.PdfReader`` /
            ``docx.Document`` can read.

    Returns:
        The extracted text, or the literal string
        ``"Unsupported file format."`` when the name ends with neither
        ``.pdf`` nor ``.docx``.
    """
    # Match the extension case-insensitively so that "REPORT.PDF" or
    # "cv.DOCX" is parsed instead of being rejected as unsupported.
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(uploaded_file)
    return "Unsupported file format."


def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Pages for which ``extract_text()`` yields a falsy value (``None`` or an
    empty string) are skipped. Page texts are joined without a separator.

    Args:
        uploaded_file: Source handed directly to ``PyPDF2.PdfReader``.

    Returns:
        A single string with the text of all pages that produced any.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # Call extract_text() exactly once per page, then filter on the result,
    # rather than extracting once to test and a second time to collect.
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "".join(text for text in page_texts if text)


def extract_text_from_docx(uploaded_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        uploaded_file: Source handed directly to ``docx.Document``.

    Returns:
        The ``text`` of each entry in ``Document.paragraphs`` joined with
        newline characters.
    """
    document = docx.Document(uploaded_file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)