import io

import PyPDF2
import streamlit as st


def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source;
            ``get_document_text`` passes an ``io.BytesIO``.

    Returns:
        The concatenated text of all pages, each page followed by two
        newlines, or ``None`` if reading or extraction raised. In the
        failure case the error is reported to the user via ``st.error``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # ``or ""`` is a defensive guard: a page that yields no text must
        # not abort extraction of the whole document.
        return "".join(
            f"{page.extract_text() or ''}\n\n" for page in pdf_reader.pages
        )
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any parsing
        # failure is shown to the user instead of crashing the app.
        st.error(f"Error extracting text from PDF: {e}")
        return None


def get_document_text(uploaded_file, file_type):
    """Return the text content of an uploaded document (TXT or PDF).

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the upload.
        file_type: Selected type label. ``"Text (.txt)"`` is decoded as
            UTF-8; any other value is treated as a PDF.

    Returns:
        The document text, or ``None`` if PDF extraction failed.

    Raises:
        UnicodeDecodeError: If a text upload is not valid UTF-8.
    """
    raw_bytes = uploaded_file.getvalue()

    if file_type == "Text (.txt)":
        return raw_bytes.decode("utf-8")

    # Wrap the bytes in a seekable in-memory stream for the PDF reader.
    return extract_text_from_pdf(io.BytesIO(raw_bytes))