Spaces:
Sleeping
Sleeping
| import PyPDF2 | |
| import io | |
| import streamlit as st | |
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: The PDF source handed straight to ``PyPDF2.PdfReader``;
            the caller in this file passes an ``io.BytesIO`` of the
            uploaded bytes.

    Returns:
        The text of all pages in page order, each page followed by two
        newline characters, or ``None`` if anything goes wrong while
        reading or extracting (the error is then shown with ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Walk the pages directly and build the result with a single join
        # rather than indexing by position and growing a string with +=.
        return "".join(
            page.extract_text() + "\n\n" for page in pdf_reader.pages
        )
    except Exception as e:
        # Deliberately broad: any failure is reported in the Streamlit UI
        # and signalled to the caller as None rather than raised.
        st.error(f"Error extracting text from PDF: {e}")
        return None
def get_document_text(uploaded_file, file_type):
    """Return the text content of an uploaded document (PDF or TXT).

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the upload.
        file_type: Selected document type. The exact string
            ``"Text (.txt)"`` selects plain-text decoding; any other value
            is handled as a PDF.

    Returns:
        The decoded text for a text upload. For a PDF, whatever
        ``extract_text_from_pdf`` returns for the uploaded bytes.
    """
    raw_bytes = uploaded_file.getvalue()

    if file_type == "Text (.txt)":
        # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also
        # drops a leading byte-order mark, so the text does not start with
        # a stray U+FEFF. errors="replace" turns undecodable bytes into
        # U+FFFD instead of raising UnicodeDecodeError on non-UTF-8 files.
        return raw_bytes.decode("utf-8-sig", errors="replace")

    # Everything else is parsed as a PDF from an in-memory stream.
    return extract_text_from_pdf(io.BytesIO(raw_bytes))