Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import io | |
| from pypdf import PdfReader | |
def process_uploaded_file(uploaded_file):
    """Return the text content of an uploaded PDF or Excel (.xlsx) file.

    Dispatch is based on the MIME type exposed as ``uploaded_file.type``.
    NOTE(review): the object looks like a web-framework upload handle
    (e.g. Streamlit's UploadedFile) -- confirm against the caller.

    Args:
        uploaded_file: Object with a ``type`` attribute holding the MIME
            type; it is forwarded unchanged to the matching extractor.

    Returns:
        The extracted text, or the literal message
        ``"Unsupported file format."`` when the MIME type is neither PDF
        nor .xlsx (no exception is raised in that case).
    """
    mime_type = uploaded_file.type

    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)

    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return extract_text_from_excel(uploaded_file)

    # Anything else is reported to the caller as a plain message string.
    return "Unsupported file format."
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: Anything ``PdfReader`` accepts as its source argument.

    Returns:
        The per-page texts joined with ``"\n"``. Pages whose extraction
        yields an empty/falsy result are skipped, so a PDF with no
        extractable text produces ``""``.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once: text extraction is the expensive step,
    # and calling it separately for the filter and for the value doubles it.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(page_text for page_text in page_texts if page_text)
def extract_text_from_excel(excel_file):
    """Render every sheet of an Excel workbook as plain text.

    Args:
        excel_file: Workbook source passed straight to ``pd.read_excel``.

    Returns:
        One section per sheet, concatenated in the order pandas returns
        them. Each section is a ``"\nSheet: <name>\n"`` header followed by
        the sheet rendered with ``DataFrame.to_string(index=False)``.
    """
    # sheet_name=None makes pandas load all sheets into a
    # {sheet name: DataFrame} mapping rather than a single DataFrame.
    sheets = pd.read_excel(excel_file, sheet_name=None)
    sections = [
        f"\nSheet: {sheet_name}\n" + frame.to_string(index=False)
        for sheet_name, frame in sheets.items()
    ]
    return "".join(sections)