Spaces:
Build error
Build error
| """ | |
| PDF Processing Utilities | |
| """ | |
| import io | |
class PDFProcessor:
    """Extract plain text from PDF documents using PyPDF2 when it is installed.

    PyPDF2 is imported lazily in ``__init__``. When the import fails, the
    instance still works but every extraction method returns an explanatory
    message instead of raising.

    Both extraction methods report problems by *returning* a message string
    (never by raising), so callers can display the result directly.
    """

    # Messages returned to callers; kept in one place so both public methods
    # stay in agreement.
    _UNAVAILABLE_MSG = "PDF processing not available. Please install PyPDF2."
    _ERROR_PREFIX = "Error extracting PDF text: "

    def __init__(self):
        """Try to import PyPDF2 and remember the module (or None) on self."""
        try:
            import PyPDF2
            self.PyPDF2 = PyPDF2
        except ImportError:
            self.PyPDF2 = None
            print("Warning: PyPDF2 not available")

    def _read_all_pages(self, stream):
        """Return the text of every page in *stream*, newline-separated.

        Exceptions from the reader propagate; the public methods translate
        them into error strings.
        """
        reader = self.PyPDF2.PdfReader(stream)
        # ``or ""`` keeps a page that yields no text (None or empty) from
        # breaking the join; such a page contributes an empty line instead.
        page_texts = (page.extract_text() or "" for page in reader.pages)
        return "\n".join(page_texts).strip()

    def extract_text(self, pdf_file):
        """Extract text from a PDF file object.

        Args:
            pdf_file: Object passed straight to ``PyPDF2.PdfReader``.

        Returns:
            The stripped, newline-joined page text; or a message string when
            PyPDF2 is unavailable or extraction fails.
        """
        if not self.PyPDF2:
            return self._UNAVAILABLE_MSG
        try:
            return self._read_all_pages(pdf_file)
        except Exception as e:
            # Deliberate best-effort boundary: surface the failure as text.
            return f"{self._ERROR_PREFIX}{e}"

    def extract_text_from_bytes(self, pdf_bytes):
        """Extract text from the raw bytes of a PDF.

        Args:
            pdf_bytes: Bytes-like PDF content; wrapped in ``io.BytesIO``
                before being handed to the reader.

        Returns:
            The stripped, newline-joined page text; or a message string when
            PyPDF2 is unavailable or extraction fails.
        """
        if not self.PyPDF2:
            return self._UNAVAILABLE_MSG
        try:
            # Wrapping happens inside the try so a bad argument type is
            # reported the same way as any other extraction failure.
            return self._read_all_pages(io.BytesIO(pdf_bytes))
        except Exception as e:
            return f"{self._ERROR_PREFIX}{e}"