Spaces:
Runtime error
Runtime error
| import PyPDF2 | |
| import re | |
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_file_path: Path of the PDF file to open (read in binary mode).

    Returns:
        The page texts joined with ``' '`` and stripped of surrounding
        whitespace. Pages for which ``extract_text()`` yields a falsy value
        contribute an empty string. If anything raises while opening or
        parsing, a message of the form ``"Error reading PDF: <details>"`` is
        returned instead of propagating the exception.
    """
    try:
        page_texts = []
        with open(pdf_file_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            for page in reader.pages:
                extracted = page.extract_text()
                # Normalise falsy results (e.g. None) to an empty string so
                # the join below never sees a non-string.
                page_texts.append(extracted if extracted else '')
            joined = ' '.join(page_texts)
        return joined.strip()
    except Exception as exc:
        # Errors are reported through the return value rather than raised.
        return f"Error reading PDF: {str(exc)}"
def extract_text_from_txt(txt_file_path):
    """Return the stripped contents of a text file, trying fallback encodings.

    The file is decoded as UTF-8 first (a leading byte-order mark, if any, is
    discarded) and, if that fails, as Latin-1.

    Args:
        txt_file_path: Path of the text file to read.

    Returns:
        The decoded file contents with surrounding whitespace stripped. If a
        non-decoding error occurs (for example the file does not exist), a
        message of the form ``"Error reading TXT: <details>"`` is returned
        instead of propagating the exception.
    """
    # 'utf-8-sig' behaves like 'utf-8' but drops a leading BOM, which
    # str.strip() would otherwise leave in the result as '\ufeff'.
    # 'latin-1' maps every possible byte, so it always succeeds and must be
    # last; any encoding listed after it (such as 'ascii') could never run.
    encodings = ('utf-8-sig', 'latin-1')
    for encoding in encodings:
        try:
            with open(txt_file_path, 'r', encoding=encoding) as txt_file:
                return txt_file.read().strip()
        except UnicodeDecodeError:
            # Wrong guess for this file; try the next candidate.
            continue
        except Exception as e:
            # Errors are reported through the return value rather than raised.
            return f"Error reading TXT: {str(e)}"
    # Defensive fallback: only reachable if the encoding list above is edited
    # so that it no longer ends with an always-succeeding codec.
    return "Error: Unable to decode file with supported encodings"