import re
from typing import Sequence

try:
    import PyPDF2
except ImportError:
    # PDF support is optional: keep the module importable so that TXT
    # extraction still works when PyPDF2 is not installed.
    PyPDF2 = None


def extract_text_from_pdf(pdf_file_path) -> str:
    """Extract the text of every page of a PDF file.

    The text of each page is joined with a single space and the result is
    stripped of leading/trailing whitespace. Pages for which the reader
    yields no text contribute an empty string.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The extracted text on success. On any failure (missing file,
        unreadable PDF, PyPDF2 not installed, ...) no exception is raised;
        a message starting with ``"Error reading PDF: "`` is returned instead.
    """
    if PyPDF2 is None:
        return "Error reading PDF: PyPDF2 is not installed"

    # Broad catch is deliberate: the contract of this function is to report
    # every failure as a string rather than to raise.
    try:
        with open(pdf_file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Extraction must happen while the file is still open.
            text = ' '.join(
                page.extract_text() or '' for page in pdf_reader.pages
            )
        return text.strip()
    except Exception as e:
        return f"Error reading PDF: {str(e)}"


def extract_text_from_txt(
    txt_file_path,
    encodings: Sequence[str] = ('utf-8-sig', 'latin-1'),
) -> str:
    """Read a text file, trying several encodings in order.

    Args:
        txt_file_path: Path of the text file to read.
        encodings: Encodings to try, in order. The default tries UTF-8
            first (``utf-8-sig`` so that a leading byte-order mark is
            dropped instead of ending up in the text) and then falls back
            to latin-1, which accepts any byte sequence.

    Returns:
        The file content stripped of leading/trailing whitespace. On
        failure no exception is raised: a message starting with
        ``"Error reading TXT: "`` is returned for non-decoding errors
        (e.g. a missing file), and ``"Error: Unable to decode file with
        supported encodings"`` when none of ``encodings`` can decode it.
    """
    for encoding in encodings:
        try:
            with open(txt_file_path, 'r', encoding=encoding) as txt_file:
                return txt_file.read().strip()
        except UnicodeDecodeError:
            # Wrong guess; try the next candidate encoding.
            continue
        except Exception as e:
            # Not a decoding problem (missing file, permissions, ...):
            # retrying with another encoding would not help.
            return f"Error reading TXT: {str(e)}"
    return "Error: Unable to decode file with supported encodings"