import pypdf  # Replaces the former PyPDF2 dependency.
import pytesseract
from PIL import Image


class DocumentLoader:
    """Extract plain text from PDF files and from images (via OCR)."""

    def load_pdf(self, file_path) -> str:
        """Return the text of every page of the PDF at *file_path*.

        Page texts are concatenated in page order with no separator
        inserted between them.

        Args:
            file_path: Location of the PDF file; passed to ``open``.

        Returns:
            The concatenated text extracted from all pages.
        """
        with open(file_path, "rb") as file:
            pdf_reader = pypdf.PdfReader(file)
            # Join while the file is still open: the reader pulls page data
            # from the underlying stream. ``or ""`` is defensive so that a
            # page yielding no text cannot break the join.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

    def load_image(self, file_path) -> str:
        """Return the text that OCR recognises in the image at *file_path*.

        Args:
            file_path: Location of the image; passed to ``PIL.Image.open``.

        Returns:
            The string produced by ``pytesseract.image_to_string``.
        """
        # The context manager closes the image's file handle once OCR is
        # done, instead of leaving it open until garbage collection.
        with Image.open(file_path) as image:
            return pytesseract.image_to_string(image)