aitutor / documents /loader.py
AptlyDigital's picture
Update documents/loader.py
8eb157f verified
import pypdf # Changed from PyPDF2
from PIL import Image
import pytesseract
class DocumentLoader:
    """Extract plain text from PDF files and from images (via OCR)."""

    def load_pdf(self, file_path) -> str:
        """Return the concatenated text of every page in a PDF.

        Args:
            file_path: Path of the PDF file; it is opened in binary mode.

        Returns:
            The text of all pages joined in page order, with no separator
            inserted between pages.
        """
        with open(file_path, 'rb') as file:
            pdf_reader = pypdf.PdfReader(file)
            # Extraction must happen while the file is still open, because
            # the reader pulls page data from the handle on demand.
            # ``or ""`` keeps a page that yields no text (None) from
            # raising TypeError; join avoids repeated string re-allocation.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

    def load_image(self, file_path) -> str:
        """Return the text recognised in an image by Tesseract OCR.

        Args:
            file_path: Path of the image file to read.

        Returns:
            The string produced by ``pytesseract.image_to_string``.
        """
        # The context manager closes the image's file handle even when
        # OCR fails; a bare Image.open() can leave it open.
        with Image.open(file_path) as image:
            return pytesseract.image_to_string(image)