repro-0.2.0 / server /pdf_processor.py
Yusufarsh's picture
Upload folder using huggingface_hub
2ea6af1 verified
import fitz # PyMuPDF
def extract_text_from_pdf(file_path: str) -> str:
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        The text of all pages concatenated in page order. If opening or
        reading the document fails for any reason, the error is printed
        and an empty string is returned instead of raising.
    """
    try:
        # The context manager closes the document even when a page fails
        # to extract part-way through, so the file handle is not leaked.
        with fitz.open(file_path) as doc:
            # Join once instead of growing a string with += per page.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberate best-effort: callers get "" rather than an exception.
        print(f"Error extracting text from PDF: {e}")
        return ""