File size: 486 Bytes
96df7b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import fitz  # PyMuPDF
import logging

logger = logging.getLogger(__name__)

def extract_text_from_pdf(filepath: str) -> str:
    """Extract the text of every page in a PDF file.

    The document is opened with PyMuPDF (``fitz``), ``page.get_text()`` is
    called on each page in document order, and the results are concatenated
    with no separator between pages.

    Args:
        filepath: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The concatenated page text with leading and trailing whitespace
        removed. The result is an empty string when no page yields text.

    Raises:
        ValueError: If opening the document or reading any page fails. The
            underlying exception is logged and attached as ``__cause__``.
    """
    try:
        with fitz.open(filepath) as doc:
            # Join once instead of growing a string page by page.
            text = "".join(page.get_text() for page in doc)
    except Exception as e:
        # Broad on purpose: every failure is reported to callers as ValueError.
        logger.error("PDF extraction error for %s: %s", filepath, e)
        raise ValueError(f"Could not extract text from PDF: {e}") from e
    return text.strip()