"""PDF text extraction utilities.""" import io from typing import Optional from PyPDF2 import PdfReader def extract_text_from_pdf(pdf_bytes: bytes) -> Optional[str]: """ Extract text content from a PDF file. Args: pdf_bytes: PDF file content as bytes Returns: Extracted text as string, or None if extraction fails """ try: pdf_file = io.BytesIO(pdf_bytes) reader = PdfReader(pdf_file) text_parts = [] for page in reader.pages: text = page.extract_text() if text: text_parts.append(text) full_text = "\n\n".join(text_parts) return full_text if full_text.strip() else None except Exception as e: print(f"Error extracting text from PDF: {e}") return None