"""Utility helpers for common PDF operations built on PyMuPDF (``fitz``).

Every helper is best-effort: any failure is logged through the module logger
and an empty/falsy sentinel value is returned instead of raising.
"""

import logging
import os
from typing import Any, Dict, List

import fitz  # PyMuPDF

logger = logging.getLogger(__name__)


def extract_pdf_text(pdf_path: str) -> str:
    """Extract the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or ``""`` if the file could not be read.
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error("Error extracting PDF text: %s", e)
        return ""


def get_pdf_metadata(pdf_path: str) -> Dict[str, Any]:
    """Return the metadata dictionary of a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The document's metadata mapping, or ``{}`` if it is missing or the
        file could not be read.
    """
    try:
        with fitz.open(pdf_path) as doc:
            # Fall back to an empty dict so callers always receive a mapping.
            return doc.metadata or {}
    except Exception as e:
        logger.error("Error getting PDF metadata: %s", e)
        return {}


def count_pdf_pages(pdf_path: str) -> int:
    """Count the number of pages in a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The page count, or ``0`` if the file could not be read.
    """
    try:
        with fitz.open(pdf_path) as doc:
            return len(doc)
    except Exception as e:
        logger.error("Error counting PDF pages: %s", e)
        return 0


def split_pdf(pdf_path: str, output_dir: str, pages_per_file: int = 1) -> List[str]:
    """Split a PDF into consecutive chunks of ``pages_per_file`` pages.

    Chunks are written to ``output_dir`` as ``split_1.pdf``, ``split_2.pdf``,
    and so on; the last chunk may contain fewer pages.

    Args:
        pdf_path: Path of the PDF file to split.
        output_dir: Directory that receives the chunk files.
        pages_per_file: Maximum number of pages per chunk; must be >= 1.

    Returns:
        The paths of the written chunk files in order, or ``[]`` if
        ``pages_per_file`` is invalid or any step fails.
    """
    if pages_per_file < 1:
        # A zero step would make range() raise and a negative one would
        # silently yield nothing; report the bad argument explicitly.
        logger.error(
            "Error splitting PDF: pages_per_file must be >= 1, got %s",
            pages_per_file,
        )
        return []

    try:
        output_files: List[str] = []
        with fitz.open(pdf_path) as doc:
            total_pages = len(doc)
            for start in range(0, total_pages, pages_per_file):
                # to_page is inclusive, so clamp it to the last valid index.
                end = min(start + pages_per_file - 1, total_pages - 1)
                output_path = os.path.join(
                    output_dir, f"split_{start // pages_per_file + 1}.pdf"
                )
                # fitz.open() without arguments creates a new empty PDF.
                with fitz.open() as chunk:
                    chunk.insert_pdf(doc, from_page=start, to_page=end)
                    chunk.save(output_path)
                output_files.append(output_path)
        return output_files
    except Exception as e:
        logger.error("Error splitting PDF: %s", e)
        return []


def merge_pdfs(pdf_paths: List[str], output_path: str) -> bool:
    """Merge several PDF files into one, in the order given.

    Args:
        pdf_paths: Paths of the PDF files to append, in order.
        output_path: Path of the merged PDF to write.

    Returns:
        ``True`` on success, ``False`` if any step fails.
    """
    try:
        with fitz.open() as merger:
            for pdf_path in pdf_paths:
                # Close each source as soon as its pages have been copied.
                with fitz.open(pdf_path) as source:
                    merger.insert_pdf(source)
            merger.save(output_path)
        return True
    except Exception as e:
        logger.error("Error merging PDFs: %s", e)
        return False


def rotate_pdf_pages(pdf_path: str, output_path: str, rotation: int = 90) -> bool:
    """Set the rotation of every page in a PDF and save the result.

    The value is passed to ``Page.set_rotation``, which sets the page's
    rotation to ``rotation`` rather than adding it to the current rotation.

    Args:
        pdf_path: Path of the PDF file to read.
        output_path: Path of the rotated PDF to write.
        rotation: Rotation in degrees applied to each page.

    Returns:
        ``True`` on success, ``False`` if any step fails.
    """
    try:
        with fitz.open(pdf_path) as doc:
            for page in doc:
                page.set_rotation(rotation)
            doc.save(output_path)
        return True
    except Exception as e:
        logger.error("Error rotating PDF: %s", e)
        return False


def compress_pdf(pdf_path: str, output_path: str, quality: int = 80) -> bool:
    """Write a size-optimized copy of a PDF.

    The copy is saved with garbage collection of unused objects, deflate
    compression of streams, and content-stream cleaning. These are lossless
    optimizations; page images are not re-encoded.

    Args:
        pdf_path: Path of the PDF file to read.
        output_path: Path of the optimized PDF to write.
        quality: Accepted for backward compatibility; currently unused
            because no lossy image recompression is performed.

    Returns:
        ``True`` on success, ``False`` if any step fails.
    """
    try:
        with fitz.open(pdf_path) as doc:
            doc.save(output_path, garbage=4, deflate=True, clean=True)
        return True
    except Exception as e:
        logger.error("Error compressing PDF: %s", e)
        return False