"""Batch-convert PDFs from ``INPUT_DIR`` into per-page HEIF images.

Each ``<name>.pdf`` found in ``INPUT_DIR`` is rendered page by page into
``OUTPUT_DIR/<name>/<name>_page_<n>.heif``. PDFs whose output folder already
contains files are skipped, so the script can be re-run safely.
"""

from __future__ import annotations

import contextlib
import os
from concurrent.futures import ThreadPoolExecutor

import pillow_heif
from pdf2image import convert_from_path

INPUT_DIR = "downloads"
OUTPUT_DIR = "images"
PDF_DPI = 200
HEIF_QUALITY = 90
MAX_WORKERS = 10

# Must run before any page.save(..., "HEIF") call below.
pillow_heif.register_heif_opener()


def pdf_to_heif(pdf_path: str, output_folder: str) -> None:
    """Render every page of *pdf_path* to a HEIF file inside *output_folder*.

    Files are named ``<pdf stem>_page_<n>.heif`` with ``n`` starting at 1.
    The folder is created if it does not exist.

    If saving any page fails, the files written by this call are removed
    before the exception is re-raised. A half-finished conversion therefore
    never leaves a non-empty folder behind that a later run would mistake
    for a completed one.

    Raises:
        Exception: whatever ``convert_from_path`` or ``Image.save`` raises.
    """
    os.makedirs(output_folder, exist_ok=True)
    print(f"Converting PDF: {pdf_path}")

    # NOTE(review): all pages are rendered before the first one is saved, so
    # peak memory presumably grows with page count times MAX_WORKERS.
    pages = convert_from_path(pdf_path, dpi=PDF_DPI)
    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

    written: list[str] = []
    try:
        for page_index, page in enumerate(pages, start=1):
            filename = f"{pdf_name}_page_{page_index}.heif"
            filepath = os.path.join(output_folder, filename)
            # Record the path before saving so that a truncated file left by
            # a failed save is cleaned up as well.
            written.append(filepath)
            page.save(filepath, "HEIF", quality=HEIF_QUALITY)
            print(f"Saved: {filename}")
    except Exception:
        for stale_path in written:
            with contextlib.suppress(OSError):
                os.remove(stale_path)
        raise

    print(f"Converted {len(pages)} pages for {pdf_name}.")


def convert_single_pdf(filename: str) -> int:
    """Convert one entry of ``INPUT_DIR`` if it is a not-yet-converted PDF.

    Args:
        filename: Bare file name (no directory part) inside ``INPUT_DIR``.

    Returns:
        ``1`` if the PDF was converted by this call, ``0`` if it was ignored
        (not a ``.pdf``), skipped (output already present) or failed.
        Failures are printed, never raised, so one bad file cannot abort the
        whole batch.
    """
    # Anything that is not a PDF is silently ignored.
    if not filename.lower().endswith(".pdf"):
        return 0

    pdf_stem = os.path.splitext(filename)[0]
    output_path = os.path.join(OUTPUT_DIR, pdf_stem)
    pdf_path = os.path.join(INPUT_DIR, filename)

    try:
        # A non-empty output folder means an earlier run already produced
        # pages for this PDF. The check lives inside the ``try`` so that an
        # unreadable folder is reported instead of vanishing in the pool.
        if os.path.isdir(output_path) and os.listdir(output_path):
            print(f"Skipping {filename}: output already exists.")
            return 0
        pdf_to_heif(pdf_path, output_path)
    except Exception as exc:  # best-effort batch: report and move on
        print(f"Failed conversion for {filename}: {exc}")
        return 0
    return 1


def run() -> None:
    """Convert every PDF in ``INPUT_DIR`` using a pool of worker threads."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs(INPUT_DIR, exist_ok=True)

    # No pre-filtering here: convert_single_pdf ignores non-PDF entries and
    # skips PDFs whose output already exists.
    files = sorted(os.listdir(INPUT_DIR))
    print(f"Found {len(files)} files in {INPUT_DIR}.")

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Consuming the iterator collects the per-file 0/1 results.
        converted = sum(executor.map(convert_single_pdf, files))

    print(f"Converted {converted} PDF file(s) in this run.")
    print("Conversion process completed.")


if __name__ == "__main__":
    run()