Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import os | |
| from concurrent.futures import ThreadPoolExecutor | |
| import pillow_heif | |
| from pdf2image import convert_from_path | |
# Directory that run() scans for files to convert.
INPUT_DIR = "downloads"
# Root directory for results; each PDF gets its own sub-folder named after it.
OUTPUT_DIR = "images"
# Resolution passed as ``dpi`` when rendering PDF pages.
PDF_DPI = 200
# ``quality`` value passed when saving each page as HEIF.
HEIF_QUALITY = 90
# Size of the thread pool used by run().
MAX_WORKERS = 10
# Register pillow_heif's HEIF plugin at import time; pdf_to_heif() saves pages
# with the "HEIF" format name and relies on this registration.
pillow_heif.register_heif_opener()
def pdf_to_heif(pdf_path: str, output_folder: str) -> None:
    """Render each page of a PDF and save it as a HEIF image.

    Pages are written into ``output_folder`` (created if it does not exist)
    as ``<pdf name>_page_<n>.heif``, with ``n`` counting from 1.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory that receives the page images.
    """
    os.makedirs(output_folder, exist_ok=True)
    print(f"Converting PDF: {pdf_path}")
    rendered_pages = convert_from_path(pdf_path, dpi=PDF_DPI)

    # Output files are named after the PDF, minus directory and extension.
    base_name = os.path.basename(pdf_path)
    stem, _extension = os.path.splitext(base_name)

    page_number = 1
    for image in rendered_pages:
        image_name = f"{stem}_page_{page_number}.heif"
        target = os.path.join(output_folder, image_name)
        image.save(target, "HEIF", quality=HEIF_QUALITY)
        print(f"Saved: {image_name}")
        page_number += 1

    print(f"Converted {len(rendered_pages)} pages for {stem}.")
def convert_single_pdf(filename: str) -> int:
    """Convert one entry of INPUT_DIR into per-page HEIF images.

    The images are written to ``OUTPUT_DIR/<file name without extension>``.
    Conversion errors are reported on stdout instead of being raised, so one
    bad PDF does not stop the rest of a batch.

    Args:
        filename: Bare file name (not a path) of an entry in INPUT_DIR.

    Returns:
        1 if the PDF was converted by this call; 0 if the file is not a PDF,
        was skipped because its output already exists, or failed to convert.
    """
    import shutil

    # Ignore anything that is not a PDF (extension check is case-insensitive).
    if not filename.lower().endswith(".pdf"):
        return 0

    pdf_stem = os.path.splitext(filename)[0]
    output_path = os.path.join(OUTPUT_DIR, pdf_stem)
    pdf_path = os.path.join(INPUT_DIR, filename)

    # A non-empty output folder is taken to mean the PDF was already converted.
    if os.path.isdir(output_path) and os.listdir(output_path):
        print(f"Skipping {filename}: output already exists.")
        return 0

    try:
        pdf_to_heif(pdf_path, output_path)
    except Exception as exc:
        print(f"Failed conversion for {filename}: {exc}")
        # Remove whatever was written before the failure. Otherwise the
        # leftover pages would trip the "output already exists" check above
        # on the next run and this PDF would never be retried.
        shutil.rmtree(output_path, ignore_errors=True)
        return 0
    return 1
def run() -> None:
    """Convert every PDF found in INPUT_DIR using a pool of worker threads.

    Creates INPUT_DIR and OUTPUT_DIR if they are missing, hands each directory
    entry to ``convert_single_pdf`` and prints a summary once all workers have
    finished.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs(INPUT_DIR, exist_ok=True)

    # Every entry is submitted, including non-PDFs and PDFs that were already
    # converted; convert_single_pdf decides what to skip.
    files = sorted(os.listdir(INPUT_DIR))
    print(f"Found {len(files)} files in {INPUT_DIR}.")

    converted = 0
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = {
            executor.submit(convert_single_pdf, name): name for name in files
        }
        for future, name in futures.items():
            # Collect each result explicitly: an exception raised inside a
            # worker only surfaces when its result is requested, so leaving
            # the results unread would hide such failures.
            try:
                converted += future.result()
            except Exception as exc:
                print(f"Failed conversion for {name}: {exc}")

    print(f"Converted {converted} PDF(s).")
    print("Conversion process completed.")
# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    run()