| |
from __future__ import annotations

import os
import platform
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import List, Optional, Tuple

from pdf2image import convert_from_bytes, convert_from_path
|
|
# True when the interpreter reports Windows as the host operating system.
IS_WINDOWS = platform.system() == "Windows"

# Directory holding the Poppler executables; forwarded to pdf2image as
# ``poppler_path``. The POPPLER_PATH environment variable takes precedence so
# a deployment can point at its own Poppler install without editing this file.
# When it is unset (or empty) the previous defaults apply: the fixed install
# location on Windows, and None everywhere else.
POPPLER_PATH = os.environ.get("POPPLER_PATH") or (
    r"C:\poppler-25.07.0\Library\bin" if IS_WINDOWS else None
)
|
|
|
|
def _ensure_poppler() -> None:
    """Ensure Poppler is available in the current environment.

    When ``POPPLER_PATH`` is configured it must be an existing directory.
    Otherwise the ``pdftoppm`` executable must be discoverable on PATH.

    Raises:
        RuntimeError: If Poppler cannot be located.
    """
    if POPPLER_PATH:
        # is_dir() rather than exists(): a regular file at that location is
        # not a usable Poppler "bin" directory.
        if not Path(POPPLER_PATH).is_dir():
            raise RuntimeError(
                "Poppler não encontrado. Ajuste POPPLER_PATH para ...\\poppler-XX\\Library\\bin"
            )
        return

    # No explicit directory configured. The previous Windows check built
    # Path(""), which is the current directory and always exists, so it could
    # never fail; look the executable up on PATH instead.
    if shutil.which("pdftoppm") is None:
        if IS_WINDOWS:
            raise RuntimeError(
                "Poppler não encontrado. Ajuste POPPLER_PATH para ...\\poppler-XX\\Library\\bin"
            )
        raise RuntimeError(
            "pdftoppm não encontrado. No Hugging Face inclua 'packages.txt' com 'poppler-utils'."
        )
|
|
|
|
| def _ext(formato: str) -> str: |
| """Normaliza a extensão.""" |
| return "jpg" if formato.lower() in ("jpeg", "jpg") else formato.lower() |
|
|
|
|
def _convert_bytes(
    data: bytes, formato: str, paginas: Optional[List[int]], dpi: int = 200
) -> list:
    """Render an in-memory PDF into a list of page images.

    Args:
        data: Raw bytes of the PDF document.
        formato: Output image format name; normalized through ``_ext``.
        paginas: Page numbers to render, in the given order. Each one is
            passed to pdf2image as both ``first_page`` and ``last_page``.
            ``None`` or an empty list renders the whole document.
        dpi: Rendering resolution forwarded to pdf2image (default 200).

    Returns:
        The images produced by ``convert_from_bytes``. A requested page for
        which pdf2image returns no image is skipped.
    """
    fmt = _ext(formato)
    if not paginas:
        return convert_from_bytes(data, dpi=dpi, fmt=fmt, poppler_path=POPPLER_PATH)

    out = []
    for p in paginas:
        # extend() instead of indexing [0]: when the result is empty (page
        # outside the document) the page is skipped rather than raising
        # IndexError and aborting the whole conversion.
        out.extend(
            convert_from_bytes(
                data,
                dpi=dpi,
                fmt=fmt,
                first_page=p,
                last_page=p,
                poppler_path=POPPLER_PATH,
            )
        )
    return out
|
|
|
|
def _convert_path(
    pdf_path: Path, formato: str, paginas: Optional[List[int]], dpi: int = 200
) -> list:
    """Render a PDF file on disk into a list of page images.

    Args:
        pdf_path: Location of the PDF file.
        formato: Output image format name; normalized through ``_ext``.
        paginas: Page numbers to render, in the given order. Each one is
            passed to pdf2image as both ``first_page`` and ``last_page``.
            ``None`` or an empty list renders the whole document.
        dpi: Rendering resolution forwarded to pdf2image (default 200).

    Returns:
        The images produced by ``convert_from_path``. A requested page for
        which pdf2image returns no image is skipped.
    """
    fmt = _ext(formato)
    source = str(pdf_path)
    if not paginas:
        return convert_from_path(source, dpi=dpi, fmt=fmt, poppler_path=POPPLER_PATH)

    out = []
    for p in paginas:
        # extend() instead of indexing [0]: when the result is empty (page
        # outside the document) the page is skipped rather than raising
        # IndexError and aborting the whole conversion.
        out.extend(
            convert_from_path(
                source,
                dpi=dpi,
                fmt=fmt,
                first_page=p,
                last_page=p,
                poppler_path=POPPLER_PATH,
            )
        )
    return out
|
|
|
|
def _save_pages(
    images,
    stem: str,
    ext: str,
    galeria: List[Tuple[str, str]],
    zout: zipfile.ZipFile,
) -> None:
    """Write each image to its own temp file, record it, and add it to the ZIP."""
    for i, img in enumerate(images, 1):
        # Reserve a unique path and close the handle right away: leaving it
        # open leaks one descriptor per page and can prevent the file from
        # being reopened by name on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as handle:
            img_path = handle.name
        img.save(img_path)
        galeria.append((img_path, f"{stem} — p{i}"))
        zout.write(img_path, arcname=f"{stem}_p{i}.{ext}")


def processar_misto(
    paths: List[Path], modo: str, paginas: Optional[List[int]], formato: str
) -> Tuple[List[Tuple[str, str]], str]:
    """Convert PDFs (loose files or inside ZIP archives) into page images.

    Args:
        paths: Input files. A path whose suffix is ``.zip`` is opened as an
            archive and every entry ending in ``.pdf`` is converted; any other
            path is treated as a PDF file.
        modo: ``"Extrair todas as páginas"`` or ``"Extrair páginas
            específicas"``. The page selection is applied only when the mode
            contains "específicas" (compared case-insensitively).
        paginas: Page numbers to extract when the mode selects specific
            pages; ignored otherwise.
        formato: Output image format, e.g. ``"jpeg"``, ``"png"``, ``"bmp"``,
            ``"ico"``.

    Returns:
        A tuple ``(galeria, zip_path)``: ``galeria`` is a list of
        ``(image_temp_path, caption)`` pairs and ``zip_path`` is a temporary
        ZIP file containing every generated image. All temp files are created
        with ``delete=False`` and are not removed here.

    Raises:
        RuntimeError: If Poppler is not available (from ``_ensure_poppler``).

    Note:
        The ``p{i}`` index in captions and archive names is the position of
        the image in the rendered list (starting at 1), not necessarily the
        page number inside the PDF.
    """
    _ensure_poppler()

    # Case-insensitive match: the documented mode string is lowercase
    # ("... específicas"), which the former "Específicas" test never matched,
    # so the requested pages were silently ignored.
    usar_paginas = paginas if "específicas" in modo.lower() else None
    ext = _ext(formato)

    galeria: List[Tuple[str, str]] = []

    # Only the unique path is needed; close the handle before ZipFile reopens it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_zip:
        zip_path = tmp_zip.name

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zout:
        for path in paths:
            if path.suffix.lower() != ".zip":
                images = _convert_path(path, formato, usar_paginas)
                _save_pages(images, path.stem, ext, galeria, zout)
                continue

            with zipfile.ZipFile(path, "r") as zin:
                for info in zin.infolist():
                    if not info.filename.lower().endswith(".pdf"):
                        continue
                    images = _convert_bytes(zin.read(info), formato, usar_paginas)
                    # Flatten the entry's folder path into the name. Using
                    # ``.stem`` alone discards the folders, so PDFs with the
                    # same file name in different folders would collide.
                    stem_zip = (
                        Path(info.filename).with_suffix("").as_posix().replace("/", "_")
                    )
                    _save_pages(images, stem_zip, ext, galeria, zout)

    return galeria, zip_path
|
|