Spaces:
Sleeping
Sleeping
"""Gradio app wrapping the official `commonforms` package to convert PDFs
into fillable forms using jbarrow's FFDNet-L object detector (CPU ONNX).

- Paper: <https://arxiv.org/abs/2509.16506>
- Model: <https://huggingface.co/jbarrow/FFDNet-L-cpu>
- Package: <https://pypi.org/project/commonforms/>

Detects 3 classes of fields: text boxes, checkboxes (choice buttons), and
signatures.
"""
| from __future__ import annotations | |
| import os | |
# Hide GPUs before any later import gets a chance to initialize CUDA.
# `setdefault` only fills in variables that are not already set, so a value
# exported by the environment is left untouched.
for _gpu_env_var in ("CUDA_VISIBLE_DEVICES", "NVIDIA_VISIBLE_DEVICES"):
    os.environ.setdefault(_gpu_env_var, "")
del _gpu_env_var  # keep the module namespace free of the loop variable
| import inspect | |
| import tempfile | |
| from pathlib import Path | |
| import gradio as gr | |
| from commonforms import prepare_form | |
| from huggingface_hub import hf_hub_download | |
# Location of the ONNX weights on the Hugging Face Hub.
_MODEL_REPO = "jbarrow/FFDNet-L-cpu"
_MODEL_FILE = "FFDNet-L.onnx"

# Capture the parameters of the installed `prepare_form` once at startup;
# `detect_fields` consults this mapping to pass only supported keyword args.
_PARAMS = inspect.signature(prepare_form).parameters
print(f"[commonforms] prepare_form signature: {list(_PARAMS.keys())}")

# Pre-download the ONNX file once at startup. Per the original author's note,
# `commonforms` uses ultralytics YOLO under the hood, which only accepts a
# LOCAL path for the `model_or_path` parameter.
print(f"[commonforms] baixando {_MODEL_REPO}/{_MODEL_FILE}...")
_ONNX_PATH = hf_hub_download(
    repo_id=_MODEL_REPO,
    filename=_MODEL_FILE,
)
print(f"[commonforms] ONNX local: {_ONNX_PATH}")
def detect_fields(
    pdf_path: str | None,
    image_size: int,
    use_signature_fields: bool,
    keep_existing_fields: bool,
) -> str:
    """Run `prepare_form` on an uploaded PDF and return the output file path.

    Args:
        pdf_path: Filesystem path of the uploaded PDF; falsy when nothing was
            uploaded.
        image_size: Value forwarded to `prepare_form` as `image_size`
            (coerced to `int`).
        use_signature_fields: Forwarded to `prepare_form` under the same name.
        keep_existing_fields: Forwarded to `prepare_form` under the same name.

    Returns:
        Path (as `str`) of a newly created temporary file ending in
        `_fillable.pdf` that `prepare_form` was asked to write.

    Raises:
        gr.Error: If no path was given, the path does not exist, or
            `prepare_form` raises any exception.
    """
    if not pdf_path:
        raise gr.Error("Envie um PDF.")

    src = Path(pdf_path)
    if not src.exists():
        raise gr.Error(f"Arquivo não encontrado: {src}")

    # Reserve a unique output path. The context manager closes the handle
    # immediately (the previous `mkstemp` call left its descriptor open on
    # every request); `delete=False` keeps the file so it can be written to.
    with tempfile.NamedTemporaryFile(suffix="_fillable.pdf", delete=False) as tmp:
        out = Path(tmp.name)

    optional = {
        "image_size": int(image_size),
        "use_signature_fields": bool(use_signature_fields),
        "keep_existing_fields": bool(keep_existing_fields),
        "device": "cpu",
        "model_or_path": _ONNX_PATH,
    }
    # Only pass the keyword arguments the installed `prepare_form` declares.
    accepted = {k: v for k, v in optional.items() if k in _PARAMS}
    print(f"[commonforms] calling prepare_form with kwargs: {accepted}")

    try:
        prepare_form(str(src), str(out), **accepted)
    except Exception as exc:
        # The output is unusable after a failure; remove it (best effort) so
        # failed requests do not accumulate temp files.
        try:
            out.unlink()
        except OSError:
            pass
        raise gr.Error(f"Falha ao processar PDF: {exc}") from exc
    return str(out)
# UI definition: inputs and the action button in the first column, the
# resulting file in the second, with the button wired to `detect_fields`.
with gr.Blocks(title="CommonForms — Form Field Detector") as demo:
    gr.Markdown(
        "# CommonForms — Form Field Detector\n"
        "Converte um PDF em formulário preenchível usando **FFDNet-L** "
        "(`jbarrow/FFDNet-L-cpu`, Object Detection ONNX em CPU). "
        "Detecta *text boxes*, *checkboxes* e *signature fields*.\n\n"
        "Paper: [arxiv 2509.16506](<https://arxiv.org/abs/2509.16506>) · "
        "Modelo: [jbarrow/FFDNet-L-cpu](<https://huggingface.co/jbarrow/FFDNet-L-cpu>)"
    )

    with gr.Row():
        # First column: upload, options, and the trigger button.
        with gr.Column():
            pdf_in = gr.File(label="PDF de entrada", file_types=[".pdf"], type="filepath")
            image_size = gr.Slider(
                label="Image size (px)",
                info="Tamanho usado na inferência. Maior = mais preciso, mais lento.",
                minimum=512,
                maximum=2048,
                step=32,
                value=1600,
            )
            use_sig = gr.Checkbox(
                label="Incluir signature fields",
                info="Detecta áreas de assinatura além de text/checkbox.",
                value=False,
            )
            keep = gr.Checkbox(
                label="Manter campos já existentes",
                info="Preserva widgets AcroForm que já estavam no PDF.",
                value=False,
            )
            btn = gr.Button("Detectar campos", variant="primary")

        # Second column: the generated PDF.
        with gr.Column():
            pdf_out = gr.File(label="PDF preenchível")

    # The input order matches the positional parameters of `detect_fields`.
    btn.click(
        detect_fields,
        [pdf_in, image_size, use_sig, keep],
        pdf_out,
        api_name="detect",
    )
if __name__ == "__main__":
    # Enable the request queue with max_size=4, then start the app.
    demo.queue(max_size=4)
    demo.launch()