luisabwk committed on
Commit
266a753
·
verified ·
1 Parent(s): 81c4003

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio app wrapping the official `commonforms` package to convert PDFs
2
+ into fillable forms using jbarrow's FFDNet-L object detector (CPU ONNX).
3
+
4
+ - Paper: <https://arxiv.org/abs/2509.16506>
5
+ - Model: <https://huggingface.co/jbarrow/FFDNet-L-cpu>
6
+ - Package: <https://pypi.org/project/commonforms/>
7
+
8
+ Detects 3 classes of fields: text boxes, checkboxes (choice buttons), and signatures.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import os
13
+
14
# Force CPU before any import that might initialise CUDA. `setdefault` is
# used so a value already provided by the environment is left untouched.
for _gpu_env_var in ("CUDA_VISIBLE_DEVICES", "NVIDIA_VISIBLE_DEVICES"):
    os.environ.setdefault(_gpu_env_var, "")
del _gpu_env_var  # keep the module namespace free of the loop variable
17
+
18
+ import inspect
19
+ import tempfile
20
+ from pathlib import Path
21
+
22
+ import gradio as gr
23
+ from commonforms import prepare_form
24
+ from huggingface_hub import hf_hub_download
25
+
26
# Introspect the installed `prepare_form` once at import time; the parameter
# mapping is later used to forward only the keyword arguments it accepts.
_PARAMS = inspect.signature(prepare_form).parameters
print(f"[commonforms] prepare_form signature: {[*_PARAMS]}")

# Pre-download the ONNX weights once at startup. `commonforms` uses
# ultralytics YOLO under the hood, which only accepts a LOCAL path in the
# `model_or_path` parameter.
_MODEL_REPO, _MODEL_FILE = "jbarrow/FFDNet-L-cpu", "FFDNet-L.onnx"
print(f"[commonforms] baixando {_MODEL_REPO}/{_MODEL_FILE}...")
_ONNX_PATH = hf_hub_download(filename=_MODEL_FILE, repo_id=_MODEL_REPO)
print(f"[commonforms] ONNX local: {_ONNX_PATH}")
36
+
37
+
38
def detect_fields(
    pdf_path: str | None,
    image_size: int,
    use_signature_fields: bool,
    keep_existing_fields: bool,
) -> str:
    """Run `prepare_form` on an uploaded PDF and return the output file path.

    Args:
        pdf_path: Filesystem path of the uploaded PDF; ``None`` or empty when
            nothing was uploaded.
        image_size: Value forwarded to `prepare_form` as ``image_size``
            (coerced to ``int``).
        use_signature_fields: Forwarded to `prepare_form` as
            ``use_signature_fields``.
        keep_existing_fields: Forwarded to `prepare_form` as
            ``keep_existing_fields``.

    Returns:
        Path of a freshly created temporary file (suffix ``_fillable.pdf``)
        that `prepare_form` was asked to write to.

    Raises:
        gr.Error: If no path was given, the path does not exist, or
            `prepare_form` raised any exception.
    """
    if not pdf_path:
        raise gr.Error("Envie um PDF.")

    src = Path(pdf_path)
    if not src.exists():
        raise gr.Error(f"Arquivo não encontrado: {src}")

    # `mkstemp` returns an already-open OS-level file descriptor together with
    # the path. Only the path is needed here, so close the descriptor right
    # away; otherwise every request leaks one descriptor.
    fd, out_str = tempfile.mkstemp(suffix="_fillable.pdf")
    os.close(fd)
    out = Path(out_str)

    optional = {
        "image_size": int(image_size),
        "use_signature_fields": bool(use_signature_fields),
        "keep_existing_fields": bool(keep_existing_fields),
        "device": "cpu",
        "model_or_path": _ONNX_PATH,
    }
    # Forward only the keyword arguments that the installed `prepare_form`
    # signature actually declares.
    accepted = {k: v for k, v in optional.items() if k in _PARAMS}
    print(f"[commonforms] calling prepare_form with kwargs: {accepted}")

    try:
        prepare_form(str(src), str(out), **accepted)
    except Exception as exc:
        # Best-effort cleanup of the now-useless temp file; a cleanup failure
        # must not mask the original processing error reported below.
        try:
            out.unlink()
        except OSError:
            pass
        raise gr.Error(f"Falha ao processar PDF: {exc}") from exc

    return str(out)
70
+
71
+
72
with gr.Blocks(title="CommonForms — Form Field Detector") as demo:
    # Page header: title, short description and reference links.
    gr.Markdown(
        "# CommonForms — Form Field Detector\n"
        "Converte um PDF em formulário preenchível usando **FFDNet-L** "
        "(`jbarrow/FFDNet-L-cpu`, Object Detection ONNX em CPU). "
        "Detecta *text boxes*, *checkboxes* e *signature fields*.\n\n"
        "Paper: [arxiv 2509.16506](<https://arxiv.org/abs/2509.16506>) · "
        "Modelo: [jbarrow/FFDNet-L-cpu](<https://huggingface.co/jbarrow/FFDNet-L-cpu>)"
    )

    with gr.Row():
        # Left column: the input PDF, the detection options and the trigger.
        with gr.Column():
            pdf_in = gr.File(
                type="filepath",
                file_types=[".pdf"],
                label="PDF de entrada",
            )
            image_size = gr.Slider(
                label="Image size (px)",
                info="Tamanho usado na inferência. Maior = mais preciso, mais lento.",
                minimum=512,
                maximum=2048,
                step=32,
                value=1600,
            )
            use_sig = gr.Checkbox(
                label="Incluir signature fields",
                info="Detecta áreas de assinatura além de text/checkbox.",
                value=False,
            )
            keep = gr.Checkbox(
                label="Manter campos já existentes",
                info="Preserva widgets AcroForm que já estavam no PDF.",
                value=False,
            )
            btn = gr.Button("Detectar campos", variant="primary")

        # Right column: the resulting file.
        with gr.Column():
            pdf_out = gr.File(label="PDF preenchível")

    # Wire the button to the detector; the input order matches the
    # positional parameters of `detect_fields`.
    btn.click(
        fn=detect_fields,
        api_name="detect",
        inputs=[pdf_in, image_size, use_sig, keep],
        outputs=pdf_out,
    )
116
+
117
+
118
if __name__ == "__main__":
    # Enable the request queue with at most 4 pending requests, then start
    # the server.
    demo.queue(max_size=4)
    demo.launch()