Spaces:
Running
Running
| """ | |
| Widget Detector — Hugging Face Spaces Demo | |
| ========================================== | |
| Drag-and-drop a PDF or image to detect form widgets (text inputs, | |
| checkboxes, signatures) using YOLO11m fine-tuned on CommonForms. | |
| Features: | |
| - Visual bounding box overlay (Tab 1) | |
| - Raw JSON output for developers (Tab 2) | |
| - Download Fillable PDF — converts detections into interactive PDF form fields | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import json | |
| import tempfile | |
| from pathlib import Path | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| from PIL import Image | |
# --- Colour palette -----------------------------------------------------------
# Box colours per widget class. The tuples are in BGR order because they are
# drawn onto an image that has been converted to BGR for OpenCV.
CLASS_COLORS_BGR = dict(
    text_input=(217, 144, 74),    # blue (#4A90D9 -> BGR)
    choice_button=(60, 76, 231),  # red  (#E74C3C -> BGR)
    signature=(18, 156, 243),     # gold (#F39C12 -> BGR)
)

# Marker placed in front of each class name in the Markdown summary table.
CLASS_EMOJIS = dict(
    text_input="π¦",
    choice_button="π₯",
    signature="π¨",
)

# Render DPI used for both visualization and fillable PDF coordinate mapping.
RENDER_DPI = 200
# --- Global model (loaded once per worker) ------------------------------------
_detector = None


def _get_detector(conf: float):
    """Return the shared WidgetDetector, building it on the first call.

    The first call constructs a detector with ``imgsz=640`` on ``device="cpu"``
    using the given confidence threshold. Every later call reuses that
    instance and only writes the new threshold to ``detector.conf`` and to
    ``detector.model.overrides["conf"]``.
    """
    global _detector

    if _detector is not None:
        # Reuse the cached instance; only the threshold changes between calls.
        _detector.conf = conf
        _detector.model.overrides["conf"] = conf
        return _detector

    # Deferred import: the package is only needed once a detector is built.
    from widget_detector import WidgetDetector

    _detector = WidgetDetector(conf=conf, imgsz=640, device="cpu")
    return _detector
# --- Drawing helper -----------------------------------------------------------
def _draw_boxes(pil_img: Image.Image, widgets: list) -> Image.Image:
    """Draw coloured bounding boxes and labels on a copy of a PIL image.

    Args:
        pil_img: Page image in any PIL mode; it is converted to RGB first.
        widgets: Detections exposing ``class_name``, ``confidence`` and a
            ``bbox`` with ``x1``/``y1``/``x2``/``y2`` pixel coordinates.

    Returns:
        A new RGB image with one rectangle and one ``"<class> <conf%>"`` label
        per widget. The input image is not modified.
    """
    # Normalise to 3-channel RGB: grayscale, palette or 1-bit images would
    # otherwise become 2-D arrays that cv2.cvtColor(RGB2BGR) rejects.
    img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
    width = img.shape[1]

    # Scale text and line weight with the image width, with a readable minimum.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = max(0.45, width / 2200)
    thickness = max(2, width // 800)

    for widget in widgets:
        cls = widget.class_name
        conf = widget.confidence
        x1, y1, x2, y2 = (
            int(widget.bbox.x1), int(widget.bbox.y1),
            int(widget.bbox.x2), int(widget.bbox.y2),
        )
        # Unknown classes fall back to neutral grey.
        color = CLASS_COLORS_BGR.get(cls, (128, 128, 128))
        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness)

        label = f"{cls} {conf:.0%}"
        (tw, th), baseline = cv2.getTextSize(label, font, font_scale, 1)
        # Clamp so the label background never starts above the top edge.
        label_y = max(y1, th + baseline + 4)
        cv2.rectangle(
            img,
            (x1, label_y - th - baseline - 4),
            (x1 + tw + 4, label_y),
            color, -1,
        )
        cv2.putText(
            img, label, (x1 + 2, label_y - baseline - 2),
            font, font_scale,
            (255, 255, 255), 1, cv2.LINE_AA,
        )
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# --- Fillable PDF generator ---------------------------------------------------
def create_fillable_pdf(state: dict | None) -> str | None:
    """Build a fillable PDF from the detections stored in *state*.

    Each detected widget becomes an interactive form field:

    - ``text_input``    -> text field (light blue fill)
    - ``choice_button`` -> checkbox (red border)
    - ``signature``     -> signature field (light gold fill)

    Widgets of any other class are skipped. Bounding boxes are treated as
    pixel coordinates at ``RENDER_DPI``; PDF pages use points (1 pt = 1/72
    inch), so every coordinate is multiplied by ``72 / RENDER_DPI``.

    Args:
        state: Mapping with ``"result"``, ``"file_path"`` and ``"is_pdf"``
            keys, or ``None``.

    Returns:
        Path of the generated temporary PDF, or ``None`` when *state* is
        missing / has no ``"result"`` or PyMuPDF cannot be imported.
    """
    if state is None or "result" not in state:
        return None
    try:
        import fitz  # PyMuPDF
    except ImportError:
        return None

    result = state["result"]
    file_path = Path(state["file_path"])
    is_pdf_ = state["is_pdf"]
    scale = 72.0 / RENDER_DPI  # pixel -> PDF point

    # -- Open or create the base PDF -------------------------------------------
    image_png = None
    if is_pdf_:
        doc = fitz.open(str(file_path))
    else:
        # Release the source file handle as soon as the pixels are copied.
        with Image.open(str(file_path)) as src:
            pil_img = src.convert("RGB")
        w_px, h_px = pil_img.size
        buf = io.BytesIO()
        pil_img.save(buf, format="PNG")
        image_png = buf.getvalue()
        doc = fitz.open()

    try:
        if image_png is not None:
            # Single page sized so that image pixels map to points via `scale`.
            page = doc.new_page(width=w_px * scale, height=h_px * scale)
            page.insert_image(page.rect, stream=image_png)

        # -- Add form widgets to each page --------------------------------------
        for page_idx, page_result in enumerate(result.pages):
            if page_idx >= len(doc):
                break
            page = doc[page_idx]
            for i, w in enumerate(page_result.widgets):
                cls = w.class_name
                rect = fitz.Rect(
                    w.bbox.x1 * scale,
                    w.bbox.y1 * scale,
                    w.bbox.x2 * scale,
                    w.bbox.y2 * scale,
                )
                # A zero-area box cannot hold a form field and would make
                # add_widget() raise, aborting the whole export.
                if rect.is_empty:
                    continue

                widget = fitz.Widget()
                widget.rect = rect
                if cls == "text_input":
                    widget.field_type = fitz.PDF_WIDGET_TYPE_TEXT
                    widget.field_name = f"text_p{page_idx}_{i}"
                    widget.field_flags = 0  # single-line
                    widget.text_fontsize = 9
                    widget.fill_color = (0.94, 0.97, 1.0)  # light blue
                    widget.border_color = (0.29, 0.56, 0.89)
                    widget.border_width = 1.0
                elif cls == "choice_button":
                    widget.field_type = fitz.PDF_WIDGET_TYPE_CHECKBOX
                    widget.field_name = f"check_p{page_idx}_{i}"
                    widget.field_value = "Off"
                    widget.fill_color = (1.0, 1.0, 1.0)
                    widget.border_color = (0.91, 0.30, 0.24)
                    widget.border_width = 1.5
                elif cls == "signature":
                    widget.field_type = fitz.PDF_WIDGET_TYPE_SIGNATURE
                    widget.field_name = f"sig_p{page_idx}_{i}"
                    widget.fill_color = (1.0, 0.98, 0.90)  # light gold
                    widget.border_color = (0.95, 0.61, 0.07)
                    widget.border_width = 1.0
                else:
                    continue
                page.add_widget(widget)

        # -- Save to a named temp file (Gradio serves it as download) ----------
        # Only the unique name is needed; close the handle right away so no
        # descriptor is leaked and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(
            suffix=".pdf", delete=False, prefix="fillable_form_"
        ) as tmp:
            out_path = tmp.name
        doc.save(out_path, garbage=4, deflate=True)
    finally:
        # Always release the document, including when a step above raises.
        doc.close()
    return out_path
# --- Core inference -----------------------------------------------------------
def run_inference(file_obj, conf: float, high_quality: bool):
    """Run widget detection on an uploaded file for the Gradio UI.

    Returns a 4-tuple ``(gallery, summary_md, json_str, state_dict)``:
    annotated page images, a Markdown summary table, the result serialised
    as JSON, and the state consumed by the fillable-PDF export. When no file
    is given or detection raises, the gallery is empty, the JSON is ``"{}"``
    and the state is ``None``.
    """
    if not file_obj:
        return [], "No file uploaded.", "{}", None

    detector = _get_detector(conf)
    if high_quality:
        detector.model.overrides["imgsz"] = 1024
    else:
        detector.model.overrides["imgsz"] = 640

    file_path = Path(file_obj)
    try:
        result = detector.detect_path(str(file_path))
    except Exception as exc:
        return [], f"β Inference error: {exc}", "{}", None

    from widget_detector.pdf_utils import is_pdf, pdf_to_images, image_to_pil

    # One source image per page: rendered PDF pages, or the image itself.
    is_pdf_flag = is_pdf(file_path)
    if not is_pdf_flag:
        source_images = [image_to_pil(file_path)]
    else:
        source_images = [
            rendered for rendered, _ in pdf_to_images(file_path, dpi=RENDER_DPI)
        ]

    # -- Visualizations --------------------------------------------------------
    gallery_images = [
        _draw_boxes(page_img, page_det.widgets)
        for page_det, page_img in zip(result.pages, source_images)
    ]

    # -- Summary ---------------------------------------------------------------
    # The three known classes are always listed, even with zero hits; any
    # other class name is appended as it is encountered.
    counts = dict.fromkeys(("text_input", "choice_button", "signature"), 0)
    for page_det in result.pages:
        for det in page_det.widgets:
            counts[det.class_name] = counts.get(det.class_name, 0) + 1

    header = (
        f"### β Detected **{result.total_widgets}** widgets across "
        f"**{result.total_pages}** page(s)\n"
    )
    table_rows = [
        "| {} `{}` | **{}** |".format(CLASS_EMOJIS.get(name, "β’"), name, total)
        for name, total in counts.items()
    ]
    summary_md = "\n".join([header, "| Class | Count |", "|---|---|", *table_rows])

    # -- JSON ------------------------------------------------------------------
    json_str = json.dumps(result.model_dump(), indent=2)

    # -- State (passed to fillable PDF generator) ------------------------------
    state = dict(
        result=result,
        file_path=str(file_path),
        is_pdf=is_pdf_flag,
    )
    return gallery_images, summary_md, json_str, state
# --- Gradio UI ----------------------------------------------------------------
# Header block rendered at the top of the page.
DESCRIPTION = """
<div style="text-align:center; padding: 12px 0 4px 0">
<h1 style="font-size:2rem; margin-bottom:4px">π Widget Detector</h1>
<p style="font-size:1.05rem; color:#666; margin-top:0">
Detect form fields in scanned PDFs and document images using <b>YOLO11m</b>
fine-tuned on the <a href="https://huggingface.co/datasets/jbarrow/CommonForms" target="_blank">CommonForms</a> dataset.
</p>
<p style="font-size:0.9rem; margin-top:6px">
π¦ <code>text_input</code> |
π₯ <code>choice_button</code> (checkboxes / radio) |
π¨ <code>signature</code>
</p>
<p style="font-size:0.85rem; color:#888">
π¦ <a href="https://pypi.org/project/psynx-widget-detector/" target="_blank">pip install psynx-widget-detector</a> |
π€ <a href="https://huggingface.co/PSynx/widget-detector-yolo" target="_blank">Model Card</a>
</p>
</div>
"""

# Page-level stylesheet passed to gr.Blocks.
_CUSTOM_CSS = """
.contain { max-width: 1100px; margin: 0 auto; }
#output-gallery img { border-radius: 8px; }
.summary-box { background: #f8f9ff; border-radius: 8px; padding: 12px; }
.fillable-section { background: #f0fdf4; border-radius: 8px; padding: 12px;
border: 1px solid #bbf7d0; margin-top: 8px; }
footer { display: none !important; }
"""

# Explanatory panel shown above the fillable-PDF download button.
_FILLABLE_PDF_BLURB = """
<div class="fillable-section">
<b>π₯ Fillable PDF Export</b><br>
<span style="font-size:0.85rem;color:#555">
After detecting widgets, click below to download a fillable PDF
with interactive text boxes, checkboxes, and signature fields
placed exactly over the detected widget locations.
</span>
</div>
"""

_THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    secondary_hue=gr.themes.colors.indigo,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)


def _pdf_button_update(state):
    """Make the PDF button clickable only when an inference state exists."""
    return gr.update(interactive=state is not None)


def _pdf_file_update(path):
    """Show the download component only when a file was produced."""
    return gr.update(visible=path is not None)


with gr.Blocks(title="Widget Detector Demo", theme=_THEME, css=_CUSTOM_CSS) as demo:
    # Shared state between inference run and fillable PDF generation
    inference_state = gr.State(None)
    gr.HTML(DESCRIPTION)

    with gr.Row(equal_height=False):
        # -- Left column: Inputs -----------------------------------------------
        with gr.Column(scale=1, min_width=280):
            file_input = gr.File(
                label="Upload PDF or Image",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"],
                type="filepath",
            )
            conf_slider = gr.Slider(
                minimum=0.10,
                maximum=0.90,
                step=0.05,
                value=0.35,
                label="Confidence Threshold",
                info="Lower = more detections (may include false positives)",
            )
            hq_checkbox = gr.Checkbox(
                label="β‘ High Quality (1024px β slower on CPU)",
                value=False,
            )
            run_btn = gr.Button("π Detect Widgets", variant="primary", size="lg")

            # -- Fillable PDF section ------------------------------------------
            gr.HTML(_FILLABLE_PDF_BLURB)
            pdf_btn = gr.Button(
                "π₯ Download Fillable PDF",
                variant="secondary",
                size="lg",
                interactive=False,
            )
            pdf_output = gr.File(
                label="Fillable PDF",
                visible=False,
            )
            gr.Examples(
                examples=[
                    ["examples/tt.pdf", 0.35, False],
                    ["examples/mvatform1.pdf", 0.35, False],
                    ["examples/new.pdf", 0.35, False],
                ],
                inputs=[file_input, conf_slider, hq_checkbox],
                label="π Example Files (click to load)",
            )

        # -- Right column: Outputs ---------------------------------------------
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("πΌοΈ Visual Output"):
                    summary_md = gr.Markdown(
                        "Upload a file and click **Detect Widgets** to see results.",
                        elem_classes=["summary-box"],
                    )
                    gallery = gr.Gallery(
                        label="Detected Widgets",
                        elem_id="output-gallery",
                        columns=1,
                        object_fit="contain",
                        height=700,
                        show_label=False,
                    )
                with gr.TabItem("{ } JSON Output"):
                    gr.Markdown(
                        "The raw JSON response β copy this to integrate the detector into your own app.",
                        elem_classes=["summary-box"],
                    )
                    json_output = gr.Code(
                        language="json",
                        label="Detection Result",
                        lines=35,
                        interactive=False,
                    )

    # -- Inference click -------------------------------------------------------
    detect_event = run_btn.click(
        fn=run_inference,
        inputs=[file_input, conf_slider, hq_checkbox],
        outputs=[gallery, summary_md, json_output, inference_state],
    )
    # Enable the PDF button after successful inference
    detect_event.then(
        fn=_pdf_button_update,
        inputs=[inference_state],
        outputs=[pdf_btn],
    )

    # -- Fillable PDF click ----------------------------------------------------
    export_event = pdf_btn.click(
        fn=create_fillable_pdf,
        inputs=[inference_state],
        outputs=[pdf_output],
    )
    export_event.then(
        fn=_pdf_file_update,
        inputs=[pdf_output],
        outputs=[pdf_output],
    )

if __name__ == "__main__":
    demo.launch()