Spaces:
Sleeping
Sleeping
| """ | |
Shared Gradio Interface Factory.

This module provides a reusable Gradio interface factory that works with
different detection backends (direct service or API client). Sharing one
factory eliminates code duplication between app.py and ui/gradio_interface.py.
| """ | |
| import gradio as gr | |
| from typing import Callable, Optional | |
def _handle_ocr_only_toggle(is_ocr_only: bool):
    """
    Compute control updates for a change of the OCR-only checkbox.

    With OCR-only on, CLIP and BLIP are unchecked and locked, OCR is checked
    and locked, and the BLIP scope radio is reset to "Only image & button"
    and hidden. With OCR-only off, the three checkboxes become interactive
    again (OCR is re-checked) and the BLIP scope radio is hidden.

    Returns a 4-tuple of updates, in this order:
    - CLIP checkbox
    - OCR checkbox
    - BLIP checkbox
    - BLIP scope radio
    """
    if not is_ocr_only:
        # Normal mode: hand control of the feature toggles back to the user.
        clip_update = gr.update(interactive=True)
        ocr_update = gr.update(value=True, interactive=True)
        blip_update = gr.update(interactive=True)
        scope_update = gr.update(visible=False)
    else:
        # OCR-only mode: force OCR on, everything else off, and lock all three.
        clip_update = gr.update(value=False, interactive=False)
        ocr_update = gr.update(value=True, interactive=False)
        blip_update = gr.update(value=False, interactive=False)
        scope_update = gr.update(value="Only image & button", visible=False)

    return clip_update, ocr_update, blip_update, scope_update
# Preset choices and help text per preprocessing mode. Shared by the initial
# dropdown and the mode-change handler so the two cannot drift apart.
_RFDETR_PRESET_CHOICES = ("gentle", "standard", "aggressive_denoise", "color_only")
_RFDETR_PRESET_INFO = "gentle=minimal | standard=balanced | aggressive_denoise=strong | color_only=colors"
_GENERIC_PRESET_CHOICES = ("minimal", "standard", "aggressive", "ocr_optimized")
_GENERIC_PRESET_INFO = "minimal=light | standard=balanced | aggressive=maximum | ocr_optimized=best for text"
_DEFAULT_PRESET = "standard"

# NOTE(review): the non-ASCII glyphs inside the UI strings below look like
# mis-decoded emoji. They are reproduced unchanged here -- confirm the file is
# stored and read as UTF-8 and restore the intended characters if needed.


def _build_title_markdown(title_suffix: str, show_api_info: bool, api_url: Optional[str]) -> str:
    """Return the page header markdown, with optional suffix and API lines."""
    parts = [
        "# π― CU-1 UI Element Detector",
        "",
        "Detect interactive elements in screenshots and UI mockups.",
        "",
        "**Multi-Model Pipeline:**",
        "- π **RF-DETR** detects all UI elements (single class detection)",
        "- π·οΈ **CLIP** classifies elements into 6 types (button, input, text, image, list_item, navigation)",
        "- π **OCR** extracts text content from detected elements",
        "- πΌοΈ **BLIP** generates visual descriptions for icons"
    ]
    if title_suffix:
        parts.extend(["", f"**{title_suffix}**"])
    if show_api_info and api_url:
        parts.extend(["", f"**API:** Connected to `{api_url}`"])
    return "\n".join(parts)


def _build_api_quickstart_markdown() -> str:
    """Return the static "API Quickstart" markdown (curl and Python examples)."""
    return "\n".join([
        "#### Call the Detection API",
        "",
        "```bash",
        "curl -X POST \"https://your-space.hf.space/detect\" \\",
        "  -H \"Authorization: Bearer <HF_TOKEN>\" \\",
        "  -F \"image=@screenshot.png\" \\",
        "  -F \"confidence_threshold=0.35\" \\",
        "  -F \"enable_clip=true\" \\",
        "  -F \"enable_ocr=true\"",
        "```",
        "",
        "```python",
        "import requests",
        "",
        "url = \"https://your-space.hf.space/detect\"",
        "headers = {\"Authorization\": \"Bearer <HF_TOKEN>\"}",
        "files = {\"image\": open(\"screenshot.png\", \"rb\")}",
        "data = {",
        "    \"confidence_threshold\": 0.35,",
        "    \"enable_clip\": \"true\",",
        "    \"enable_ocr\": \"true\"",
        "}",
        "resp = requests.post(url, files=files, data=data, headers=headers, timeout=120)",
        "resp.raise_for_status()",
        "print(resp.json())",
        "```",
        "",
        "- Replace `your-space` with your Hugging Face Space slug.",
        "- Add the `Authorization` header for private Spaces.",
        "- Response payload includes bounding boxes, texts, and optional annotated image."
    ])


def _build_footer_markdown(show_api_info: bool, api_url: Optional[str]) -> str:
    """Return the footer markdown; its last section depends on the API settings."""
    parts = [
        "---",
        "### β‘ Performance Tips",
        "",
        "- **Fast mode** (CLIP β, OCR β ): ~30-40s - Good for text extraction",
        "- **Balanced mode** (CLIP β , OCR β ): ~50-60s - Full classification + text",
        "- **Ultra-fast mode** (CLIP β, OCR β): ~25-35s - Just bounding boxes",
        "",
        "### π¨ Cross-Device Preprocessing",
        "",
        "Testing on multiple devices (Samsung, Pixel, Oppo)? **Enable preprocessing** for consistent results!",
        "",
        "- **RF-DETR Optimized** (Recommended): Preserves ImageNet normalization, best for detection",
        "- **Generic Mode**: Aggressive normalization, best for OCR accuracy",
        "",
        "### ποΈ Architecture",
        "",
        "**Single-Class Detection:** RF-DETR detects generic \"UI elements\" (one class)",
        "**Multi-Class Classification:** CLIP classifies detections into 6 specific types"
    ]
    if show_api_info and api_url:
        parts.extend([
            "",
            "### π§ API Connection",
            "",
            f"This UI is a **client** of the API server at `{api_url}`",
            "",
            "**Communication:** HTTP/REST (multipart/form-data)",
            "**Separation:** UI layer is completely isolated from detection logic",
            "",
            "To change API endpoint:",
            "```bash",
            "export CU1_API_URL=http://your-api-server:8000",
            "python app_ui.py",
            "```"
        ])
    else:
        parts.extend([
            "",
            "### π¦ Deployment",
            "",
            "This app uses direct detection service access (no API layer).",
            "Optimized for Hugging Face Spaces and local testing."
        ])
    return "\n".join(parts)


def _toggle_preprocess_options(enabled):
    """Show or hide the preprocessing mode radio and preset dropdown together."""
    return gr.update(visible=enabled), gr.update(visible=enabled)


def _update_preset_choices(mode):
    """Swap the preset dropdown's choices/help for the selected mode, resetting to "standard"."""
    if "RF-DETR" in mode:
        return gr.update(
            choices=list(_RFDETR_PRESET_CHOICES),
            value=_DEFAULT_PRESET,
            info=_RFDETR_PRESET_INFO
        )
    # Any other label is treated as the generic mode.
    return gr.update(
        choices=list(_GENERIC_PRESET_CHOICES),
        value=_DEFAULT_PRESET,
        info=_GENERIC_PRESET_INFO
    )


def create_interface(
    detection_fn: Callable,
    title_suffix: str = "",
    show_api_info: bool = False,
    api_url: Optional[str] = None
) -> gr.Blocks:
    """
    Create a Gradio interface with a pluggable detection function

    Args:
        detection_fn: Callback wired to the detect button. It is called with
            11 positional values, in this order:
            (image, confidence, thickness, clip, ocr, blip, ocr_only, blip_scope,
             preprocess_enabled, preprocess_mode, preprocess_preset)
            where image comes from a gr.Image(type="pil") component, and must
            return (annotated_image, summary, json_data)
        title_suffix: Additional text for the title
        show_api_info: Whether to show API connection info
        api_url: API URL to display (only shown if show_api_info=True)

    Returns:
        Gradio Blocks interface
    """
    # NOTE(review): the nesting of columns/accordions below was reconstructed
    # from a listing whose indentation was lost -- confirm which container each
    # component belongs to (in particular the "API Quickstart" accordion).
    with gr.Blocks(title="CU-1 UI Element Detector", theme=gr.themes.Soft()) as interface:
        gr.Markdown(_build_title_markdown(title_suffix, show_api_info, api_url))

        with gr.Row():
            # Left column: input image and all settings.
            with gr.Column(scale=1):
                input_image = gr.Image(
                    type="pil",
                    label="Upload Screenshot",
                    height=400,
                    sources=["upload"]
                )

                with gr.Accordion("Detection Settings", open=True):
                    confidence_slider = gr.Slider(
                        minimum=0.1,
                        maximum=0.9,
                        value=0.35,
                        step=0.05,
                        label="Confidence Threshold",
                        info="Lower = more elements detected"
                    )
                    thickness_slider = gr.Slider(
                        minimum=1,
                        maximum=6,
                        value=2,
                        step=1,
                        label="Box Line Thickness"
                    )

                with gr.Accordion("Feature Settings", open=True):
                    clip_checkbox = gr.Checkbox(
                        value=False,
                        label="Enable CLIP Classification",
                        info="Classify elements into types (slower but more informative)"
                    )
                    ocr_checkbox = gr.Checkbox(
                        value=True,
                        label="Enable OCR Text Extraction",
                        info="Extract text content from elements"
                    )
                    blip_checkbox = gr.Checkbox(
                        value=False,
                        label="Enable BLIP Description",
                        info="Generate visual descriptions for icons (slower)"
                    )
                    ocr_only_checkbox = gr.Checkbox(
                        value=False,
                        label="OCR-only (skip detection/classification)",
                        info="Run OCR across the whole image and return OCR boxes only"
                    )
                    # Hidden until BLIP is enabled (see blip_checkbox.change below).
                    blip_scope_radio = gr.Radio(
                        choices=["Only image & button", "All elements"],
                        value="Only image & button",
                        label="BLIP Scope",
                        info="When to apply BLIP descriptions",
                        visible=False
                    )

                with gr.Accordion("π¨ Preprocessing (Cross-Device Consistency)", open=False):
                    preprocess_checkbox = gr.Checkbox(
                        value=False,
                        label="Enable Image Preprocessing",
                        info="Standardize screenshots from different devices (Samsung, Pixel, Oppo, etc.)"
                    )
                    # Mode and preset stay hidden until preprocessing is enabled.
                    preprocess_mode_radio = gr.Radio(
                        choices=["RF-DETR Optimized (Recommended)", "Generic (CLIP/OCR Focus)"],
                        value="RF-DETR Optimized (Recommended)",
                        label="Preprocessing Mode",
                        info="RF-DETR: Preserves ImageNet normalization | Generic: Aggressive for OCR",
                        visible=False
                    )
                    preprocess_preset_dropdown = gr.Dropdown(
                        choices=list(_RFDETR_PRESET_CHOICES),
                        value=_DEFAULT_PRESET,
                        label="Preprocessing Preset",
                        info=_RFDETR_PRESET_INFO,
                        visible=False
                    )

                detect_button = gr.Button("π Detect Elements", variant="primary", size="lg")

            # Right column: detection results and API usage help.
            with gr.Column(scale=1):
                output_image = gr.Image(
                    type="pil",
                    label="Detected Elements",
                    height=400
                )
                summary_output = gr.Markdown(label="Detection Summary")

                with gr.Accordion("Raw Results (JSON)", open=False):
                    json_output = gr.JSON(label="Detections JSON")

                with gr.Accordion("API Quickstart", open=False):
                    gr.Markdown(value=_build_api_quickstart_markdown())

        # Toggle BLIP scope visibility
        blip_checkbox.change(
            fn=lambda v: gr.update(visible=v),
            inputs=blip_checkbox,
            outputs=blip_scope_radio
        )

        # Handle OCR-only toggle to disable/enable related controls
        ocr_only_checkbox.change(
            fn=_handle_ocr_only_toggle,
            inputs=ocr_only_checkbox,
            outputs=[clip_checkbox, ocr_checkbox, blip_checkbox, blip_scope_radio]
        )

        # Toggle preprocessing options visibility
        preprocess_checkbox.change(
            fn=_toggle_preprocess_options,
            inputs=preprocess_checkbox,
            outputs=[preprocess_mode_radio, preprocess_preset_dropdown]
        )

        # Update preset choices based on mode
        preprocess_mode_radio.change(
            fn=_update_preset_choices,
            inputs=preprocess_mode_radio,
            outputs=preprocess_preset_dropdown
        )

        # Connect detection button; the input order here is the positional
        # argument order detection_fn receives.
        detect_button.click(
            fn=detection_fn,
            inputs=[
                input_image,
                confidence_slider,
                thickness_slider,
                clip_checkbox,
                ocr_checkbox,
                blip_checkbox,
                ocr_only_checkbox,
                blip_scope_radio,
                preprocess_checkbox,
                preprocess_mode_radio,
                preprocess_preset_dropdown
            ],
            outputs=[output_image, summary_output, json_output]
        )

        gr.Markdown(_build_footer_markdown(show_api_info, api_url))

    return interface