Abdelkader HASSINE committed
Commit 3038c10 · Parent: 4fe5dcd

Deploy CU1-X to Hugging Face Spaces
- Multi-model AI pipeline (RF-DETR, CLIP, OCR, BLIP)
- Unified API architecture
- Gradio web interface
- Full model weights included via Git LFS
- Ready for production deployment
Files changed:
- api/endpoints.py +50 -0
- app.py +31 -4
- app_api.py +1 -0
- ui/detection_wrapper.py +8 -4
- ui/shared_interface.py +5 -1
api/endpoints.py
CHANGED
```diff
@@ -51,6 +51,7 @@ async def root():
         "endpoints": {
             "/detect": "POST - Detect UI elements in an image",
             "/health": "GET - Health check",
+            "/warmup": "POST - Preload models to avoid timeout on first request",
             "/docs": "GET - Interactive API documentation"
         },
         "example": {
@@ -73,6 +74,40 @@ async def health_check():
     }
 
 
+@app.post("/warmup")
+async def warmup_models():
+    """
+    Warmup endpoint to preload models before first detection request.
+    This helps avoid timeout on the first run.
+    """
+    try:
+        service = get_detection_service()
+        # Force loading of all models by accessing them
+        # RF-DETR is already loaded in __init__
+        service._load_ocr()   # Load OCR if enabled
+        service._load_clip()  # Load CLIP if enabled
+        service._load_blip()  # Load BLIP if enabled
+
+        return {
+            "status": "success",
+            "message": "Models warmed up successfully",
+            "models_loaded": {
+                "rfdetr": service.model is not None,
+                "ocr": service.ocr_reader is not None if service.enable_ocr else None,
+                "clip": service.clip_processor is not None if service.enable_clip else None,
+                "blip": service.blip_model is not None if service.enable_blip else None
+            }
+        }
+    except Exception as e:
+        import traceback
+        error_msg = f"Error during warmup: {str(e)}"
+        print(f"{error_msg}\n{traceback.format_exc()}")
+        return {
+            "status": "error",
+            "message": error_msg
+        }
+
+
 @app.post("/detect")
 async def detect_ui_elements(
     image: UploadFile = File(..., description="Image file to process"),
@@ -172,9 +207,15 @@ async def detect_ui_elements(
     )
 
     # Standard detection path: Use detection service
+    import time
+    start_time = time.time()
+    print(f"[API] Starting detection - Image size: {pil_image.size}, CLIP: {enable_clip}, OCR: {enable_ocr}, BLIP: {enable_blip}")
+
     service = get_detection_service()
 
     # Run analysis (pass parameters directly to avoid race conditions)
+    print(f"[API] Calling service.analyze()...")
+    analysis_start = time.time()
     analysis = service.analyze(
         pil_image,
         confidence_threshold=confidence_threshold,
@@ -187,8 +228,12 @@ async def detect_ui_elements(
         preprocess_mode=preprocess_mode,
         preprocess_preset=preprocess_preset
     )
+    analysis_time = time.time() - analysis_start
+    print(f"[API] service.analyze() completed in {analysis_time:.2f}s - Found {len(analysis.get('detections', []))} detections")
 
     # Generate annotated image
+    print(f"[API] Generating annotated image...")
+    annotated_start = time.time()
     annotated = service.get_prediction_image(
         pil_image,
         confidence_threshold=confidence_threshold,
@@ -197,6 +242,11 @@
         return_format="numpy",
         analysis=analysis
     )
+    annotated_time = time.time() - annotated_start
+    print(f"[API] Annotated image generated in {annotated_time:.2f}s")
+
+    total_time = time.time() - start_time
+    print(f"[API] Total detection time: {total_time:.2f}s")
 
     # Build response
     return response_builder.build_detection_response(
```
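For reference, a minimal client sketch against the new endpoint (the base URL/port and `screenshot.png` are assumptions for illustration, not part of this commit):

```python
import requests

API_URL = "http://localhost:8000"  # assumed port; use whatever CU1_API_URL points at

# Preload RF-DETR, OCR, CLIP, and BLIP once so the first /detect call is fast
resp = requests.post(f"{API_URL}/warmup", timeout=300)
payload = resp.json()
print(payload["status"], payload.get("models_loaded"))

# Later requests skip model loading; only the image field is required here
with open("screenshot.png", "rb") as f:
    detect = requests.post(f"{API_URL}/detect", files={"image": f}, timeout=300)
print(detect.status_code)
```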
app.py
CHANGED
```diff
@@ -73,6 +73,24 @@ def start_api_server():
             response = requests.get(f"{API_URL}/health", timeout=2)
             if response.status_code == 200:
                 print(f"✅ API server ready at {API_URL}")
+
+                # Optional: Warmup models to avoid timeout on first request
+                # This is especially useful for CPU-only environments
+                warmup_enabled = os.getenv("CU1_WARMUP_MODELS", "true").lower() in {"1", "true", "yes", "y"}
+                if warmup_enabled:
+                    print("🔥 Warming up models (this may take 1-3 minutes on first run)...")
+                    try:
+                        warmup_timeout = int(os.getenv("CU1_WARMUP_TIMEOUT", "180"))  # 3 minutes default
+                        warmup_response = requests.post(f"{API_URL}/warmup", timeout=warmup_timeout)
+                        if warmup_response.status_code == 200:
+                            print("✅ Models warmed up successfully!")
+                        else:
+                            print(f"⚠️ Warmup returned status {warmup_response.status_code}, continuing anyway...")
+                    except requests.exceptions.Timeout:
+                        print("⚠️ Warmup timed out, but API is ready. First request may be slower.")
+                    except requests.exceptions.RequestException as e:
+                        print(f"⚠️ Warmup failed: {e}, but API is ready. First request may be slower.")
+
                 return api_process
         except requests.exceptions.RequestException:
             pass
@@ -142,19 +160,28 @@ def main():
 
     # Launch Gradio with automatic port fallback
    # API is automatically exposed at /api/predict for HF Spaces
+    # Configure queue with longer timeout for CPU processing and model loading
     try:
-        demo.queue(
+        demo.queue(
+            max_size=10,  # Allow up to 10 queued requests
+            default_concurrency_limit=1  # Process one at a time to avoid memory issues
+        ).launch(
             server_name=UI_HOST,
             server_port=UI_PORT,
-            share=False
+            share=False,
+            max_threads=1  # Single thread to avoid memory issues
         )
     except OSError as e:
         if "Cannot find empty port" in str(e):
             print(f"⚠️ Port {UI_PORT} is busy, trying to find a free port...")
-            demo.queue(
+            demo.queue(
+                max_size=10,
+                default_concurrency_limit=1
+            ).launch(
                 server_name=UI_HOST,
                 server_port=None,  # Auto-select free port
-                share=False
+                share=False,
+                max_threads=1
             )
         else:
             raise
```
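The launch change relies on `Blocks.queue()` returning the `Blocks` instance so that `launch()` can be chained. A stripped-down sketch of the same pattern, assuming Gradio 4.x (where `queue()` accepts `default_concurrency_limit`); the placeholder UI and host/port values are illustrative:

```python
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")  # stand-in for the real CU1-X interface

# queue() returns the Blocks object, so launch() chains directly; a bounded
# queue plus a concurrency limit of 1 keeps peak memory usage predictable
demo.queue(
    max_size=10,
    default_concurrency_limit=1,
).launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
)
```

The trade-off is throughput: with a single worker, concurrent users wait in the queue instead of running models in parallel, which is the safer choice for a memory-constrained CPU Space.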
app_api.py
CHANGED
```diff
@@ -38,6 +38,7 @@ def main():
     print(f" - Root: http://localhost:{port}")
     print(f" - Detect: http://localhost:{port}/detect")
     print(f" - Health: http://localhost:{port}/health")
+    print(f" - Warmup: http://localhost:{port}/warmup (preload models)")
     print(f" - Docs: http://localhost:{port}/docs")
     print("\n💡 Tip: The Gradio UI connects to this API")
     print(" Run 'python app_ui.py' in another terminal")
```
ui/detection_wrapper.py
CHANGED
```diff
@@ -200,12 +200,14 @@ def detect_with_api(
     }
 
     # Call API
+    # Use configurable timeout (default 300s = 5min for CPU processing and model loading)
+    timeout_seconds = int(os.getenv("CU1_API_TIMEOUT", "300"))
     try:
         response = requests.post(
             f"{api_url}/detect",
             files=files,
             data=data,
-            timeout=
+            timeout=timeout_seconds
         )
         response.raise_for_status()
     except requests.exceptions.ConnectionError:
@@ -225,19 +227,21 @@ Cannot connect to API server at `{api_url}`
 You can change this by setting the `CU1_API_URL` environment variable.
 """, None
     except requests.exceptions.Timeout:
+        timeout_seconds = int(os.getenv("CU1_API_TIMEOUT", "300"))
         return None, f"""❌ **Timeout Error**
 
-The API request timed out after
+The API request timed out after {timeout_seconds} seconds.
 
 This might happen with:
 - Very large images
-- First run (models need to download)
+- First run (models need to download - can take 2-5 minutes)
 - CPU-only processing (slower than GPU)
 
 **Try:**
 - Using a smaller image
-- Waiting for model downloads to complete
+- Waiting for model downloads to complete (check API server logs)
 - Checking API server logs for errors
+- Increasing timeout: export CU1_API_TIMEOUT=600 (10 minutes)
 """, None
     except requests.exceptions.HTTPError as e:
         error_detail = "Unknown error"
```
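One detail worth noting: the timeout is looked up inside the function rather than at import time, so exporting `CU1_API_TIMEOUT` before starting the UI (or even mutating the environment at runtime) takes effect on the next request. A tiny sketch of the same lookup (the 600-second value is only an example):

```python
import os

# Unset, the wrapper falls back to 300 s (5 minutes)
print(int(os.getenv("CU1_API_TIMEOUT", "300")))  # 300

# Raise the ceiling to 10 minutes, e.g. for first-run model downloads on CPU;
# because the wrapper re-reads the variable per call, this applies immediately
os.environ["CU1_API_TIMEOUT"] = "600"
print(int(os.getenv("CU1_API_TIMEOUT", "300")))  # 600
```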
ui/shared_interface.py
CHANGED
```diff
@@ -7,6 +7,7 @@ different detection backends (direct service or API client).
 This eliminates code duplication between app.py and ui/gradio_interface.py
 """
 
+import os
 import gradio as gr
 from typing import Callable, Optional
 
@@ -261,6 +262,8 @@ def create_interface(
 
     # Connect detection button
     # api_name exposes this function as /api/predict endpoint for Hugging Face Spaces
+    # max_time increases Gradio's function timeout (default is 60s, we set to 300s = 5min)
+    max_time_seconds = int(os.getenv("GRADIO_MAX_TIME", "300"))  # 5 minutes default
     detect_button.click(
         fn=detection_fn,
         inputs=[
@@ -277,7 +280,8 @@
             preprocess_preset_dropdown
         ],
         outputs=[output_image, summary_output, json_output],
-        api_name="predict"  # Expose as /api/predict endpoint
+        api_name="predict",  # Expose as /api/predict endpoint
+        max_time=max_time_seconds  # Increase Gradio function timeout
     )
 
     # Build footer markdown
```
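Because the handler is exposed with `api_name="predict"`, it can also be driven from `gradio_client`. A minimal sketch, assuming a recent `gradio_client` that fills omitted parameters from the UI defaults; the URL and filename are placeholders:

```python
from gradio_client import Client, handle_file

# Connect to the running app; on HF Spaces, pass the Space URL instead
client = Client("http://localhost:7860")

# "/predict" mirrors api_name="predict" on detect_button.click(...); the
# remaining slider/checkbox/dropdown inputs fall back to their UI defaults
result = client.predict(
    handle_file("screenshot.png"),  # the image input
    api_name="/predict",
)
print(result)  # per the outputs list: (annotated image, summary, JSON detections)
```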
|