| """ | |
| Detection Function Wrappers | |
| Provides unified detection function signatures for different backends: | |
| - Direct service access (for HF Spaces / local) | |
| - API client access (for production service-oriented architecture) | |
| This eliminates duplication of detection logic across app.py and ui/gradio_interface.py | |
| """ | |
import os
import requests
import base64
import io
import json
from PIL import Image
from typing import Tuple, Optional
import traceback

from detection.service_factory import get_detection_service
from detection import ocr_handler, response_builder


def detect_with_service(
    image: Image.Image,
    confidence_threshold: float,
    line_thickness: int,
    enable_clip: bool,
    enable_ocr: bool,
    enable_blip: bool,
    ocr_only: bool,
    blip_scope_choice: str,
    preprocess: bool = False,
    preprocess_mode_choice: str = "RF-DETR Optimized (Recommended)",
    preprocess_preset: str = "standard"
) -> Tuple[Optional[Image.Image], str, Optional[str]]:
| """ | |
| Detect UI elements using detection service directly (no API) | |
| Used by: app.py (HF Spaces / local mode) | |
| Returns: | |
| Tuple of (annotated_image, summary_text, json_payload) | |
| """ | |
    try:
        if image is None:
            return None, "❌ Please upload an image first.", None

        # Map BLIP scope choice to internal value
        scope_value = "all" if (blip_scope_choice or "").lower().startswith("all") else "icons"

        # Map preprocessing mode choice to internal value
        preprocess_mode = "rfdetr" if "RF-DETR" in preprocess_mode_choice else "generic"

        # OCR-only path
        if ocr_only:
            detections = ocr_handler.process_ocr_only(image)
            annotated = ocr_handler.annotate_ocr_detections(
                image,
                detections,
                thickness=line_thickness,
                return_format="pil"
            )

            # Build analysis structure for simplified response
            analysis = {
                "detections": detections,
                "image_size": {"width": image.width, "height": image.height}
            }
            json_payload = response_builder.build_simplified_response(
                analysis=analysis,
                image=image,
                annotated_image=None,
                confidence_threshold=confidence_threshold,
                line_thickness=line_thickness,
                enable_clip=False,
                enable_ocr=True,
                enable_blip=False,
                blip_scope=None,
                ocr_only=True
            )

            detections_list = list(json_payload.get("detections", {}).values())
            summary_text = f"**OCR-only mode**\n**Total OCR texts:** {len(detections_list)}"

            # Return JSON as string for Gradio compatibility
            return annotated, summary_text, json.dumps(json_payload, indent=2)

        # Standard detection path
        service = get_detection_service()

        # Run analysis (pass parameters directly to avoid race conditions)
        analysis = service.analyze(
            image,
            confidence_threshold=confidence_threshold,
            extract_text=enable_ocr,
            use_clip=enable_clip,
            use_blip=enable_blip,
            merge_global_ocr=True,
            blip_scope=scope_value,
            preprocess=preprocess,
            preprocess_mode=preprocess_mode,
            preprocess_preset=preprocess_preset
        )

        # Generate annotated image
        annotated = service.get_prediction_image(
            image,
            confidence_threshold=confidence_threshold,
            extract_content=True,
            thickness=line_thickness,
            return_format="pil",
            analysis=analysis
        )

        # Build JSON response using the simplified format
        json_payload = response_builder.build_simplified_response(
            analysis=analysis,
            image=image,
            annotated_image=None,  # Don't include in JSON (already have PIL image)
            confidence_threshold=confidence_threshold,
            line_thickness=line_thickness,
            enable_clip=enable_clip,
            enable_ocr=enable_ocr,
            enable_blip=enable_blip,
            blip_scope=scope_value,
            ocr_only=False
        )

        # Build summary text from detections
        detections_list = list(json_payload.get("detections", {}).values())
        summary_lines = [f"**Total detections:** {len(detections_list)}", ""]
        summary_lines.append("**Settings:**")
        summary_lines.append(f"- Confidence threshold: {confidence_threshold:.2f}")
        summary_lines.append(f"- CLIP classification: {'✅ Enabled' if enable_clip else '❌ Disabled'}")
        summary_lines.append(f"- OCR text extraction: {'✅ Enabled' if enable_ocr else '❌ Disabled'}")
        summary_lines.append(f"- BLIP description: {'✅ Enabled' if enable_blip else '❌ Disabled'}")
        summary_text = "\n".join(summary_lines)

        # Return JSON as string for Gradio compatibility
        return annotated, summary_text, json.dumps(json_payload, indent=2)

    except Exception as e:
        error_msg = f"""❌ **Error during detection:**
```
{str(e)}
{traceback.format_exc()}
```
"""
        print(error_msg)
        return None, error_msg, None


def detect_with_api(
    image: Image.Image,
    confidence_threshold: float,
    line_thickness: int,
    enable_clip: bool,
    enable_ocr: bool,
    enable_blip: bool,
    ocr_only: bool,
    blip_scope_choice: str,
    preprocess: bool = False,
    preprocess_mode_choice: str = "RF-DETR Optimized (Recommended)",
    preprocess_preset: str = "standard",
    api_url: str = "http://localhost:8000"
) -> Tuple[Optional[Image.Image], str, Optional[str]]:
| """ | |
| Detect UI elements by calling the API | |
| Used by: app_ui.py (service-oriented mode) | |
| Returns: | |
| Tuple of (annotated_image, summary_text, json_payload) | |
| """ | |
    try:
        if image is None:
            return None, "❌ Please upload an image first.", None

        # Map BLIP scope choice to internal value
        scope_value = "all" if (blip_scope_choice or "").lower().startswith("all") else "icons"

        # Map preprocessing mode choice to internal value
        preprocess_mode = "rfdetr" if "RF-DETR" in preprocess_mode_choice else "generic"

        # Prepare image for upload
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format='PNG')
        img_byte_arr.seek(0)

        # Prepare form data
        files = {
            'image': ('image.png', img_byte_arr, 'image/png')
        }
        data = {
            'confidence_threshold': confidence_threshold,
            'line_thickness': line_thickness,
            'enable_clip': str(enable_clip).lower(),
            'enable_ocr': str(enable_ocr).lower(),
            'enable_blip': str(enable_blip).lower(),
            'blip_scope': scope_value,
            'ocr_only': str(ocr_only).lower(),
            'preprocess': preprocess_mode if False else preprocess_mode,
            'preprocess': str(preprocess).lower(),
            'preprocess_mode': preprocess_mode,
            'preprocess_preset': preprocess_preset
        }
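        # Roughly the same request as a curl sketch (field names mirror the dicts
        # above; the image filename and example values are assumptions):
        #   curl -X POST http://localhost:8000/detect \
        #        -F "image=@screenshot.png" \
        #        -F "confidence_threshold=0.5" -F "enable_ocr=true" -F "ocr_only=false"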
        # Call the API with an extended timeout for HuggingFace Spaces CPU processing.
        # Default: 600 s (10 minutes) to accommodate model loading on the first run.
        timeout_seconds = int(os.getenv("CU1_API_TIMEOUT", "600"))
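        # The timeout can be overridden per deployment, e.g. (shell sketch, assuming
        # the variable is exported in the UI process's environment):
        #   export CU1_API_TIMEOUT=120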
        try:
            response = requests.post(
                f"{api_url}/detect",
                files=files,
                data=data,
                timeout=timeout_seconds
            )
            response.raise_for_status()
        except requests.exceptions.ConnectionError:
            return None, f"""❌ **Connection Error**

Cannot connect to the API server at `{api_url}`

**To fix this:**
1. Make sure the API server is running:
   ```bash
   python app_api.py
   ```
2. The API should be accessible at http://localhost:8000
3. Check that no firewall is blocking the connection

**Current API URL:** {api_url}

You can change this by setting the `CU1_API_URL` environment variable.
""", None
        except requests.exceptions.Timeout:
            return None, f"""❌ **Timeout Error**

The API request timed out after {timeout_seconds} seconds.

**Most likely cause:** first-time model initialization on HuggingFace Spaces.

**What to do:**
1. Wait 2-3 minutes and try again (models are loading in the background)
2. Check the "Logs" tab in HuggingFace Spaces to see progress
3. If you see "[API] Starting detection..." in the logs, the API is working

**For debugging:**
- Check whether initialization messages appear in the logs
- Look for "Loading RF-DETR model..." or "Loading OCR reader..."
- These operations can take 2-5 minutes on CPU the first time
""", None
        except requests.exceptions.HTTPError as e:
            error_detail = "Unknown error"
            try:
                error_json = response.json()
                error_detail = error_json.get("detail", str(e))
            except Exception:
                error_detail = str(e)
            return None, f"""❌ **API Error ({response.status_code})**

{error_detail}

**API URL:** {api_url}
""", None
        # Parse the response
        json_payload = response.json()
        if not json_payload.get("success", False):
            error_text = f"❌ Detection failed: {json_payload.get('error', 'Unknown error')}"
            return None, error_text, json.dumps(json_payload, indent=2)
        # Decode the annotated image
        annotated_image = None
        if "annotated_image" in json_payload and json_payload["annotated_image"]:
            try:
                img_data = base64.b64decode(json_payload["annotated_image"]["base64"])
                annotated_image = Image.open(io.BytesIO(img_data))
            except Exception as e:
                print(f"Failed to decode annotated image: {e}")

        # Build summary text using response_builder
        summary_text = response_builder.format_summary_text(
            detections=json_payload.get("detections", []),
            parameters=json_payload.get("parameters", {}),
            ocr_only=json_payload.get("parameters", {}).get("ocr_only", False)
        )

        # Return JSON as string for Gradio compatibility
        return annotated_image, summary_text, json.dumps(json_payload, indent=2) if json_payload else None
    except Exception as e:
        error_msg = f"""❌ **Error during detection:**
```
{str(e)}
{traceback.format_exc()}
```
**API URL:** {api_url}
"""
        print(error_msg)
        return None, error_msg, None
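

if __name__ == "__main__":
    # Minimal smoke-test sketch (an assumption, not part of the original module):
    # run the direct-service path on a local screenshot. The file names are
    # placeholders; detect_with_api would instead need app_api.py running at api_url.
    test_image = Image.open("screenshot.png")
    annotated, summary, json_str = detect_with_service(
        test_image,
        confidence_threshold=0.5,
        line_thickness=2,
        enable_clip=False,
        enable_ocr=True,
        enable_blip=False,
        ocr_only=False,
        blip_scope_choice="Icons only",
    )
    print(summary)
    if annotated is not None:
        annotated.save("screenshot_annotated.png")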