Spaces:

AI-DrivenTesting
/

CU1-X

Sleeping

CU1-X / ui /detection_wrapper.py

abdelkader

Fix Gradio JSON schema error by returning JSON as string

e585852 5 days ago

10.7 kB

	"""
	Detection Function Wrappers

	Provides unified detection function signatures for different backends:
	- Direct service access (for HF Spaces / local)
	- API client access (for production service-oriented architecture)

	This eliminates duplication of detection logic across app.py and ui/gradio_interface.py
	"""

	import os
	import requests
	import base64
	import io
	import json
	from PIL import Image
	from typing import Tuple, Optional
	import traceback

	from detection.service_factory import get_detection_service
	from detection import ocr_handler, response_builder


	def detect_with_service(
	    image: Optional[Image.Image],
	    confidence_threshold: float,
	    line_thickness: int,
	    enable_clip: bool,
	    enable_ocr: bool,
	    enable_blip: bool,
	    ocr_only: bool,
	    blip_scope_choice: str,
	    preprocess: bool = False,
	    preprocess_mode_choice: str = "RF-DETR Optimized (Recommended)",
	    preprocess_preset: str = "standard"
	) -> Tuple[Optional[Image.Image], str, Optional[str]]:
	    """
	    Detect UI elements using detection service directly (no API)

	    Used by: app.py (HF Spaces / local mode)

	    Args:
	        image: Input image. When None, an "upload an image" message is
	            returned and nothing else runs.
	        confidence_threshold: Forwarded to the detection service and to the
	            response builder.
	        line_thickness: Forwarded as the annotation ``thickness``.
	        enable_clip: Forwarded to the service as ``use_clip``.
	        enable_ocr: Forwarded to the service as ``extract_text``.
	        enable_blip: Forwarded to the service as ``use_blip``.
	        ocr_only: When true, only the OCR handler runs and the three
	            ``enable_*`` flags are ignored.
	        blip_scope_choice: UI label; values starting with "all"
	            (case-insensitive) map to "all", anything else (including None)
	            maps to "icons".
	        preprocess: Forwarded to the service unchanged.
	        preprocess_mode_choice: UI label; labels containing "RF-DETR" map to
	            "rfdetr", other labels map to "generic". None falls back to
	            "rfdetr", matching the signature default.
	        preprocess_preset: Forwarded to the service unchanged.

	    Returns:
	        Tuple of (annotated_image, summary_text, json_text). ``json_text`` is
	        the response payload serialized with ``json.dumps`` (a string, not a
	        dict). On missing input or on any exception the tuple is
	        (None, message, None); exceptions are printed and not re-raised.
	    """
	    try:
	        if image is None:
	            return None, "❌ Please upload an image first.", None

	        # Map BLIP scope choice to internal value
	        scope_value = "all" if (blip_scope_choice or "").lower().startswith("all") else "icons"

	        # Map preprocessing mode choice to internal value.
	        # A missing choice (None) means "use the default", which is RF-DETR;
	        # without this guard `"RF-DETR" in None` raises TypeError.
	        use_rfdetr = preprocess_mode_choice is None or "RF-DETR" in preprocess_mode_choice
	        preprocess_mode = "rfdetr" if use_rfdetr else "generic"

	        # OCR-only path
	        if ocr_only:
	            detections = ocr_handler.process_ocr_only(image)
	            annotated = ocr_handler.annotate_ocr_detections(
	                image,
	                detections,
	                thickness=line_thickness,
	                return_format="pil"
	            )

	            # Build analysis structure for simplified response
	            analysis = {
	                "detections": detections,
	                "image_size": {"width": image.width, "height": image.height}
	            }

	            json_payload = response_builder.build_simplified_response(
	                analysis=analysis,
	                image=image,
	                annotated_image=None,
	                confidence_threshold=confidence_threshold,
	                line_thickness=line_thickness,
	                enable_clip=False,
	                enable_ocr=True,
	                enable_blip=False,
	                blip_scope=None,
	                ocr_only=True
	            )

	            ocr_count = len(json_payload.get("detections", {}))
	            summary_text = f"OCR-only mode\nTotal OCR texts: {ocr_count}"

	            # Return JSON as string for Gradio compatibility
	            return annotated, summary_text, json.dumps(json_payload, indent=2)

	        # Standard detection path
	        service = get_detection_service()

	        # Run analysis (pass parameters directly to avoid race conditions)
	        analysis = service.analyze(
	            image,
	            confidence_threshold=confidence_threshold,
	            extract_text=enable_ocr,
	            use_clip=enable_clip,
	            use_blip=enable_blip,
	            merge_global_ocr=True,
	            blip_scope=scope_value,
	            preprocess=preprocess,
	            preprocess_mode=preprocess_mode,
	            preprocess_preset=preprocess_preset
	        )

	        # Generate annotated image from the analysis computed above
	        annotated = service.get_prediction_image(
	            image,
	            confidence_threshold=confidence_threshold,
	            extract_content=True,
	            thickness=line_thickness,
	            return_format="pil",
	            analysis=analysis
	        )

	        # Build JSON response using simplified format
	        json_payload = response_builder.build_simplified_response(
	            analysis=analysis,
	            image=image,
	            annotated_image=None,  # Don't include in JSON (already have PIL image)
	            confidence_threshold=confidence_threshold,
	            line_thickness=line_thickness,
	            enable_clip=enable_clip,
	            enable_ocr=enable_ocr,
	            enable_blip=enable_blip,
	            blip_scope=scope_value,
	            ocr_only=False
	        )

	        # Build summary text from detections
	        detection_count = len(json_payload.get("detections", {}))
	        summary_lines = [
	            f"Total detections: {detection_count}",
	            "",
	            "Settings:",
	            f"- Confidence threshold: {confidence_threshold:.2f}",
	            f"- CLIP classification: {'✅ Enabled' if enable_clip else '❌ Disabled'}",
	            f"- OCR text extraction: {'✅ Enabled' if enable_ocr else '❌ Disabled'}",
	            f"- BLIP description: {'✅ Enabled' if enable_blip else '❌ Disabled'}",
	        ]
	        summary_text = "\n".join(summary_lines)

	        # Return JSON as string for Gradio compatibility
	        return annotated, summary_text, json.dumps(json_payload, indent=2)

	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising
	        error_msg = f"""❌ Error during detection:

	```
	{str(e)}

	{traceback.format_exc()}
	```
	"""
	        print(error_msg)
	        return None, error_msg, None


	def detect_with_api(
	    image: Optional[Image.Image],
	    confidence_threshold: float,
	    line_thickness: int,
	    enable_clip: bool,
	    enable_ocr: bool,
	    enable_blip: bool,
	    ocr_only: bool,
	    blip_scope_choice: str,
	    preprocess: bool = False,
	    preprocess_mode_choice: str = "RF-DETR Optimized (Recommended)",
	    preprocess_preset: str = "standard",
	    api_url: str = "http://localhost:8000"
	) -> Tuple[Optional[Image.Image], str, Optional[str]]:
	    """
	    Detect UI elements by calling the API

	    Used by: app_ui.py (service-oriented mode)

	    The image is uploaded as a PNG in a multipart POST to ``{api_url}/detect``
	    together with the settings as form fields. The request timeout is read
	    from the ``CU1_API_TIMEOUT`` environment variable (seconds, default 600).

	    Args:
	        image: Input image. When None, an "upload an image" message is
	            returned and no request is made.
	        confidence_threshold: Sent as the ``confidence_threshold`` form field.
	        line_thickness: Sent as the ``line_thickness`` form field.
	        enable_clip: Sent as "true"/"false" in ``enable_clip``.
	        enable_ocr: Sent as "true"/"false" in ``enable_ocr``.
	        enable_blip: Sent as "true"/"false" in ``enable_blip``.
	        ocr_only: Sent as "true"/"false" in ``ocr_only``.
	        blip_scope_choice: UI label; values starting with "all"
	            (case-insensitive) map to "all", anything else (including None)
	            maps to "icons".
	        preprocess: Sent as "true"/"false" in ``preprocess``.
	        preprocess_mode_choice: UI label; labels containing "RF-DETR" map to
	            "rfdetr", other labels map to "generic". None falls back to
	            "rfdetr", matching the signature default.
	        preprocess_preset: Sent as the ``preprocess_preset`` form field.
	        api_url: Base URL of the API server; a trailing slash is tolerated.

	    Returns:
	        Tuple of (annotated_image, summary_text, json_text). ``json_text`` is
	        the API response serialized with ``json.dumps`` (a string, not a
	        dict), including when the API reports ``success`` as false.
	        Connection, timeout, HTTP and unexpected errors yield
	        (None, message, None); nothing is re-raised.
	    """
	    try:
	        if image is None:
	            return None, "❌ Please upload an image first.", None

	        # Map BLIP scope choice to internal value
	        scope_value = "all" if (blip_scope_choice or "").lower().startswith("all") else "icons"

	        # Map preprocessing mode choice to internal value.
	        # A missing choice (None) means "use the default", which is RF-DETR;
	        # without this guard `"RF-DETR" in None` raises TypeError.
	        use_rfdetr = preprocess_mode_choice is None or "RF-DETR" in preprocess_mode_choice
	        preprocess_mode = "rfdetr" if use_rfdetr else "generic"

	        # Prepare image for upload (in-memory PNG, rewound for reading)
	        png_buffer = io.BytesIO()
	        image.save(png_buffer, format='PNG')
	        png_buffer.seek(0)

	        # Prepare form data; booleans are sent as lowercase "true"/"false"
	        files = {
	            'image': ('image.png', png_buffer, 'image/png')
	        }
	        data = {
	            'confidence_threshold': confidence_threshold,
	            'line_thickness': line_thickness,
	            'enable_clip': str(enable_clip).lower(),
	            'enable_ocr': str(enable_ocr).lower(),
	            'enable_blip': str(enable_blip).lower(),
	            'blip_scope': scope_value,
	            'ocr_only': str(ocr_only).lower(),
	            'preprocess': str(preprocess).lower(),
	            'preprocess_mode': preprocess_mode,
	            'preprocess_preset': preprocess_preset
	        }

	        # Call API with extended timeout for HuggingFace Spaces CPU processing
	        # Default: 600s (10 minutes) to handle model loading on first run
	        timeout_seconds = int(os.getenv("CU1_API_TIMEOUT", "600"))
	        # Strip a trailing slash so the endpoint never becomes ".../​/detect"
	        endpoint = f"{api_url.rstrip('/')}/detect"
	        try:
	            response = requests.post(
	                endpoint,
	                files=files,
	                data=data,
	                timeout=timeout_seconds
	            )
	            response.raise_for_status()
	        except requests.exceptions.ConnectionError:
	            return None, f"""❌ Connection Error

	Cannot connect to API server at `{api_url}`

	To fix this:
	1. Make sure the API server is running:
	```bash
	python app_api.py
	```
	2. The API should be accessible at http://localhost:8000
	3. Check that no firewall is blocking the connection

	Current API URL: {api_url}
	You can change this by setting the `CU1_API_URL` environment variable.
	""", None
	        except requests.exceptions.Timeout:
	            return None, f"""❌ Timeout Error

	The API request timed out after {timeout_seconds} seconds.

	Most likely cause: First-time model initialization on HuggingFace Spaces

	What to do:
	1. Wait 2-3 minutes and try again (models are loading in background)
	2. Check the "Logs" tab in HuggingFace Spaces to see progress
	3. If you see "[API] Starting detection..." in logs, the API is working

	For debugging:
	- Check if you see initialization messages in logs
	- Look for "Loading RF-DETR model..." or "Loading OCR reader..."
	- These operations can take 2-5 minutes on CPU the first time
	""", None
	        except requests.exceptions.HTTPError as e:
	            # Prefer the "detail" field of a JSON error body. Fall back to the
	            # exception text when the body is not JSON (ValueError) or is JSON
	            # but not an object (AttributeError on .get).
	            try:
	                error_detail = response.json().get("detail", str(e))
	            except (ValueError, AttributeError):
	                error_detail = str(e)
	            return None, f"""❌ API Error ({response.status_code})

	{error_detail}

	API URL: {api_url}
	""", None

	        # Parse response
	        json_payload = response.json()

	        # Serialize once: every path below hands Gradio a JSON string, never a dict
	        json_text = json.dumps(json_payload, indent=2)

	        if not json_payload.get("success", False):
	            return None, f"❌ Detection failed: {json_payload.get('error', 'Unknown error')}", json_text

	        # Decode annotated image (best effort: a bad image must not hide the results)
	        annotated_image = None
	        annotated_info = json_payload.get("annotated_image")
	        if annotated_info:
	            try:
	                img_data = base64.b64decode(annotated_info["base64"])
	                annotated_image = Image.open(io.BytesIO(img_data))
	            except Exception as e:
	                print(f"Failed to decode annotated image: {e}")

	        # Build summary text using response_builder
	        parameters = json_payload.get("parameters", {})
	        summary_text = response_builder.format_summary_text(
	            detections=json_payload.get("detections", []),
	            parameters=parameters,
	            ocr_only=parameters.get("ocr_only", False)
	        )

	        # Return JSON as string for Gradio compatibility
	        return annotated_image, summary_text, json_text

	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising
	        error_msg = f"""❌ Error during detection:

	```
	{str(e)}

	{traceback.format_exc()}
	```

	API URL: {api_url}
	"""
	        print(error_msg)
	        return None, error_msg, None