# CU1-X/detection/response_builder.py
"""
Response Builder - Standardized Response Formatting
This module provides utilities for formatting detection results into
standardized response formats for API and UI consumption.
"""
import base64
from collections import Counter
from typing import Any, Dict, List, Optional

import cv2
import numpy as np
from PIL import Image
def build_detection_response(
    analysis: Dict,
    image: Image.Image,
    annotated_image: Optional[np.ndarray] = None,
    confidence_threshold: float = 0.35,
    line_thickness: int = 2,
    enable_clip: bool = False,
    enable_ocr: bool = True,
    enable_blip: bool = False,
    blip_scope: Optional[str] = None,
    ocr_only: bool = False,
    include_annotated_image: bool = True
) -> Dict:
    """
    Build standardized detection response for API/UI.

    Args:
        analysis: Detection analysis results from DetectionService or OCR handler
        image: Original PIL Image (only used as a size fallback when the
            analysis does not carry an "image_size" entry)
        annotated_image: Optional annotated image (numpy array, RGB)
        confidence_threshold: Confidence threshold used
        line_thickness: Box line thickness used (recorded in response parameters)
        enable_clip: Whether CLIP classification was enabled
        enable_ocr: Whether OCR was enabled
        enable_blip: Whether BLIP was enabled
        blip_scope: BLIP scope ("icons" or "all")
        ocr_only: Whether this was OCR-only mode
        include_annotated_image: Whether to include base64-encoded annotated image

    Returns:
        Standardized response dictionary with detections, metadata, and parameters
    """
    detections = analysis.get("detections", [])

    # A type distribution only exists when CLIP labels were produced
    type_counts = None
    if enable_clip and not ocr_only:
        type_counts = build_type_distribution(detections)

    # Only touch the original image when the analysis did not record its size
    # (the previous eager dict-literal default built it on every call).
    image_size = analysis.get("image_size")
    if image_size is None:
        image_size = {"width": image.width, "height": image.height}

    response = {
        "success": True,
        "detections": detections,
        "total_detections": len(detections),
        "image_size": image_size,
        "parameters": {
            "confidence_threshold": confidence_threshold,
            "line_thickness": line_thickness,
            # In OCR-only mode the model flags are forced off so the
            # reported parameters reflect what actually ran
            "enable_clip": enable_clip if not ocr_only else False,
            "enable_ocr": enable_ocr if not ocr_only else False,
            "enable_blip": enable_blip if not ocr_only else False,
            "blip_scope": blip_scope if enable_blip and not ocr_only else None,
            "ocr_only": ocr_only
        },
        "type_distribution": type_counts
    }

    # Attach the annotated image as a base64 PNG when requested and available
    if include_annotated_image and annotated_image is not None:
        # OpenCV encodes BGR; the annotated image arrives as RGB
        img_bgr = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
        ok, png_bytes = cv2.imencode(".png", img_bgr)
        if ok:  # silently omit the image if encoding fails (best-effort)
            response["annotated_image"] = {
                "mime": "image/png",
                "base64": base64.b64encode(png_bytes.tobytes()).decode("ascii")
            }
    return response
def build_type_distribution(detections: List[Dict]) -> Dict[str, int]:
    """
    Build element type distribution from detections.

    Args:
        detections: List of detection dictionaries with a class_name field

    Returns:
        Dictionary mapping class names to counts; detections with a missing
        or empty class_name are excluded
    """
    # Counter does the frequency counting; the generator filters out
    # detections without a usable class name
    names = (det.get("class_name", "") for det in detections)
    return dict(Counter(name for name in names if name))
def format_summary_text(
    detections: List[Dict],
    parameters: Dict,
    ocr_only: bool = False
) -> str:
    """
    Render detection results as a markdown summary for the Gradio UI.

    Args:
        detections: Detection dictionaries to summarize
        parameters: The detection parameters that were used
        ocr_only: True when results come from OCR-only mode

    Returns:
        Markdown-formatted summary string
    """
    enabled, disabled = "βœ… Enabled", "❌ Disabled"
    out = []

    # Header: the total label differs between OCR-only and full detection
    if ocr_only:
        out.append("**OCR-only mode**")
        out.append(f"**Total OCR texts:** {len(detections)}")
    else:
        out.append(f"**Total detections:** {len(detections)}")

    out.append("")
    out.append("**Settings:**")
    out.append(f"- Confidence threshold: {parameters.get('confidence_threshold', 0.35):.2f}")

    clip_on = parameters.get('enable_clip', False)
    ocr_on = parameters.get('enable_ocr', True)
    blip_on = parameters.get('enable_blip', False)
    scope = parameters.get('blip_scope')
    thickness = parameters.get('line_thickness')

    out.append(f"- CLIP classification: {enabled if clip_on else disabled}")
    # OCR-only mode always ran OCR, regardless of the enable_ocr flag
    out.append(f"- OCR text extraction: {enabled if (ocr_on or ocr_only) else disabled}")
    if thickness is not None:
        out.append(f"- Box line thickness: {thickness}")

    blip_line = f"- BLIP description: {enabled if blip_on else disabled}"
    if blip_on and scope:
        label = "All elements" if scope == "all" else "Only image & button"
        blip_line += f" (scope: {label})"
    out.append(blip_line)

    # The element-type breakdown only makes sense when CLIP labels exist
    if clip_on and not ocr_only and detections:
        counts = build_type_distribution(detections)
        if counts:
            out.append("")
            out.append("**Element types:**")
            for name, n in sorted(counts.items(), key=lambda kv: -kv[1]):
                out.append(f"- {name}: {n}")

    return "\n".join(out)
def build_ocr_only_response(
    detections: List[Dict],
    image_width: int,
    image_height: int,
    annotated_image: Optional[np.ndarray] = None,
    confidence_threshold: float = 0.35,
    line_thickness: int = 2
) -> Dict:
    """
    Build response specifically for OCR-only mode.

    Args:
        detections: List of OCR detections
        image_width: Original image width
        image_height: Original image height
        annotated_image: Optional annotated image (numpy array, RGB)
        confidence_threshold: Confidence threshold (for consistency in response)
        line_thickness: Box line thickness (for consistency in response)

    Returns:
        OCR-only response dictionary
    """
    response = {
        "success": True,
        "detections": detections,
        "total_detections": len(detections),
        "image_size": {"width": image_width, "height": image_height},
        "parameters": {
            "confidence_threshold": confidence_threshold,
            "line_thickness": line_thickness,
            # Model-based stages never run in OCR-only mode
            "enable_clip": False,
            "enable_ocr": False,  # Not using standard OCR flow
            "enable_blip": False,
            "blip_scope": None,
            "ocr_only": True
        },
        "type_distribution": None
    }

    # Attach the annotated image as a base64 PNG when provided
    if annotated_image is not None:
        # OpenCV encodes BGR; the annotated image arrives as RGB
        img_bgr = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
        ok, png_bytes = cv2.imencode(".png", img_bgr)
        if ok:  # silently omit the image if encoding fails (best-effort)
            response["annotated_image"] = {
                "mime": "image/png",
                "base64": base64.b64encode(png_bytes.tobytes()).decode("ascii")
            }
    return response