Spaces:

kamcio1989
/

anycoder-0c3bc5d9

Build error

File size: 7,905 Bytes

82c6a9d

import cv2
import numpy as np
import json
from typing import Tuple, List, Dict, Any
import logging

# Configure logging: basicConfig sets up the root logger at INFO level when
# this module is imported; `logger` is the module-level logger used by the
# functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def load_detection_models() -> Tuple[Any, Any, List[str]]:
    """
    Load face detection and object detection models.

    The Haar cascade is read from OpenCV's bundled ``cv2.data.haarcascades``
    directory. The MobileNet SSD files are referenced by bare file name
    (``MobileNetSSD_deploy.prototxt`` / ``MobileNetSSD_deploy.caffemodel``),
    so they are resolved relative to the current working directory.

    Returns:
        Tuple of (face_cascade, object_net, class_names).
        ``face_cascade`` is None when the cascade file could not be loaded,
        ``object_net`` is None when the network could not be loaded, and the
        whole result is ``(None, None, [])`` if an unexpected error occurs.
    """
    try:
        # Load face detection cascade
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        face_cascade = cv2.CascadeClassifier(cascade_path)

        # CascadeClassifier does not raise on a missing or unreadable file; it
        # just stays empty and fails later inside detectMultiScale. Report it
        # here and hand back None so callers can skip face detection.
        if face_cascade.empty():
            logger.warning(f"Face cascade could not be loaded from {cascade_path}.")
            face_cascade = None

        # Load object detection model (MobileNet SSD)
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"

        try:
            object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
        except Exception as e:
            # Best effort: run without object detection. `except Exception`
            # (instead of a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            logger.warning("Object detection model files not found. Using placeholder.")
            logger.debug(f"readNetFromCaffe failed: {e}")
            object_net = None

        # Class labels indexed by the class id reported by the network. These
        # are the 20 PASCAL VOC categories plus "background" (not COCO).
        class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
        ]

        return face_cascade, object_net, class_names

    except Exception as e:
        logger.error(f"Error loading models: {e}")
        return None, None, []

def _to_bgr_for_detection(image: np.ndarray) -> np.ndarray:
    """Return a 3-channel BGR version of an RGB, RGBA or grayscale image."""
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.ndim == 3 and image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    if image.ndim == 3 and image.shape[2] == 3:
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # Unknown layout: pass a copy through and let OpenCV report the problem.
    return image.copy()


def process_image(
    image: np.ndarray,
    face_cascade: Any,
    object_net: Any,
    class_names: List[str],
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float
) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
    """
    Process the input image for face and object detection.

    Args:
        image: Input image in RGB channel order; grayscale (2-D or single
            channel) and RGBA inputs are converted as needed.
        face_cascade: Face detection cascade, or None to skip face detection
        object_net: Object detection network, or None to skip object detection
        class_names: List of class names, indexed by the network's class id
        enable_face_detection: Whether to detect faces
        enable_object_detection: Whether to detect objects
        face_confidence: Face detection confidence threshold. Currently
            unused: the Haar cascade call does not yield a score, so every
            face is reported with confidence 1.0.
        object_confidence: Object detection confidence threshold; only
            detections with a strictly greater score are kept.

    Returns:
        Tuple of (processed_image, face_results, object_results). The image
        is the input object itself, unmodified. Each result is a dict with
        "id", "bbox" ([x, y, width, height] in pixels), "confidence" and
        "label".
    """
    face_results: List[Dict] = []
    object_results: List[Dict] = []

    run_faces = enable_face_detection and face_cascade is not None
    run_objects = enable_object_detection and object_net is not None

    # Nothing to analyse (no image, or no detector enabled/available): skip
    # the colour conversions entirely.
    if image is None or image.size == 0 or not (run_faces or run_objects):
        return image, face_results, object_results

    # Convert to BGR for OpenCV processing
    image_bgr = _to_bgr_for_detection(image)

    # Face detection
    if run_faces:
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        try:
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30)
            )
        except cv2.error as e:
            # e.g. an empty (never loaded) cascade; stay best-effort like the
            # object detection branch below.
            logger.warning(f"Face detection failed: {e}")
            faces = ()

        for i, (x, y, w, h) in enumerate(faces):
            face_results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence
                "label": "face"
            })

    # Object detection
    if run_objects:
        try:
            img_h, img_w = image_bgr.shape[:2]
            # Scale 0.007843 (~1/127.5) with mean 127.5 and a 300x300 input,
            # as passed to blobFromImage.
            blob = cv2.dnn.blobFromImage(
                image_bgr, 0.007843, (300, 300), 127.5
            )
            object_net.setInput(blob)
            detections = object_net.forward()

            # Each detection row is read as
            # [_, class_id, confidence, x1, y1, x2, y2] with coordinates
            # given as fractions of the image width/height.
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if not confidence > object_confidence:
                    continue

                idx = int(detections[0, 0, i, 1])
                # Reject negative ids too: they would silently index from
                # the end of class_names.
                if not 0 <= idx < len(class_names):
                    continue

                # Clamp to the image so boxes never start or end outside it.
                x1 = min(max(int(detections[0, 0, i, 3] * img_w), 0), img_w)
                y1 = min(max(int(detections[0, 0, i, 4] * img_h), 0), img_h)
                x2 = min(max(int(detections[0, 0, i, 5] * img_w), 0), img_w)
                y2 = min(max(int(detections[0, 0, i, 6] * img_h), 0), img_h)

                object_results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": float(confidence),
                    "label": class_names[idx]
                })
        except Exception as e:
            logger.warning(f"Object detection failed: {e}")

    return image, face_results, object_results

def _draw_labeled_box(canvas: np.ndarray, bbox, label, color) -> None:
    """Draw one box on the BGR ``canvas`` in place, plus a filled label tag if ``label`` is not None."""
    x, y, w, h = (int(v) for v in bbox[:4])
    cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)

    if label is None:
        return

    font = cv2.FONT_HERSHEY_SIMPLEX
    text_w, text_h = cv2.getTextSize(label, font, 0.5, 2)[0]

    # Prefer the tag just above the box; if that would leave the image,
    # place it just inside the top edge of the box instead.
    tag_top = y - text_h - 10
    if tag_top < 0:
        tag_top = max(y, 0)

    cv2.rectangle(
        canvas,
        (x, tag_top),
        (x + text_w, tag_top + text_h + 10),
        color,
        -1
    )
    cv2.putText(
        canvas, label, (x, tag_top + text_h + 5),
        font, 0.5, (255, 255, 255), 2
    )


def draw_detections(
    image: np.ndarray,
    face_results: List[Dict],
    object_results: List[Dict],
    show_labels: bool,
    box_color: str
) -> np.ndarray:
    """
    Draw bounding boxes and labels on the image.

    Args:
        image: Input image in RGB order. Grayscale (2-D or single channel)
            input is expanded to three channels; for RGBA input the alpha
            channel is dropped.
        face_results: Face detection results (dicts with "id" and "bbox")
        object_results: Object detection results (dicts with "bbox",
            "label" and "confidence")
        show_labels: Whether to show labels
        box_color: Color name for bounding boxes (case-insensitive); unknown
            names fall back to red.

    Returns:
        A new 3-channel RGB image with the detections drawn; the input array
        is not modified.

    Raises:
        ValueError: If the image is not 2-D or 3-D with 1, 3 or 4 channels.
    """
    # Build a contiguous BGR canvas for OpenCV drawing. Reversing the first
    # three channels is exactly the RGB -> BGR swap.
    if image.ndim == 2:
        canvas = np.repeat(image[:, :, np.newaxis], 3, axis=2)
    elif image.ndim == 3 and image.shape[2] == 1:
        canvas = np.repeat(image, 3, axis=2)
    elif image.ndim == 3 and image.shape[2] in (3, 4):
        canvas = np.ascontiguousarray(image[:, :, 2::-1])
    else:
        raise ValueError(f"Unsupported image shape: {image.shape}")

    # Color mapping (tuples are in BGR order, matching the canvas)
    color_map = {
        "red": (0, 0, 255),
        "green": (0, 255, 0),
        "blue": (255, 0, 0),
        "yellow": (0, 255, 255),
        "purple": (255, 0, 255),
        "orange": (0, 165, 255)
    }

    color = color_map.get(str(box_color).lower(), (0, 0, 255))

    # Draw face detections
    for face in face_results:
        label = f"Face {face['id']}" if show_labels else None
        _draw_labeled_box(canvas, face["bbox"], label, color)

    # Draw object detections
    for obj in object_results:
        label = f"{obj['label']}: {obj['confidence']:.2f}" if show_labels else None
        _draw_labeled_box(canvas, obj["bbox"], label, color)

    # Convert back to RGB
    return np.ascontiguousarray(canvas[:, :, ::-1])

def format_results(results: List[Dict], result_type: str) -> str:
    """
    Format detection results as a readable string.

    Args:
        results: Detection results; each dict needs "id" and "bbox"
            ([x, y, width, height]) and may carry "confidence" and "label".
        result_type: Type of results (face/object), in singular form

    Returns:
        Formatted string: a header line followed by one entry per result, or
        "No <type>s detected" when the list is empty.
    """
    if not results:
        return f"No {result_type}s detected"

    count = len(results)
    # Use the singular for exactly one result ("Detected 1 face:").
    noun = result_type if count == 1 else f"{result_type}s"
    title = result_type.capitalize()

    output = [f"Detected {count} {noun}:"]
    for result in results:
        bbox = result["bbox"]
        output.append(
            f"  - {title} {result['id']}: "
            f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
        )
        if "confidence" in result:
            output.append(f"    Confidence: {result['confidence']:.2f}")
        # The label is only worth a line when it says more than the type.
        if "label" in result and result["label"] != result_type:
            output.append(f"    Label: {result['label']}")

    return "\n".join(output)