Spaces:

kamcio1989
/

anycoder-0c3bc5d9

Build error

File size: 9,177 Bytes

82c6a9d

import cv2
import numpy as np
from typing import List, Dict, Tuple, Any
import logging

# Module-level logger named after this module; used by the detector classes below.
logger = logging.getLogger(__name__)

class FaceDetector:
    """Face detection using Haar Cascade classifiers.

    Three cascades bundled with OpenCV are loaded on construction: frontal
    face, eye and smile. Any cascade that cannot be loaded is stored as
    ``None`` so callers (and ``detect_faces``) can test for availability
    with a simple ``is None`` check.
    """

    def __init__(self):
        self.face_cascade = None
        self.eye_cascade = None
        self.smile_cascade = None
        self.load_models()

    @staticmethod
    def _load_cascade(filename: str):
        """Load one bundled Haar cascade; return None when it is unusable."""
        try:
            cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        except Exception as e:
            logger.error("Failed to load face detection models: %s", e)
            return None
        # CascadeClassifier does not raise when the XML file is missing or
        # unreadable; it silently yields an empty classifier that would only
        # fail later inside detectMultiScale. Detect that here instead.
        if cascade.empty():
            logger.error(
                "Failed to load face detection models: %s could not be read",
                filename,
            )
            return None
        return cascade

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a single-channel version of a gray, BGR or BGRA image."""
        if image.ndim == 2:
            # Already grayscale: cvtColor with BGR2GRAY would reject it.
            return image
        channels = image.shape[2]
        if channels == 1:
            return np.ascontiguousarray(image[:, :, 0])
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def load_models(self):
        """Load Haar Cascade models.

        Each cascade is loaded independently, so a missing eye or smile
        cascade does not prevent face detection from working.
        """
        self.face_cascade = self._load_cascade('haarcascade_frontalface_default.xml')
        self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
        self.smile_cascade = self._load_cascade('haarcascade_smile.xml')

        cascades = (self.face_cascade, self.eye_cascade, self.smile_cascade)
        if all(cascade is not None for cascade in cascades):
            logger.info("Face detection models loaded successfully")

    def detect_faces(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
        """
        Detect faces in the input image.

        Args:
            image: Input image in BGR format (grayscale and BGRA arrays are
                also accepted)
            confidence_threshold: Not used for Haar cascade (always returns high confidence)

        Returns:
            List of face detection results. Each entry has ``id``, ``bbox``
            as ``[x, y, w, h]``, ``confidence`` (always 1.0), ``label`` and
            a ``features`` dict with ``eyes_detected`` (count) and
            ``smile_detected`` (bool). An empty list is returned when the
            face cascade is unavailable or the image is ``None``/empty.
        """
        if self.face_cascade is None:
            return []
        # Nothing to scan; avoid an opaque OpenCV assertion further down.
        if image is None or getattr(image, "size", 0) == 0:
            return []

        gray = self._to_gray(image)
        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        results = []
        for i, (x, y, w, h) in enumerate(faces):
            # Eyes and smiles are searched only inside the face rectangle.
            roi_gray = gray[y:y+h, x:x+w]
            eyes = self.eye_cascade.detectMultiScale(roi_gray) if self.eye_cascade is not None else []

            smiles = self.smile_cascade.detectMultiScale(
                roi_gray,
                scaleFactor=1.7,
                minNeighbors=22,
                minSize=(25, 25)
            ) if self.smile_cascade is not None else []

            results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence scores
                "label": "face",
                "features": {
                    "eyes_detected": len(eyes),
                    "smile_detected": len(smiles) > 0
                }
            })

        return results

class ObjectDetector:
    """Object detection using MobileNet SSD.

    The network is loaded from ``MobileNetSSD_deploy.prototxt`` and
    ``MobileNetSSD_deploy.caffemodel`` in the working directory. When loading
    fails, ``net`` stays ``None`` and ``detect_objects`` returns clearly
    marked placeholder detections instead.
    """

    def __init__(self):
        self.net = None
        self.classes = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
        ]
        self.load_model()

    def load_model(self):
        """Load the MobileNet SSD model, leaving ``net`` as None on failure."""
        # Files may not exist in all environments.
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"
        try:
            self.net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
        except Exception as e:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate; keep the reason for debugging.
            logger.warning("Object detection model files not found. Using placeholder.")
            logger.debug("Object detection model load error: %s", e)
            self.net = None
        else:
            logger.info("Object detection model loaded successfully")

    def detect_objects(self, image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
        """
        Detect objects in the input image.

        Args:
            image: Input image in BGR format
            confidence_threshold: Minimum confidence for detection

        Returns:
            List of object detection results. Each entry has ``id``, ``bbox``
            as ``[x, y, w, h]``, ``confidence``, ``label`` and ``class_id``.
            Placeholder results (model not loaded) additionally carry
            ``"placeholder": True``. An empty list is returned for a
            ``None``/empty image or when inference fails.
        """
        # Nothing to analyse; also protects the placeholder path, which
        # reads image.shape.
        if image is None or getattr(image, "size", 0) == 0:
            return []

        if self.net is None:
            # Return placeholder detections for demo purposes
            return self._placeholder_detections(image)

        try:
            h, w = image.shape[:2]

            # blobFromImage resizes to 300x300 and applies scale/mean.
            blob = cv2.dnn.blobFromImage(
                image, 0.007843, (300, 300), 127.5
            )

            self.net.setInput(blob)
            detections = self.net.forward()

            results = []
            for i in range(detections.shape[2]):
                confidence = float(detections[0, 0, i, 2])
                if confidence <= confidence_threshold:
                    continue

                idx = int(detections[0, 0, i, 1])
                # A negative id would otherwise index from the end of the
                # class list and produce a wrong label.
                if not 0 <= idx < len(self.classes):
                    continue

                # Box corners are fractions of the image size and may fall
                # slightly outside [0, 1]; clamp them to the image.
                x1 = min(max(int(detections[0, 0, i, 3] * w), 0), w)
                y1 = min(max(int(detections[0, 0, i, 4] * h), 0), h)
                x2 = min(max(int(detections[0, 0, i, 5] * w), 0), w)
                y2 = min(max(int(detections[0, 0, i, 6] * h), 0), h)

                results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": confidence,
                    "label": self.classes[idx],
                    "class_id": idx
                })

            return results

        except Exception as e:
            logger.error(f"Object detection failed: {e}")
            return []

    def _placeholder_detections(self, image: np.ndarray) -> List[Dict]:
        """
        Generate placeholder detections for demo when model is not available.

        Args:
            image: Input image

        Returns:
            Placeholder detection results: three square boxes at random
            positions, each flagged with ``"placeholder": True``.
        """
        h, w = image.shape[:2]

        placeholder_objects = [
            {"label": "person", "confidence": 0.85, "size_factor": 0.3},
            {"label": "car", "confidence": 0.75, "size_factor": 0.2},
            {"label": "bottle", "confidence": 0.65, "size_factor": 0.1}
        ]

        results = []
        for i, obj in enumerate(placeholder_objects):
            # Square box whose side is a fraction of the shorter image side.
            size = int(min(h, w) * obj["size_factor"])
            # max(1, ...) keeps the randint range non-empty for tiny images;
            # int() guarantees plain Python ints in the result.
            x = int(np.random.randint(0, max(1, w - size)))
            y = int(np.random.randint(0, max(1, h - size)))

            results.append({
                "id": i,
                "bbox": [x, y, size, size],
                "confidence": obj["confidence"],
                "label": obj["label"],
                "class_id": i + 1,
                "placeholder": True
            })

        return results

# Detector instances
# Module-level slots for the shared detectors. Both start as None and are
# filled on first use by get_face_detector() / get_object_detector().
_face_detector = None
_object_detector = None

def get_face_detector() -> FaceDetector:
    """Return the shared FaceDetector, constructing it on the first call."""
    global _face_detector
    if _face_detector is not None:
        return _face_detector
    _face_detector = FaceDetector()
    return _face_detector

def get_object_detector() -> ObjectDetector:
    """Return the shared ObjectDetector, constructing it on the first call."""
    global _object_detector
    if _object_detector is not None:
        return _object_detector
    _object_detector = ObjectDetector()
    return _object_detector

def detect_faces(image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
    """
    Run face detection through the shared module-level face detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold, forwarded to the detector

    Returns:
        Face detection results as produced by ``FaceDetector.detect_faces``
    """
    return get_face_detector().detect_faces(image, confidence_threshold)

def detect_objects(image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
    """
    Run object detection through the shared module-level object detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold, forwarded to the detector

    Returns:
        Object detection results as produced by ``ObjectDetector.detect_objects``
    """
    return get_object_detector().detect_objects(image, confidence_threshold)

def get_model_info() -> Dict[str, Any]:
    """
    Get information about the loaded models.

    Obtains (and, on first use, creates) the shared face and object
    detectors and reports whether each one has a usable model.

    Returns:
        Dictionary with model information, keyed by ``"face_detector"`` and
        ``"object_detector"``
    """
    face_detector = get_face_detector()
    object_detector = get_object_detector()

    # A CascadeClassifier object exists even when its XML file could not be
    # read; only empty() tells whether it actually holds a model.
    cascade = face_detector.face_cascade
    face_loaded = cascade is not None and not cascade.empty()

    return {
        "face_detector": {
            "model_type": "Haar Cascade",
            "loaded": face_loaded,
            "features": ["face", "eyes", "smile"],
            "input_format": "BGR",
            "output_format": "bounding boxes"
        },
        "object_detector": {
            "model_type": "MobileNet-SSD",
            "loaded": object_detector.net is not None,
            "num_classes": len(object_detector.classes),
            "input_size": "300x300",
            "output_format": "bounding boxes with confidence"
        }
    }