Spaces:

kamcio1989
/

anycoder-0c3bc5d9

Build error

App Files Files Community

kamcio1989 committed on Nov 24, 2025

Commit

82c6a9d

verified ·

1 Parent(s): d196433

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

models.py +269 -0
requirements.txt +1 -0
utils.py +239 -0

models.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import cv2
+import numpy as np
+from typing import List, Dict, Tuple, Any
+import logging
+# Module-level logger, named after this module, shared by both detector classes.
+logger = logging.getLogger(__name__)
+class FaceDetector:
+    """Face detection using Haar Cascade classifiers."""
+    def __init__(self):
+        self.face_cascade = None
+        self.eye_cascade = None
+        self.smile_cascade = None
+        self.load_models()
+    def load_models(self):
+        """Load Haar Cascade models."""
+        try:
+            self.face_cascade = cv2.CascadeClassifier(
+                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
+            )
+            self.eye_cascade = cv2.CascadeClassifier(
+                cv2.data.haarcascades + 'haarcascade_eye.xml'
+            )
+            self.smile_cascade = cv2.CascadeClassifier(
+                cv2.data.haarcascades + 'haarcascade_smile.xml'
+            )
+            logger.info("Face detection models loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load face detection models: {e}")
+    def detect_faces(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
+        """
+        Detect faces in the input image.
+        Args:
+            image: Input image in BGR format
+            confidence_threshold: Not used for Haar cascade (always returns high confidence)
+        Returns:
+            List of face detection results
+        """
+        if self.face_cascade is None:
+            return []
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        faces = self.face_cascade.detectMultiScale(
+            gray,
+            scaleFactor=1.1,
+            minNeighbors=5,
+            minSize=(30, 30),
+            flags=cv2.CASCADE_SCALE_IMAGE
+        )
+        results = []
+        for i, (x, y, w, h) in enumerate(faces):
+            # Detect eyes within face region
+            roi_gray = gray[y:y+h, x:x+w]
+            eyes = self.eye_cascade.detectMultiScale(roi_gray) if self.eye_cascade is not None else []
+            # Detect smile within face region
+            smiles = self.smile_cascade.detectMultiScale(
+                roi_gray,
+                scaleFactor=1.7,
+                minNeighbors=22,
+                minSize=(25, 25)
+            ) if self.smile_cascade is not None else []
+            results.append({
+                "id": i,
+                "bbox": [int(x), int(y), int(w), int(h)],
+                "confidence": 1.0,  # Haar cascade doesn't provide confidence scores
+                "label": "face",
+                "features": {
+                    "eyes_detected": len(eyes) if len(eyes) > 0 else 0,
+                    "smile_detected": len(smiles) > 0
+                }
+            })
+        return results
+class ObjectDetector:
+    """Object detection using MobileNet SSD."""
+    def __init__(self):
+        self.net = None
+        self.classes = [
+            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
+            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
+            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
+        ]
+        self.load_model()
+    def load_model(self):
+        """Load the MobileNet SSD model."""
+        try:
+            # Try to load the model (files may not exist in all environments)
+            model_path = "MobileNetSSD_deploy.prototxt"
+            weights_path = "MobileNetSSD_deploy.caffemodel"
+            self.net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
+            logger.info("Object detection model loaded successfully")
+        except:
+            logger.warning("Object detection model files not found. Using placeholder.")
+            self.net = None
+    def detect_objects(self, image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
+        """
+        Detect objects in the input image.
+        Args:
+            image: Input image in BGR format
+            confidence_threshold: Minimum confidence for detection
+        Returns:
+            List of object detection results
+        """
+        if self.net is None:
+            # Return placeholder detections for demo purposes
+            return self._placeholder_detections(image)
+        try:
+            h, w = image.shape[:2]
+            # Create blob from image
+            blob = cv2.dnn.blobFromImage(
+                image, 0.007843, (300, 300), 127.5
+            )
+            # Pass blob through the network
+            self.net.setInput(blob)
+            detections = self.net.forward()
+            results = []
+            for i in range(detections.shape[2]):
+                confidence = detections[0, 0, i, 2]
+                if confidence > confidence_threshold:
+                    idx = int(detections[0, 0, i, 1])
+                    if idx < len(self.classes):
+                        x1 = int(detections[0, 0, i, 3] * w)
+                        y1 = int(detections[0, 0, i, 4] * h)
+                        x2 = int(detections[0, 0, i, 5] * w)
+                        y2 = int(detections[0, 0, i, 6] * h)
+                        results.append({
+                            "id": i,
+                            "bbox": [x1, y1, x2 - x1, y2 - y1],
+                            "confidence": float(confidence),
+                            "label": self.classes[idx],
+                            "class_id": idx
+                        })
+            return results
+        except Exception as e:
+            logger.error(f"Object detection failed: {e}")
+            return []
+    def _placeholder_detections(self, image: np.ndarray) -> List[Dict]:
+        """
+        Generate placeholder detections for demo when model is not available.
+        Args:
+            image: Input image
+        Returns:
+            Placeholder detection results
+        """
+        h, w = image.shape[:2]
+        # Generate some random placeholder detections
+        placeholder_objects = [
+            {"label": "person", "confidence": 0.85, "size_factor": 0.3},
+            {"label": "car", "confidence": 0.75, "size_factor": 0.2},
+            {"label": "bottle", "confidence": 0.65, "size_factor": 0.1}
+        ]
+        results = []
+        for i, obj in enumerate(placeholder_objects):
+            # Random position with size based on factor
+            size = int(min(h, w) * obj["size_factor"])
+            x = np.random.randint(0, max(1, w - size))
+            y = np.random.randint(0, max(1, h - size))
+            results.append({
+                "id": i,
+                "bbox": [x, y, size, size],
+                "confidence": obj["confidence"],
+                "label": obj["label"],
+                "class_id": i + 1,
+                "placeholder": True
+            })
+        return results
+# Detector instances: module-level cache of the two detectors. Both start as
+# None and are created on first use by get_face_detector() and
+# get_object_detector().
+_face_detector = None
+_object_detector = None
+def get_face_detector() -> FaceDetector:
+    """Get or create face detector instance."""
+    global _face_detector
+    if _face_detector is None:
+        _face_detector = FaceDetector()
+    return _face_detector
+def get_object_detector() -> ObjectDetector:
+    """Get or create object detector instance."""
+    global _object_detector
+    if _object_detector is None:
+        _object_detector = ObjectDetector()
+    return _object_detector
+def detect_faces(image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
+    """
+    Detect faces using the global face detector.
+    Args:
+        image: Input image
+        confidence_threshold: Confidence threshold
+    Returns:
+        Face detection results
+    """
+    detector = get_face_detector()
+    return detector.detect_faces(image, confidence_threshold)
+def detect_objects(image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
+    """
+    Detect objects using the global object detector.
+    Args:
+        image: Input image
+        confidence_threshold: Confidence threshold
+    Returns:
+        Object detection results
+    """
+    detector = get_object_detector()
+    return detector.detect_objects(image, confidence_threshold)
+def get_model_info() -> Dict[str, Any]:
+    """
+    Get information about the loaded models.
+    Returns:
+        Dictionary with model information
+    """
+    face_detector = get_face_detector()
+    object_detector = get_object_detector()
+    return {
+        "face_detector": {
+            "model_type": "Haar Cascade",
+            "loaded": face_detector.face_cascade is not None,
+            "features": ["face", "eyes", "smile"],
+            "input_format": "BGR",
+            "output_format": "bounding boxes"
+        },
+        "object_detector": {
+            "model_type": "MobileNet-SSD",
+            "loaded": object_detector.net is not None,
+            "num_classes": len(object_detector.classes),
+            "input_size": "300x300",
+            "output_format": "bounding boxes with confidence"
+        }
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ gradio>=4.0.0

utils.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import cv2
+import numpy as np
+import json
+from typing import Tuple, List, Dict, Any
+import logging
+# Configure logging
+# NOTE: basicConfig runs at import time, so importing this module configures
+# the root logger at INFO level (it does nothing if the root logger already
+# has handlers).
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+def load_detection_models() -> Tuple[Any, Any, List[str]]:
+    """
+    Load face detection and object detection models.
+    Returns:
+        Tuple of (face_cascade, object_net, class_names)
+    """
+    try:
+        # Load face detection cascade
+        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+        # Load object detection model (MobileNet SSD)
+        model_path = "MobileNetSSD_deploy.prototxt"
+        weights_path = "MobileNetSSD_deploy.caffemodel"
+        try:
+            object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
+        except:
+            # If model files don't exist, create a dummy network
+            logger.warning("Object detection model files not found. Using placeholder.")
+            object_net = None
+        # COCO class names
+        class_names = [
+            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
+            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
+            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
+        ]
+        return face_cascade, object_net, class_names
+    except Exception as e:
+        logger.error(f"Error loading models: {e}")
+        return None, None, []
+def process_image(
+    image: np.ndarray,
+    face_cascade: Any,
+    object_net: Any,
+    class_names: List[str],
+    enable_face_detection: bool,
+    enable_object_detection: bool,
+    face_confidence: float,
+    object_confidence: float
+) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
+    """
+    Process the input image for face and object detection.
+    Args:
+        image: Input image
+        face_cascade: Face detection cascade
+        object_net: Object detection network
+        class_names: List of class names
+        enable_face_detection: Whether to detect faces
+        enable_object_detection: Whether to detect objects
+        face_confidence: Face detection confidence threshold
+        object_confidence: Object detection confidence threshold
+    Returns:
+        Tuple of (processed_image, face_results, object_results)
+    """
+    # Convert RGB to BGR for OpenCV processing
+    if len(image.shape) == 3 and image.shape[2] == 3:
+        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    else:
+        image_bgr = image.copy()
+    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
+    face_results = []
+    object_results = []
+    # Face detection
+    if enable_face_detection and face_cascade is not None:
+        faces = face_cascade.detectMultiScale(
+            gray,
+            scaleFactor=1.1,
+            minNeighbors=5,
+            minSize=(30, 30)
+        )
+        for i, (x, y, w, h) in enumerate(faces):
+            face_results.append({
+                "id": i,
+                "bbox": [int(x), int(y), int(w), int(h)],
+                "confidence": 1.0,  # Haar cascade doesn't provide confidence
+                "label": "face"
+            })
+    # Object detection
+    if enable_object_detection and object_net is not None:
+        try:
+            h, w = image_bgr.shape[:2]
+            blob = cv2.dnn.blobFromImage(
+                image_bgr, 0.007843, (300, 300), 127.5
+            )
+            object_net.setInput(blob)
+            detections = object_net.forward()
+            for i in range(detections.shape[2]):
+                confidence = detections[0, 0, i, 2]
+                if confidence > object_confidence:
+                    idx = int(detections[0, 0, i, 1])
+                    if idx < len(class_names):
+                        x1 = int(detections[0, 0, i, 3] * w)
+                        y1 = int(detections[0, 0, i, 4] * h)
+                        x2 = int(detections[0, 0, i, 5] * w)
+                        y2 = int(detections[0, 0, i, 6] * h)
+                        object_results.append({
+                            "id": i,
+                            "bbox": [x1, y1, x2 - x1, y2 - y1],
+                            "confidence": float(confidence),
+                            "label": class_names[idx]
+                        })
+        except Exception as e:
+            logger.warning(f"Object detection failed: {e}")
+    return image, face_results, object_results
+def draw_detections(
+    image: np.ndarray,
+    face_results: List[Dict],
+    object_results: List[Dict],
+    show_labels: bool,
+    box_color: str
+) -> np.ndarray:
+    """
+    Draw bounding boxes and labels on the image.
+    Args:
+        image: Input image
+        face_results: Face detection results
+        object_results: Object detection results
+        show_labels: Whether to show labels
+        box_color: Color for bounding boxes
+    Returns:
+        Image with drawn detections
+    """
+    # Convert to BGR for OpenCV drawing
+    if len(image.shape) == 3 and image.shape[2] == 3:
+        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    else:
+        image_bgr = image.copy()
+    # Color mapping
+    color_map = {
+        "red": (0, 0, 255),
+        "green": (0, 255, 0),
+        "blue": (255, 0, 0),
+        "yellow": (0, 255, 255),
+        "purple": (255, 0, 255),
+        "orange": (0, 165, 255)
+    }
+    color = color_map.get(box_color, (0, 0, 255))
+    # Draw face detections
+    for face in face_results:
+        x, y, w, h = face["bbox"]
+        cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)
+        if show_labels:
+            label = f"Face {face['id']}"
+            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
+            cv2.rectangle(
+                image_bgr,
+                (x, y - label_size[1] - 10),
+                (x + label_size[0], y),
+                color,
+                -1
+            )
+            cv2.putText(
+                image_bgr, label, (x, y - 5),
+                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
+            )
+    # Draw object detections
+    for obj in object_results:
+        x, y, w, h = obj["bbox"]
+        cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)
+        if show_labels:
+            label = f"{obj['label']}: {obj['confidence']:.2f}"
+            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
+            cv2.rectangle(
+                image_bgr,
+                (x, y - label_size[1] - 10),
+                (x + label_size[0], y),
+                color,
+                -1
+            )
+            cv2.putText(
+                image_bgr, label, (x, y - 5),
+                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
+            )
+    # Convert back to RGB
+    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
+def format_results(results: List[Dict], result_type: str) -> str:
+    """
+    Format detection results as a readable string.
+    Args:
+        results: Detection results
+        result_type: Type of results (face/object)
+    Returns:
+        Formatted string
+    """
+    if not results:
+        return f"No {result_type}s detected"
+    output = [f"Detected {len(results)} {result_type}s:"]
+    for result in results:
+        bbox = result["bbox"]
+        output.append(
+            f"  - {result_type.capitalize()} {result['id']}: "
+            f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
+        )
+        if "confidence" in result:
+            output.append(f"    Confidence: {result['confidence']:.2f}")
+        if "label" in result and result["label"] != result_type:
+            output.append(f"    Label: {result['label']}")
+    return "\n".join(output)