# anycoder-0c3bc5d9 / models.py
# kamcio1989's picture
# Upload folder using huggingface_hub
# 82c6a9d verified
import cv2
import numpy as np
from typing import List, Dict, Tuple, Any
import logging
# Module-level logger, named after this module, shared by both detector classes.
logger = logging.getLogger(__name__)
class FaceDetector:
    """Face detection using Haar Cascade classifiers.

    Three cascades shipped with OpenCV are used: a frontal-face cascade that
    finds the faces, plus eye and smile cascades that are run inside every
    detected face region. A cascade that could not be loaded is stored as
    ``None`` and the corresponding detection step is skipped.
    """

    def __init__(self):
        self.face_cascade = None
        self.eye_cascade = None
        self.smile_cascade = None
        self.load_models()

    @staticmethod
    def _load_cascade(filename: str):
        """Load one cascade from OpenCV's data directory, or return None."""
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        # CascadeClassifier does not raise when the XML file is missing or
        # unreadable: it yields an empty classifier that only fails later,
        # inside detectMultiScale. Check explicitly instead.
        if cascade.empty():
            logger.error(f"Failed to load Haar cascade file: {filename}")
            return None
        return cascade

    def load_models(self):
        """Load the face, eye and smile Haar Cascade models.

        Each attribute ends up as a usable classifier or ``None``. Errors are
        logged rather than raised so that construction never fails.
        """
        try:
            self.face_cascade = self._load_cascade(
                'haarcascade_frontalface_default.xml'
            )
            self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
            self.smile_cascade = self._load_cascade('haarcascade_smile.xml')
        except Exception as e:
            logger.error(f"Failed to load face detection models: {e}")
            return

        cascades = (self.face_cascade, self.eye_cascade, self.smile_cascade)
        if all(cascade is not None for cascade in cascades):
            logger.info("Face detection models loaded successfully")

    def detect_faces(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
        """Detect faces in the input image.

        Args:
            image: Input image in BGR format. A 2-D array is treated as an
                already-grayscale image.
            confidence_threshold: Not used for Haar cascade (always returns
                high confidence); kept for interface parity with the object
                detector.

        Returns:
            One dict per face with keys ``id``, ``bbox`` (``[x, y, w, h]``),
            ``confidence``, ``label`` and ``features``. An empty list is
            returned when the face cascade is unavailable or when the image
            is ``None`` or has no pixels.
        """
        if self.face_cascade is None:
            return []
        # Nothing to scan; cvtColor would raise on these inputs.
        if image is None or image.size == 0:
            return []

        # Haar cascades operate on single-channel images.
        if image.ndim == 2:
            gray = image
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        results = []
        for i, (x, y, w, h) in enumerate(faces):
            # Eyes and smiles are only searched for inside the face region.
            roi_gray = gray[y:y + h, x:x + w]

            if self.eye_cascade is not None:
                eyes = self.eye_cascade.detectMultiScale(roi_gray)
            else:
                eyes = []

            if self.smile_cascade is not None:
                # Strict parameters: the smile cascade is prone to false
                # positives with the defaults.
                smiles = self.smile_cascade.detectMultiScale(
                    roi_gray,
                    scaleFactor=1.7,
                    minNeighbors=22,
                    minSize=(25, 25)
                )
            else:
                smiles = []

            results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence scores
                "label": "face",
                "features": {
                    "eyes_detected": int(len(eyes)),
                    "smile_detected": len(smiles) > 0
                }
            })
        return results
class ObjectDetector:
    """Object detection using MobileNet SSD.

    When the Caffe model files cannot be loaded, ``self.net`` stays ``None``
    and ``detect_objects`` returns randomly placed placeholder boxes (marked
    with ``"placeholder": True``) so that a demo keeps working.
    """

    def __init__(
        self,
        model_path: str = "MobileNetSSD_deploy.prototxt",
        weights_path: str = "MobileNetSSD_deploy.caffemodel",
    ):
        """Create the detector and try to load the network.

        Args:
            model_path: Path to the Caffe ``.prototxt`` network definition.
            weights_path: Path to the ``.caffemodel`` weights file.
        """
        self.net = None
        self.model_path = model_path
        self.weights_path = weights_path
        # Index in this list is the class id reported by the network.
        self.classes = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
        ]
        self.load_model()

    def load_model(self):
        """Load the MobileNet SSD model; fall back to ``self.net = None``."""
        try:
            # Try to load the model (files may not exist in all environments)
            self.net = cv2.dnn.readNetFromCaffe(self.model_path, self.weights_path)
            logger.info("Object detection model loaded successfully")
        except Exception as e:
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            logger.warning("Object detection model files not found. Using placeholder.")
            logger.debug(f"Object detection model load error: {e}")
            self.net = None

    def detect_objects(self, image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
        """Detect objects in the input image.

        Args:
            image: Input image in BGR format.
            confidence_threshold: Minimum confidence for detection; only
                detections strictly above it are returned.

        Returns:
            One dict per detection with keys ``id``, ``bbox``
            (``[x, y, w, h]`` in pixels, clamped to the image), ``confidence``,
            ``label`` and ``class_id``. Placeholder results are returned when
            no model is loaded. An empty list is returned for a ``None`` or
            empty image and when inference fails.
        """
        # Same outcome on both the placeholder and the network path.
        if image is None or image.size == 0:
            return []

        if self.net is None:
            # Return placeholder detections for demo purposes
            return self._placeholder_detections(image)

        try:
            h, w = image.shape[:2]
            # Scale factor and mean are the values this code has always used
            # for the 300x300 network input.
            blob = cv2.dnn.blobFromImage(
                image, 0.007843, (300, 300), 127.5
            )
            self.net.setInput(blob)
            detections = self.net.forward()

            results = []
            for i in range(detections.shape[2]):
                confidence = float(detections[0, 0, i, 2])
                if confidence <= confidence_threshold:
                    continue
                idx = int(detections[0, 0, i, 1])
                if not 0 <= idx < len(self.classes):
                    continue
                # Coordinates are relative to the image size and may fall
                # slightly outside it; clamp them to the image bounds.
                x1 = max(0, min(w, int(detections[0, 0, i, 3] * w)))
                y1 = max(0, min(h, int(detections[0, 0, i, 4] * h)))
                x2 = max(0, min(w, int(detections[0, 0, i, 5] * w)))
                y2 = max(0, min(h, int(detections[0, 0, i, 6] * h)))
                if x2 <= x1 or y2 <= y1:
                    # Degenerate box: nothing to draw or crop.
                    continue
                results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": confidence,
                    "label": self.classes[idx],
                    "class_id": idx
                })
            return results
        except Exception as e:
            logger.error(f"Object detection failed: {e}")
            return []

    def _placeholder_detections(self, image: np.ndarray) -> List[Dict]:
        """Generate placeholder detections for demo when model is not available.

        Args:
            image: Input image; only its height and width are used.

        Returns:
            Three fake detections at random positions, each flagged with
            ``"placeholder": True``.
        """
        h, w = image.shape[:2]
        placeholder_objects = [
            {"label": "person", "confidence": 0.85, "size_factor": 0.3},
            {"label": "car", "confidence": 0.75, "size_factor": 0.2},
            {"label": "bottle", "confidence": 0.65, "size_factor": 0.1}
        ]
        results = []
        for i, obj in enumerate(placeholder_objects):
            label = obj["label"]
            # Square box whose side is a fraction of the shorter image edge,
            # placed at a random position that keeps it inside the image.
            size = int(min(h, w) * obj["size_factor"])
            x = int(np.random.randint(0, max(1, w - size)))
            y = int(np.random.randint(0, max(1, h - size)))
            results.append({
                "id": i,
                "bbox": [x, y, size, size],
                "confidence": obj["confidence"],
                "label": label,
                # Keep class_id consistent with the label, as real detections do.
                "class_id": self.classes.index(label) if label in self.classes else -1,
                "placeholder": True
            })
        return results
# Detector instances
# Module-level singleton slots. Both start as None and are filled in on first
# use by get_face_detector() / get_object_detector(), so no model is loaded
# at import time.
_face_detector = None
_object_detector = None
def get_face_detector() -> FaceDetector:
    """Return the shared FaceDetector, building it on the first call."""
    global _face_detector
    detector = _face_detector
    if detector is None:
        # First use: construct once and remember it for later calls.
        detector = FaceDetector()
        _face_detector = detector
    return detector
def get_object_detector() -> ObjectDetector:
    """Return the shared ObjectDetector, building it on the first call."""
    global _object_detector
    if _object_detector is not None:
        return _object_detector
    # First use: construct once and remember it for later calls.
    _object_detector = ObjectDetector()
    return _object_detector
def detect_faces(image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
    """Run face detection through the shared face detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold, forwarded unchanged

    Returns:
        Whatever the shared detector's ``detect_faces`` returns
    """
    return get_face_detector().detect_faces(image, confidence_threshold)
def detect_objects(image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
    """Run object detection through the shared object detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold, forwarded unchanged

    Returns:
        Whatever the shared detector's ``detect_objects`` returns
    """
    return get_object_detector().detect_objects(image, confidence_threshold)
def get_model_info() -> Dict[str, Any]:
    """Describe the two shared detectors.

    Calling this creates the detectors if they do not exist yet.

    Returns:
        Dictionary with a ``face_detector`` and an ``object_detector`` entry,
        each holding static metadata plus a ``loaded`` flag
    """
    faces = get_face_detector()
    objects = get_object_detector()

    face_info = {
        "model_type": "Haar Cascade",
        "loaded": faces.face_cascade is not None,
        "features": ["face", "eyes", "smile"],
        "input_format": "BGR",
        "output_format": "bounding boxes",
    }
    object_info = {
        "model_type": "MobileNet-SSD",
        "loaded": objects.net is not None,
        "num_classes": len(objects.classes),
        "input_size": "300x300",
        "output_format": "bounding boxes with confidence",
    }
    return {"face_detector": face_info, "object_detector": object_info}