# utils.py — uploaded via huggingface_hub (repo anycoder-0c3bc5d9, commit 82c6a9d)
import json
import logging
import os
from typing import Any, Dict, List, Tuple

import cv2
import numpy as np
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_detection_models() -> Tuple[Any, Any, List[str]]:
"""
Load face detection and object detection models.
Returns:
Tuple of (face_cascade, object_net, class_names)
"""
try:
# Load face detection cascade
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Load object detection model (MobileNet SSD)
model_path = "MobileNetSSD_deploy.prototxt"
weights_path = "MobileNetSSD_deploy.caffemodel"
try:
object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
except:
# If model files don't exist, create a dummy network
logger.warning("Object detection model files not found. Using placeholder.")
object_net = None
# COCO class names
class_names = [
"background", "aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]
return face_cascade, object_net, class_names
except Exception as e:
logger.error(f"Error loading models: {e}")
return None, None, []
def process_image(
image: np.ndarray,
face_cascade: Any,
object_net: Any,
class_names: List[str],
enable_face_detection: bool,
enable_object_detection: bool,
face_confidence: float,
object_confidence: float
) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
"""
Process the input image for face and object detection.
Args:
image: Input image
face_cascade: Face detection cascade
object_net: Object detection network
class_names: List of class names
enable_face_detection: Whether to detect faces
enable_object_detection: Whether to detect objects
face_confidence: Face detection confidence threshold
object_confidence: Object detection confidence threshold
Returns:
Tuple of (processed_image, face_results, object_results)
"""
# Convert RGB to BGR for OpenCV processing
if len(image.shape) == 3 and image.shape[2] == 3:
image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
else:
image_bgr = image.copy()
gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
face_results = []
object_results = []
# Face detection
if enable_face_detection and face_cascade is not None:
faces = face_cascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=5,
minSize=(30, 30)
)
for i, (x, y, w, h) in enumerate(faces):
face_results.append({
"id": i,
"bbox": [int(x), int(y), int(w), int(h)],
"confidence": 1.0, # Haar cascade doesn't provide confidence
"label": "face"
})
# Object detection
if enable_object_detection and object_net is not None:
try:
h, w = image_bgr.shape[:2]
blob = cv2.dnn.blobFromImage(
image_bgr, 0.007843, (300, 300), 127.5
)
object_net.setInput(blob)
detections = object_net.forward()
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > object_confidence:
idx = int(detections[0, 0, i, 1])
if idx < len(class_names):
x1 = int(detections[0, 0, i, 3] * w)
y1 = int(detections[0, 0, i, 4] * h)
x2 = int(detections[0, 0, i, 5] * w)
y2 = int(detections[0, 0, i, 6] * h)
object_results.append({
"id": i,
"bbox": [x1, y1, x2 - x1, y2 - y1],
"confidence": float(confidence),
"label": class_names[idx]
})
except Exception as e:
logger.warning(f"Object detection failed: {e}")
return image, face_results, object_results
def draw_detections(
image: np.ndarray,
face_results: List[Dict],
object_results: List[Dict],
show_labels: bool,
box_color: str
) -> np.ndarray:
"""
Draw bounding boxes and labels on the image.
Args:
image: Input image
face_results: Face detection results
object_results: Object detection results
show_labels: Whether to show labels
box_color: Color for bounding boxes
Returns:
Image with drawn detections
"""
# Convert to BGR for OpenCV drawing
if len(image.shape) == 3 and image.shape[2] == 3:
image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
else:
image_bgr = image.copy()
# Color mapping
color_map = {
"red": (0, 0, 255),
"green": (0, 255, 0),
"blue": (255, 0, 0),
"yellow": (0, 255, 255),
"purple": (255, 0, 255),
"orange": (0, 165, 255)
}
color = color_map.get(box_color, (0, 0, 255))
# Draw face detections
for face in face_results:
x, y, w, h = face["bbox"]
cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)
if show_labels:
label = f"Face {face['id']}"
label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
cv2.rectangle(
image_bgr,
(x, y - label_size[1] - 10),
(x + label_size[0], y),
color,
-1
)
cv2.putText(
image_bgr, label, (x, y - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
)
# Draw object detections
for obj in object_results:
x, y, w, h = obj["bbox"]
cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)
if show_labels:
label = f"{obj['label']}: {obj['confidence']:.2f}"
label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
cv2.rectangle(
image_bgr,
(x, y - label_size[1] - 10),
(x + label_size[0], y),
color,
-1
)
cv2.putText(
image_bgr, label, (x, y - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
)
# Convert back to RGB
return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
def format_results(results: List[Dict], result_type: str) -> str:
"""
Format detection results as a readable string.
Args:
results: Detection results
result_type: Type of results (face/object)
Returns:
Formatted string
"""
if not results:
return f"No {result_type}s detected"
output = [f"Detected {len(results)} {result_type}s:"]
for result in results:
bbox = result["bbox"]
output.append(
f" - {result_type.capitalize()} {result['id']}: "
f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
)
if "confidence" in result:
output.append(f" Confidence: {result['confidence']:.2f}")
if "label" in result and result["label"] != result_type:
output.append(f" Label: {result['label']}")
return "\n".join(output)