import gradio as gr
import numpy as np
from PIL import Image
import cv2
import json
from typing import Tuple, List, Dict, Any
import time

from utils import draw_detections, process_image, load_detection_models
from models import detect_faces, detect_objects

# Load models at startup
face_cascade, object_net, object_classes = load_detection_models()


def recognize_face_and_objects(
    image: np.ndarray,
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float,
    draw_boxes: bool,
    show_labels: bool,
    box_color: str
) -> Tuple[np.ndarray, str, str]:
    """
    Perform face and object detection on the input image.

    Args:
        image: Input image as numpy array
        enable_face_detection: Whether to detect faces
        enable_object_detection: Whether to detect objects
        face_confidence: Confidence threshold for face detection
        object_confidence: Confidence threshold for object detection
        draw_boxes: Whether to draw bounding boxes
        show_labels: Whether to show labels on detections
        box_color: Color for bounding boxes

    Returns:
        Tuple of (processed_image, face_results_json, object_results_json)
    """
    if image is None:
        no_input = json.dumps({"message": "No image provided"})
        return None, no_input, no_input

    # Convert PIL to numpy if needed
    if isinstance(image, Image.Image):
        image = np.array(image)

    # Process image
    processed_image, face_results, object_results = process_image(
        image,
        face_cascade,
        object_net,
        object_classes,
        enable_face_detection,
        enable_object_detection,
        face_confidence,
        object_confidence
    )

    # Draw detections if requested
    if draw_boxes:
        processed_image = draw_detections(
            processed_image.copy(),
            face_results,
            object_results,
            show_labels,
            box_color
        )

    # Convert results to JSON strings; use a valid-JSON fallback message so the
    # gr.JSON output components can render the "nothing detected" case
    face_json = json.dumps(face_results, indent=2) if face_results else json.dumps({"message": "No faces detected"})
    object_json = json.dumps(object_results, indent=2) if object_results else json.dumps({"message": "No objects detected"})

    return processed_image, face_json, object_json


def webcam_recognition(
    image: np.ndarray,
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float,
    draw_boxes: bool,
    show_labels: bool,
    box_color: str
) -> np.ndarray:
    """Real-time webcam recognition."""
    if image is None:
        return None

    processed_image, _, _ = recognize_face_and_objects(
        image,
        enable_face_detection,
        enable_object_detection,
        face_confidence,
        object_confidence,
        draw_boxes,
        show_labels,
        box_color
    )
    return processed_image


def get_detection_statistics() -> str:
    """Get information about available detection models."""
    stats = {
        "face_detection": {
            "model": "Haar Cascade",
            "features": ["Face detection", "Eye detection", "Smile detection"],
            "speed": "Fast",
            "accuracy": "Medium"
        },
        "object_detection": {
            "model": "OpenCV DNN with MobileNet-SSD",
            "classes": len(object_classes) if object_classes else 0,
            "input_size": "300x300",
            "speed": "Real-time capable",
            "accuracy": "High"
        }
    }
    return json.dumps(stats, indent=2)


# Custom CSS for better styling
custom_css = """
.main-container {
    max-width: 1400px;
    margin: 0 auto;
}
.settings-panel {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 10px;
    padding: 20px;
}
.result-panel {
    border: 2px solid #e0e0e0;
    border-radius: 10px;
    padding: 15px;
}
.image-container {
    border: 1px solid #ddd;
    border-radius: 8px;
    overflow: hidden;
}
"""

with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as demo:
    gr.Markdown("""
    # 🔍 Face & Object Recognition Platform

    Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)

    Advanced computer vision platform for real-time
    face and object detection with customizable settings.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 📤 Input Source")

            with gr.Tabs():
                with gr.TabItem("Upload Image"):
                    input_image = gr.Image(
                        label="Upload an image for analysis",
                        type="numpy",
                        height=400
                    )
                    analyze_btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")

                with gr.TabItem("Webcam"):
                    webcam_image = gr.Image(
                        label="Webcam Feed",
                        sources=["webcam"],
                        type="numpy",
                        streaming=True,
                        height=400
                    )
                    gr.Markdown("*Webcam provides real-time detection (may have a slight delay)*")

        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Detection Settings")

            with gr.Group(elem_classes=["settings-panel"]):
                gr.Markdown("#### Detection Modes")
                enable_face = gr.Checkbox(label="👤 Enable Face Detection", value=True)
                enable_objects = gr.Checkbox(label="📦 Enable Object Detection", value=True)

                gr.Markdown("#### Confidence Thresholds")
                face_conf = gr.Slider(
                    label="Face Detection Confidence",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    info="Lower values detect more faces"
                )
                object_conf = gr.Slider(
                    label="Object Detection Confidence",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.1,
                    info="Lower values detect more objects"
                )

                gr.Markdown("#### Display Options")
                draw_boxes = gr.Checkbox(label="📐 Draw Bounding Boxes", value=True)
                show_labels = gr.Checkbox(label="🏷️ Show Labels", value=True)
                box_color = gr.Dropdown(
                    label="Box Color",
                    choices=["red", "green", "blue", "yellow", "purple", "orange"],
                    value="red"
                )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🖼️ Detection Results")
            output_image = gr.Image(
                label="Processed Image with Detections",
                type="numpy",
                height=400,
                elem_classes=["image-container"]
            )

        with gr.Column():
            with gr.Tabs():
                with gr.TabItem("👤 Face Results"):
                    face_results = gr.JSON(
                        label="Face Detection Data",
                        elem_classes=["result-panel"]
                    )

                with gr.TabItem("📦 Object Results"):
                    object_results = gr.JSON(
                        label="Object Detection Data",
                        elem_classes=["result-panel"]
                    )

                with gr.TabItem("ℹ️ Model Info"):
                    model_info = gr.JSON(
                        label="Detection Models Information",
                        value=json.loads(get_detection_statistics()),
                        elem_classes=["result-panel"]
                    )

    # Event handlers
    analyze_btn.click(
        fn=recognize_face_and_objects,
        inputs=[
            input_image,
            enable_face,
            enable_objects,
            face_conf,
            object_conf,
            draw_boxes,
            show_labels,
            box_color
        ],
        outputs=[output_image, face_results, object_results]
    )

    # Real-time webcam processing
    webcam_image.stream(
        fn=webcam_recognition,
        inputs=[
            webcam_image,
            enable_face,
            enable_objects,
            face_conf,
            object_conf,
            draw_boxes,
            show_labels,
            box_color
        ],
        outputs=[output_image],
        time_limit=30,
        stream_every=0.5
    )

    # Examples
    gr.Examples(
        examples=[
            # These would need actual image files; placeholders for now
            ["example1.jpg", True, True, 0.7, 0.5, True, True, "red"],
            ["example2.jpg", False, True, 0.8, 0.6, True, True, "blue"],
            ["example3.jpg", True, False, 0.6, 0.4, True, False, "green"],
        ],
        inputs=[
            input_image,
            enable_face,
            enable_objects,
            face_conf,
            object_conf,
            draw_boxes,
            show_labels,
            box_color
        ],
        outputs=[output_image, face_results, object_results],
        cache_examples=False
    )

    gr.Markdown("""
    ---
    ### 📚 Usage Instructions
    1. **Upload Image**: Select an image from your device for analysis
    2. **Webcam**: Use your webcam for real-time detection
    3. **Adjust Settings**: Customize confidence thresholds and display options
    4. **View Results**: See detections overlaid on the image with detailed JSON data

    ### 🎯 Features
    - **Face Detection**: Identifies faces in images using Haar Cascade classifiers
    - **Object Detection**: Recognizes 80+ object classes using MobileNet-SSD
    - **Real-time Processing**: Webcam support with live detection
    - **Customizable**: Adjustable confidence thresholds and visual settings
    - **Detailed Output**: JSON-formatted results with coordinates and confidence scores
    """)

if __name__ == "__main__":
    demo.launch(share=True, debug=True)