# Hugging Face Space status header (non-code residue from the Space page):
# Spaces: Sleeping
# Sleeping
import glob
import os

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
# --- MODEL INITIALIZATION ---

# Check for GPU availability and set the device; both models are placed here.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# Initialize MTCNN for face detection.
# keep_all=True allows detection of multiple faces in a frame;
# image_size=160 matches the input size FaceNet expects.
mtcnn = MTCNN(
    image_size=160, margin=14, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device, keep_all=True
)

# Initialize FaceNet (InceptionResnetV1) for face recognition, pre-trained on
# the VGGFace2 dataset. eval() fixes dropout/batch-norm for inference.
resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()
# --- FACE DATABASE SETUP ---
def build_face_database(directory):
    """
    Build a database of known face embeddings from a directory of images.

    The directory must contain one subdirectory per person (named after that
    person), each holding .jpg/.jpeg/.png images of their face.

    Args:
        directory (str): Path to the root directory of known faces. Created
            (empty) if it does not exist yet.

    Returns:
        tuple: A tuple containing two lists:
            - known_face_embeddings (list): 512-d CPU embedding tensors.
            - known_face_names (list): The corresponding person names.
    """
    known_face_embeddings = []
    known_face_names = []

    if not os.path.exists(directory):
        print(f"Database directory '{directory}' not found. Creating it.")
        os.makedirs(directory)
        return known_face_embeddings, known_face_names

    # Iterate over each person in the directory (sorted for a deterministic
    # database order regardless of filesystem listing order).
    for person_name in sorted(os.listdir(directory)):
        person_dir = os.path.join(directory, person_name)
        if not os.path.isdir(person_dir):
            continue

        # Find all image files for the person; include .jpeg alongside
        # .jpg and .png so common extensions are not silently skipped.
        image_files = []
        for pattern in ('*.jpg', '*.jpeg', '*.png'):
            image_files.extend(glob.glob(os.path.join(person_dir, pattern)))

        for image_path in image_files:
            try:
                img = Image.open(image_path).convert('RGB')
                # mtcnn() returns a 4D batch of aligned face crops
                # (N, 3, 160, 160), or None when no face is found.
                face_tensors = mtcnn(img)
                if face_tensors is not None:
                    # Inference only: no_grad avoids building an autograd
                    # graph (wasted memory) while embedding the database.
                    with torch.no_grad():
                        embeddings = resnet(face_tensors.to(device))
                    # Add each found embedding to the database, stored on
                    # CPU so the database does not pin GPU memory.
                    for emb in embeddings:
                        known_face_embeddings.append(emb.detach().cpu())
                        known_face_names.append(person_name)
                    print(f"Processed {person_name} from {os.path.basename(image_path)}")
            except Exception as e:
                # Best-effort: one unreadable image must not abort the build.
                print(f"Error processing image {image_path}: {e}")

    return known_face_embeddings, known_face_names
# Build the database once at startup from the 'known_faces' directory.
# For Hugging Face Spaces, you can upload a zip file and unzip it,
# or add the folder to your repository; restart the app to pick up changes.
known_embeddings, known_names = build_face_database('known_faces')
print(f"Loaded {len(known_names)} known faces.")
# --- REAL-TIME RECOGNITION FUNCTION ---
def recognize_faces(video_frame):
    """
    Perform face detection and recognition on a single video frame.

    Args:
        video_frame (np.ndarray): RGB frame from the Gradio webcam stream.
            Gradio's Image component delivers RGB arrays, not OpenCV BGR.

    Returns:
        tuple: (annotated_frame, detected_names) where annotated_frame is
        the RGB frame with boxes and labels drawn (None if the input was
        None) and detected_names lists one identity per drawn face.
    """
    if video_frame is None:
        return None, []

    # Create a writable copy, as the input from Gradio can be read-only.
    video_frame = video_frame.copy()

    # Gradio already supplies RGB, which is what PIL expects, so no channel
    # swap is needed (the previous BGR2RGB conversion fed a channel-swapped
    # image to the detector and made the drawn colors render wrong).
    img_pil = Image.fromarray(video_frame)

    # Bounding boxes (pixel coordinates) for drawing.
    boxes, _ = mtcnn.detect(img_pil)

    # Get face embeddings for all detected faces in the frame.
    try:
        # mtcnn() returns a batch tensor of cropped faces, or None.
        face_tensors = mtcnn(img_pil)
        if face_tensors is None:
            return video_frame, []
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            embeddings = resnet(face_tensors.to(device))
        embeddings = embeddings.detach().cpu()
    except Exception as e:
        # A face can be detected but still fail downstream processing.
        print(f"Could not get embeddings: {e}")
        return video_frame, []

    detected_names = []
    if boxes is not None:
        # detect() and mtcnn() run independently; guard against a count
        # mismatch so a stray box cannot index past the embedding batch.
        for i in range(min(len(boxes), len(embeddings))):
            box = boxes[i]
            embedding = embeddings[i]
            min_dist = float('inf')
            identity = "Unknown"
            if known_embeddings:
                # Euclidean distance to every known face (512-d tensors).
                distances = [(embedding - known_emb).norm().item()
                             for known_emb in known_embeddings]
                min_dist = min(distances)
                # Recognition threshold; may need tuning per dataset.
                recognition_threshold = 0.8
                if min_dist < recognition_threshold:
                    identity = known_names[distances.index(min_dist)]

            # Draw bounding box and label. Frame is RGB, so green is
            # (0,255,0) for known faces and red is (255,0,0) for unknown.
            x1, y1, x2, y2 = (int(b) for b in box)
            color = (0, 255, 0) if identity != "Unknown" else (255, 0, 0)
            cv2.rectangle(video_frame, (x1, y1), (x2, y2), color, 2)
            label = f"{identity} ({min_dist:.2f})"
            # Measure the text so a solid background fits behind it.
            (text_width, text_height), baseline = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
            cv2.rectangle(video_frame, (x1, y2 - text_height - baseline),
                          (x1 + text_width, y2), color, -1)
            cv2.putText(video_frame, label, (x1, y2 - baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            detected_names.append(identity)

    return video_frame, detected_names
# --- GRADIO INTERFACE (ENHANCED WITH BLOCKS) ---
def recognize_and_return(frame: np.ndarray, recognition_enabled: bool):
    """Stream callback: annotate *frame* and report who was recognized.

    When recognition is disabled the frame is passed through untouched
    with a "Recognition Paused" status message.
    """
    if not recognition_enabled:
        return frame, "Recognition Paused"

    result = recognize_faces(frame)
    if result is None:  # defensive; recognize_faces always returns a tuple
        return frame, "No faces detected"

    annotated_frame, names = result
    if names:
        return annotated_frame, ", ".join(names)
    return annotated_frame, "No known faces"
# UI layout: webcam input on the left, annotated output + log on the right.
with gr.Blocks(css=".gr-button {background-color: #2c3e50; color: white;}") as demo:
    # Shared state flag read by the stream callback on every frame.
    recognition_enabled = gr.State(True)  # Shared state

    gr.Markdown("# 🧠 Advanced Real-Time Facial Recognition")
    gr.Markdown(
        """
        <div style='line-height:1.6'>
        This application uses <b>MTCNN</b> for real-time face detection and <b>FaceNet</b> (InceptionResnetV1) for recognition.<br>
        Add known faces to the <code>known_faces/</code> folder with subfolders for each person.<br>
        This demo runs fully in-browser using <b>Gradio</b> and <b>OpenCV</b>.
        </div>
        """
    )

    with gr.Row():
        with gr.Column():
            # streaming=True makes the webcam push frames continuously.
            video_input = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="📷 Live Webcam Feed")
        with gr.Column():
            video_output = gr.Image(label="🧾 Annotated Output Frame")
            names_output = gr.Textbox(label="👤 Recognition Log", interactive=False)

    with gr.Row():
        toggle = gr.Checkbox(value=True, label="Enable Recognition")
        clear_btn = gr.Button("🧹 Clear Output")

    # Run recognition on every streamed frame.
    video_input.stream(fn=recognize_and_return,
                       inputs=[video_input, recognition_enabled],
                       outputs=[video_output, names_output])
    # Mirror the checkbox value into the shared state object.
    toggle.change(lambda v: v, inputs=toggle, outputs=recognition_enabled)
    # Reset both output widgets.
    clear_btn.click(lambda: (None, ""), outputs=[video_output, names_output])

    gr.Markdown("---")
    gr.Markdown(
        """
        **📁 To Add a New Person:**
        - Create a folder in `known_faces/` named after the individual.
        - Add multiple `.jpg` or `.png` clear images (front-facing).
        - Restart the app for changes to take effect.
        """
    )
# --- LAUNCH APP ---
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the console while developing.
    demo.launch(debug=True)