Spaces:

Brightsun10
/

facial-recognition

Sleeping

App Files Files Community

Brightsun10 committed on Jun 30, 2025

Commit

0a3f751

verified ·

1 Parent(s): 1b819c6

Upload 2 files

Browse files

Files changed (2) hide show

app.py +173 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import gradio as gr
+import torch
+import cv2
+import numpy as np
+from PIL import Image
+from facenet_pytorch import MTCNN, InceptionResnetV1
+import os
+import glob
+# --- MODEL INITIALIZATION ---
+# Check for GPU availability and set the device
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+print(f'Running on device: {device}')
+# Initialize MTCNN for face detection
+# keep_all=True allows detection of multiple faces in a frame
+mtcnn = MTCNN(
+    image_size=160, margin=14, min_face_size=20,
+    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
+    device=device, keep_all=True
+)
+# Initialize FaceNet (InceptionResnetV1) for face recognition
+# Use a pre-trained model on VGGFace2 dataset
+resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()
+# --- FACE DATABASE SETUP ---
+def build_face_database(directory):
+    """
+    Builds a database of known face embeddings from a directory of images.
+    Args:
+        directory (str): The path to the directory containing subdirectories of images,
+                         where each subdirectory is named after the person.
+    Returns:
+        tuple: A tuple containing two lists:
+               - known_face_embeddings (list): A list of face embedding tensors.
+               - known_face_names (list): A list of corresponding names.
+    """
+    known_face_embeddings = []
+    known_face_names = []
+    if not os.path.exists(directory):
+        print(f"Database directory '{directory}' not found. Creating it.")
+        os.makedirs(directory)
+        return known_face_embeddings, known_face_names
+    # Iterate over each person in the directory
+    for person_name in os.listdir(directory):
+        person_dir = os.path.join(directory, person_name)
+        if not os.path.isdir(person_dir):
+            continue
+        # Find all image files for the person
+        image_files = glob.glob(os.path.join(person_dir, '*.jpg')) + \
+                      glob.glob(os.path.join(person_dir, '*.png'))
+        for image_path in image_files:
+            try:
+                img = Image.open(image_path).convert('RGB')
+                # Detect face and get the face tensor
+                img_cropped = mtcnn(img)
+                if img_cropped is not None:
+                    # Generate embedding
+                    embedding = resnet(img_cropped.unsqueeze(0).to(device))
+                    known_face_embeddings.append(embedding.detach().cpu())
+                    known_face_names.append(person_name)
+                    print(f"Processed {person_name} from {os.path.basename(image_path)}")
+            except Exception as e:
+                print(f"Error processing image {image_path}: {e}")
+    return known_face_embeddings, known_face_names
+# Build the database from the 'known_faces' directory
+# For Hugging Face Spaces, you can upload a zip file and unzip it,
+# or add the folder to your repository.
+known_embeddings, known_names = build_face_database('known_faces')
+print(f"Loaded {len(known_names)} known faces.")
+# --- REAL-TIME RECOGNITION FUNCTION ---
+def recognize_faces(video_frame):
+    """
+    Performs face detection and recognition on a single video frame.
+    Args:
+        video_frame (np.ndarray): The input video frame from the webcam.
+    Returns:
+        np.ndarray: The video frame with bounding boxes and names drawn on it.
+    """
+    if video_frame is None:
+        return None
+    # Convert frame from BGR (OpenCV format) to RGB (PIL format)
+    frame_rgb = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
+    img_pil = Image.fromarray(frame_rgb)
+    # Detect faces using MTCNN
+    boxes, _ = mtcnn.detect(img_pil)
+    # Get face embeddings for all detected faces in the frame
+    try:
+        # mtcnn() returns a tensor of cropped face images
+        face_tensors = mtcnn(img_pil)
+        if face_tensors is None:
+            # If no faces are detected, return the original frame
+            return video_frame
+        embeddings = resnet(face_tensors.to(device))
+        embeddings = embeddings.detach().cpu()
+    except Exception as e:
+        # This can happen if a face is detected but then fails processing
+        print(f"Could not get embeddings: {e}")
+        return video_frame
+    # Compare detected faces with the known faces database
+    if boxes is not None:
+        for i, box in enumerate(boxes):
+            embedding = embeddings[i]
+            min_dist = float('inf')
+            identity = "Unknown"
+            if known_embeddings:
+                # Calculate distances to all known faces
+                distances = [(embedding - known_emb).norm().item() for known_emb in known_embeddings]
+                min_dist = min(distances)
+                # Set a threshold for recognition
+                # This value may need tuning depending on your dataset
+                recognition_threshold = 0.8
+                if min_dist < recognition_threshold:
+                    # Get the index of the closest match
+                    min_dist_idx = distances.index(min_dist)
+                    identity = known_names[min_dist_idx]
+            # Draw bounding box and name on the frame
+            x1, y1, x2, y2 = [int(b) for b in box]
+            color = (0, 255, 0) if identity != "Unknown" else (0, 0, 255) # Green for known, Red for unknown
+            cv2.rectangle(video_frame, (x1, y1), (x2, y2), color, 2)
+            # Prepare text label
+            label = f"{identity} ({min_dist:.2f})"
+            # Calculate text size to draw a solid background
+            (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
+            cv2.rectangle(video_frame, (x1, y2 - text_height - baseline), (x1 + text_width, y2), color, -1)
+            cv2.putText(video_frame, label, (x1, y2 - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+    return video_frame
+# --- GRADIO INTERFACE ---
+# Define the Gradio interface
+# inputs="webcam" creates a real-time video input component
+# outputs="image" will display the processed video frames
+iface = gr.Interface(
+    fn=recognize_faces,
+    inputs=gr.Image(sources=['webcam'], type="numpy", streaming=True),
+    outputs="image",
+    title="Advanced Real-Time Facial Recognition",
+    description="This application uses MTCNN for face detection and FaceNet for recognition. "
+                "It identifies known faces from a pre-built database. "
+                "To add a new person, create a folder with their name inside the 'known_faces' directory and add their pictures.",
+    live=True
+)
+# Launch the application
+if __name__ == "__main__":
+    iface.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+torch
+torchvision
+opencv-python-headless
+facenet-pytorch
+numpy
+Pillow