# app.py — Real-time facial recognition demo: MTCNN (detection) + FaceNet (recognition), served via Gradio.
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import os
import glob
# --- MODEL INITIALIZATION ---

# Select the compute device: first CUDA GPU when available, otherwise CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# MTCNN face detector. keep_all=True makes it return every face found in a
# frame (as a batch), not just the most probable one.
mtcnn = MTCNN(
    image_size=160,
    margin=14,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
    keep_all=True,
)

# FaceNet embedder (InceptionResnetV1) pre-trained on VGGFace2, in eval mode
# since it is used for inference only.
resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()
# --- FACE DATABASE SETUP ---
def build_face_database(directory):
    """
    Build a database of known face embeddings from a directory of images.

    The directory is expected to contain one subdirectory per person, named
    after that person, each holding clear photos (.jpg/.jpeg/.png) of them.

    Args:
        directory (str): Path to the root directory of known-face images.
            Created (empty) if it does not exist yet.

    Returns:
        tuple: A tuple of two parallel lists:
            - known_face_embeddings (list): 512-d face embedding tensors (CPU).
            - known_face_names (list): the corresponding person names.
    """
    known_face_embeddings = []
    known_face_names = []

    # First run: create an empty database directory and return an empty DB.
    if not os.path.exists(directory):
        print(f"Database directory '{directory}' not found. Creating it.")
        os.makedirs(directory)
        return known_face_embeddings, known_face_names

    # Each subdirectory is one person; its name is the identity label.
    for person_name in os.listdir(directory):
        person_dir = os.path.join(directory, person_name)
        if not os.path.isdir(person_dir):
            continue

        # Collect all supported image files for this person
        # (.jpeg accepted in addition to the original .jpg/.png patterns).
        image_files = []
        for pattern in ('*.jpg', '*.jpeg', '*.png'):
            image_files.extend(glob.glob(os.path.join(person_dir, pattern)))

        for image_path in image_files:
            try:
                img = Image.open(image_path).convert('RGB')
                # mtcnn() returns a batch tensor (N, 3, 160, 160) of cropped
                # faces, or None when no face is found in the image.
                face_tensors = mtcnn(img)
                if face_tensors is not None:
                    # The input is already a 4D batch; no .unsqueeze() needed.
                    embeddings = resnet(face_tensors.to(device))
                    for emb in embeddings:
                        # Detach from the autograd graph and keep on CPU so
                        # the database does not pin GPU memory.
                        known_face_embeddings.append(emb.detach().cpu())
                        known_face_names.append(person_name)
                    print(f"Processed {person_name} from {os.path.basename(image_path)}")
            except Exception as e:
                # Best-effort: skip unreadable/corrupt images but keep going.
                print(f"Error processing image {image_path}: {e}")

    return known_face_embeddings, known_face_names
# Build the database from the 'known_faces' directory.
# For Hugging Face Spaces, you can upload a zip file and unzip it,
# or add the folder to your repository.
# NOTE: known_embeddings / known_names are module-level globals read by
# recognize_faces() at stream time; restart the app to pick up new photos.
known_embeddings, known_names = build_face_database('known_faces')
print(f"Loaded {len(known_names)} known faces.")
# --- REAL-TIME RECOGNITION FUNCTION ---
def recognize_faces(video_frame):
    """
    Detect and recognize faces in a single video frame.

    Args:
        video_frame (np.ndarray): Input frame from the webcam, H x W x 3.
            NOTE(review): the code converts BGR->RGB as if the frame came from
            OpenCV, but Gradio webcam frames are typically already RGB —
            confirm upstream; a mismatch only swaps red/blue channels.

    Returns:
        tuple: (annotated frame or None, list of identity strings —
        "Unknown" for unmatched faces). Returns (None, []) for a None frame.
    """
    if video_frame is None:
        return None, []

    # Gradio may hand us a read-only array; draw on a writable copy.
    video_frame = video_frame.copy()

    # Convert to RGB for PIL/MTCNN.
    frame_rgb = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
    img_pil = Image.fromarray(frame_rgb)

    # Bounding boxes for drawing; None when no face is detected.
    boxes, _ = mtcnn.detect(img_pil)

    try:
        # mtcnn() returns a batch tensor of cropped faces, or None.
        face_tensors = mtcnn(img_pil)
        if face_tensors is None:
            return video_frame, []
        embeddings = resnet(face_tensors.to(device)).detach().cpu()
    except Exception as e:
        # A face can be detected but still fail during cropping/embedding.
        print(f"Could not get embeddings: {e}")
        return video_frame, []

    # Compare each detected face against the known-faces database.
    detected_names = []
    if boxes is not None:
        for i, box in enumerate(boxes):
            # Guard against a count mismatch between detect() and mtcnn():
            # the two passes can disagree on borderline faces, which would
            # otherwise raise an IndexError here.
            if i >= len(embeddings):
                break
            embedding = embeddings[i]
            min_dist = float('inf')
            identity = "Unknown"
            if known_embeddings:
                # Euclidean distance to every known 512-d embedding.
                distances = [(embedding - known_emb).norm().item()
                             for known_emb in known_embeddings]
                min_dist = min(distances)
                # Threshold may need tuning for your dataset.
                recognition_threshold = 0.8
                if min_dist < recognition_threshold:
                    identity = known_names[distances.index(min_dist)]

            # Draw the box: green for a known face, red for unknown.
            x1, y1, x2, y2 = [int(b) for b in box]
            color = (0, 255, 0) if identity != "Unknown" else (0, 0, 255)
            cv2.rectangle(video_frame, (x1, y1), (x2, y2), color, 2)

            # Label with a solid background rectangle for readability.
            label = f"{identity} ({min_dist:.2f})"
            (text_width, text_height), baseline = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
            cv2.rectangle(video_frame, (x1, y2 - text_height - baseline),
                          (x1 + text_width, y2), color, -1)
            cv2.putText(video_frame, label, (x1, y2 - baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            detected_names.append(identity)

    return video_frame, detected_names
# --- GRADIO INTERFACE (ENHANCED WITH BLOCKS) ---
def recognize_and_return(frame: np.ndarray, recognition_enabled: bool):
    """
    Gradio stream callback: annotate a webcam frame and report identities.

    Args:
        frame: Current webcam frame (may be None before the stream starts).
        recognition_enabled: Shared toggle state; when False the frame is
            passed through untouched.

    Returns:
        tuple: (frame to display, status / names string for the log box).
    """
    if not recognition_enabled:
        return frame, "Recognition Paused"

    result = recognize_faces(frame)
    if result is None:
        # Defensive only: recognize_faces always returns a 2-tuple.
        return frame, "No faces detected"

    annotated_frame, names = result
    name_str = ", ".join(names) if names else "No known faces"
    return annotated_frame, name_str
# Build the Gradio Blocks UI: webcam input streaming into the recognizer,
# with a pause toggle and a clear button. (Markdown string contents are kept
# at column 0 so leading whitespace does not alter how they render.)
with gr.Blocks(css=".gr-button {background-color: #2c3e50; color: white;}") as demo:
    recognition_enabled = gr.State(True)  # Shared state

    gr.Markdown("# 🧠 Advanced Real-Time Facial Recognition")
    gr.Markdown(
        """
<div style='line-height:1.6'>
This application uses <b>MTCNN</b> for real-time face detection and <b>FaceNet</b> (InceptionResnetV1) for recognition.<br>
Add known faces to the <code>known_faces/</code> folder with subfolders for each person.<br>
This demo runs fully in-browser using <b>Gradio</b> and <b>OpenCV</b>.
</div>
"""
    )

    with gr.Row():
        with gr.Column():
            video_input = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="📷 Live Webcam Feed")
        with gr.Column():
            video_output = gr.Image(label="🧾 Annotated Output Frame")
            names_output = gr.Textbox(label="👤 Recognition Log", interactive=False)

    with gr.Row():
        toggle = gr.Checkbox(value=True, label="Enable Recognition")
        clear_btn = gr.Button("🧹 Clear Output")

    # Each webcam frame flows through recognize_and_return with the toggle state.
    video_input.stream(fn=recognize_and_return,
                       inputs=[video_input, recognition_enabled],
                       outputs=[video_output, names_output])
    # Mirror the checkbox into the shared State so the stream callback sees it.
    toggle.change(lambda v: v, inputs=toggle, outputs=recognition_enabled)
    clear_btn.click(lambda: (None, ""), outputs=[video_output, names_output])

    gr.Markdown("---")
    gr.Markdown(
        """
**📁 To Add a New Person:**
- Create a folder in `known_faces/` named after the individual.
- Add multiple `.jpg` or `.png` clear images (front-facing).
- Restart the app for changes to take effect.
"""
    )
# --- LAUNCH APP ---
# Guarded entry point: only start the Gradio server when run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)