# app.py — Real-time facial recognition demo: MTCNN (detection) + FaceNet (recognition), served via Gradio.
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import os
import glob
# --- MODEL INITIALIZATION ---

# Select the compute device: first CUDA GPU when available, otherwise CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# MTCNN face detector. keep_all=True makes it return every face found in a
# frame (as a batch), not just the most probable one.
mtcnn = MTCNN(
    image_size=160,
    margin=14,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
    keep_all=True,
)

# FaceNet embedder (InceptionResnetV1) pre-trained on VGGFace2, in eval mode
# since it is used for inference only.
resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()
# --- FACE DATABASE SETUP ---
def build_face_database(directory):
    """
    Build a database of known face embeddings from a directory of images.

    The directory is expected to contain one subdirectory per person, named
    after that person, each holding clear photos (.jpg/.jpeg/.png) of them.

    Args:
        directory (str): Path to the root directory of known-face images.
            Created (empty) if it does not exist yet.

    Returns:
        tuple: A tuple of two parallel lists:
            - known_face_embeddings (list): 512-d face embedding tensors (CPU).
            - known_face_names (list): the corresponding person names.
    """
    known_face_embeddings = []
    known_face_names = []

    # First run: create an empty database directory and return an empty DB.
    if not os.path.exists(directory):
        print(f"Database directory '{directory}' not found. Creating it.")
        os.makedirs(directory)
        return known_face_embeddings, known_face_names

    # Each subdirectory is one person; its name is the identity label.
    for person_name in os.listdir(directory):
        person_dir = os.path.join(directory, person_name)
        if not os.path.isdir(person_dir):
            continue

        # Collect all supported image files for this person
        # (.jpeg accepted in addition to the original .jpg/.png patterns).
        image_files = []
        for pattern in ('*.jpg', '*.jpeg', '*.png'):
            image_files.extend(glob.glob(os.path.join(person_dir, pattern)))

        for image_path in image_files:
            try:
                img = Image.open(image_path).convert('RGB')
                # mtcnn() returns a batch tensor (N, 3, 160, 160) of cropped
                # faces, or None when no face is found in the image.
                face_tensors = mtcnn(img)
                if face_tensors is not None:
                    # The input is already a 4D batch; no .unsqueeze() needed.
                    embeddings = resnet(face_tensors.to(device))
                    for emb in embeddings:
                        # Detach from the autograd graph and keep on CPU so
                        # the database does not pin GPU memory.
                        known_face_embeddings.append(emb.detach().cpu())
                        known_face_names.append(person_name)
                    print(f"Processed {person_name} from {os.path.basename(image_path)}")
            except Exception as e:
                # Best-effort: skip unreadable/corrupt images but keep going.
                print(f"Error processing image {image_path}: {e}")

    return known_face_embeddings, known_face_names
# Build the database from the 'known_faces' directory.
# For Hugging Face Spaces, you can upload a zip file and unzip it,
# or add the folder to your repository.
# NOTE: known_embeddings / known_names are module-level globals read by
# recognize_faces() at stream time; restart the app to pick up new photos.
known_embeddings, known_names = build_face_database('known_faces')
print(f"Loaded {len(known_names)} known faces.")
# --- REAL-TIME RECOGNITION FUNCTION ---
def recognize_faces(video_frame):
    """
    Detect and recognize faces in a single video frame.

    Args:
        video_frame (np.ndarray): Input frame from the webcam, H x W x 3.
            NOTE(review): the code converts BGR->RGB as if the frame came from
            OpenCV, but Gradio webcam frames are typically already RGB —
            confirm upstream; a mismatch only swaps red/blue channels.

    Returns:
        tuple: (annotated frame or None, list of identity strings —
        "Unknown" for unmatched faces). Returns (None, []) for a None frame.
    """
    if video_frame is None:
        return None, []

    # Gradio may hand us a read-only array; draw on a writable copy.
    video_frame = video_frame.copy()

    # Convert to RGB for PIL/MTCNN.
    frame_rgb = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
    img_pil = Image.fromarray(frame_rgb)

    # Bounding boxes for drawing; None when no face is detected.
    boxes, _ = mtcnn.detect(img_pil)

    try:
        # mtcnn() returns a batch tensor of cropped faces, or None.
        face_tensors = mtcnn(img_pil)
        if face_tensors is None:
            return video_frame, []
        embeddings = resnet(face_tensors.to(device)).detach().cpu()
    except Exception as e:
        # A face can be detected but still fail during cropping/embedding.
        print(f"Could not get embeddings: {e}")
        return video_frame, []

    # Compare each detected face against the known-faces database.
    detected_names = []
    if boxes is not None:
        for i, box in enumerate(boxes):
            # Guard against a count mismatch between detect() and mtcnn():
            # the two passes can disagree on borderline faces, which would
            # otherwise raise an IndexError here.
            if i >= len(embeddings):
                break
            embedding = embeddings[i]
            min_dist = float('inf')
            identity = "Unknown"
            if known_embeddings:
                # Euclidean distance to every known 512-d embedding.
                distances = [(embedding - known_emb).norm().item()
                             for known_emb in known_embeddings]
                min_dist = min(distances)
                # Threshold may need tuning for your dataset.
                recognition_threshold = 0.8
                if min_dist < recognition_threshold:
                    identity = known_names[distances.index(min_dist)]

            # Draw the box: green for a known face, red for unknown.
            x1, y1, x2, y2 = [int(b) for b in box]
            color = (0, 255, 0) if identity != "Unknown" else (0, 0, 255)
            cv2.rectangle(video_frame, (x1, y1), (x2, y2), color, 2)

            # Label with a solid background rectangle for readability.
            label = f"{identity} ({min_dist:.2f})"
            (text_width, text_height), baseline = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
            cv2.rectangle(video_frame, (x1, y2 - text_height - baseline),
                          (x1 + text_width, y2), color, -1)
            cv2.putText(video_frame, label, (x1, y2 - baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            detected_names.append(identity)

    return video_frame, detected_names
# --- GRADIO INTERFACE (ENHANCED WITH BLOCKS) ---
def recognize_and_return(frame: np.ndarray, recognition_enabled: bool):
    """
    Gradio stream callback: annotate a webcam frame and report identities.

    Args:
        frame: Current webcam frame (may be None before the stream starts).
        recognition_enabled: Shared toggle state; when False the frame is
            passed through untouched.

    Returns:
        tuple: (frame to display, status / names string for the log box).
    """
    if not recognition_enabled:
        return frame, "Recognition Paused"

    result = recognize_faces(frame)
    if result is None:
        # Defensive only: recognize_faces always returns a 2-tuple.
        return frame, "No faces detected"

    annotated_frame, names = result
    name_str = ", ".join(names) if names else "No known faces"
    return annotated_frame, name_str
# Build the Gradio Blocks UI: webcam input streaming into the recognizer,
# with a pause toggle and a clear button. (Markdown string contents are kept
# at column 0 so leading whitespace does not alter how they render.)
with gr.Blocks(css=".gr-button {background-color: #2c3e50; color: white;}") as demo:
    recognition_enabled = gr.State(True)  # Shared state

    gr.Markdown("# 🧠 Advanced Real-Time Facial Recognition")
    gr.Markdown(
        """
<div style='line-height:1.6'>
This application uses <b>MTCNN</b> for real-time face detection and <b>FaceNet</b> (InceptionResnetV1) for recognition.<br>
Add known faces to the <code>known_faces/</code> folder with subfolders for each person.<br>
This demo runs fully in-browser using <b>Gradio</b> and <b>OpenCV</b>.
</div>
"""
    )

    with gr.Row():
        with gr.Column():
            video_input = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="📷 Live Webcam Feed")
        with gr.Column():
            video_output = gr.Image(label="🧾 Annotated Output Frame")
            names_output = gr.Textbox(label="👤 Recognition Log", interactive=False)

    with gr.Row():
        toggle = gr.Checkbox(value=True, label="Enable Recognition")
        clear_btn = gr.Button("🧹 Clear Output")

    # Each webcam frame flows through recognize_and_return with the toggle state.
    video_input.stream(fn=recognize_and_return,
                       inputs=[video_input, recognition_enabled],
                       outputs=[video_output, names_output])
    # Mirror the checkbox into the shared State so the stream callback sees it.
    toggle.change(lambda v: v, inputs=toggle, outputs=recognition_enabled)
    clear_btn.click(lambda: (None, ""), outputs=[video_output, names_output])

    gr.Markdown("---")
    gr.Markdown(
        """
**📁 To Add a New Person:**
- Create a folder in `known_faces/` named after the individual.
- Add multiple `.jpg` or `.png` clear images (front-facing).
- Restart the app for changes to take effect.
"""
    )
# --- LAUNCH APP ---
# Guarded entry point: only start the Gradio server when run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)