FaceShapeAI / demo.py

Upload folder using huggingface_hub

3219a41 verified 4 days ago

7.87 kB

	from __future__ import annotations

	from pathlib import Path
	from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

	import cv2
	import mediapipe as mp
	import numpy as np
	import pickle
	import argparse

	# Paths to model files (assumed to be in the same directory as this script)
	PROJECT_DIR = Path(__file__).parent
	MODEL_FILE = PROJECT_DIR / 'face_shape_model.pkl'
	LABEL_ENCODER_FILE = PROJECT_DIR / 'label_encoder.pkl'

	Keypoint = Dict[str, float]
	NormalizedLandmark = Tuple[float, float, float]


	def normalize_landmarks(keypoints: Sequence[Keypoint], width: int, height: int) -> List[NormalizedLandmark]:
	"""
	Normalize keypoints to be centered, roll-corrected, and scaled.
	Retains 3D coordinates (Z) but aligns to the 2D plane based on eyes.

	Returns list of tuples: [(x, y, z), ...]

	(Copied from create.py to ensure consistent preprocessing)
	"""
	if not keypoints:
	return []

	# Convert to numpy array (N, 3)
	landmarks = np.array([[kp["x"], kp["y"], kp["z"]] for kp in keypoints])

	# Denormalize x, y, z to pixel/aspect-correct coordinates
	# MediaPipe Z is roughly same scale as X (relative to image width)
	landmarks[:, 0] *= width
	landmarks[:, 1] *= height
	landmarks[:, 2] *= width

	# Indices for irises (refine_landmarks=True gives 478 points)
	# 468: Left Iris Center (Subject's Left, Image Right)
	# 473: Right Iris Center (Subject's Right, Image Left)
	left_iris_idx = 468
	right_iris_idx = 473

	if len(landmarks) > right_iris_idx:
	left_iris = landmarks[left_iris_idx]
	right_iris = landmarks[right_iris_idx]
	else:
	# Fallback to eye corners if iris landmarks missing
	p1 = landmarks[33] # Left eye outer
	p2 = landmarks[133] # Left eye inner
	left_iris = (p1 + p2) / 2
	p3 = landmarks[362] # Right eye inner
	p4 = landmarks[263] # Right eye outer
	right_iris = (p3 + p4) / 2

	# 1. Centering: Move midpoint of eyes to origin
	eye_center = (left_iris + right_iris) / 2.0
	landmarks -= eye_center

	# 2. Rotation (Roll Correction)
	delta = left_iris - right_iris
	dX, dY = delta[0], delta[1]

	# Calculate angle of this vector relative to horizontal
	angle = np.arctan2(dY, dX)

	# Rotate by -angle to align with X-axis
	c, s = np.cos(-angle), np.sin(-angle)

	# Rotation matrix around Z axis
	R = np.array([
	[c, -s, 0],
	[s, c, 0],
	[0, 0, 1]
	])

	landmarks = landmarks.dot(R.T)

	# 3. Scaling: Scale such that inter-ocular distance is 1.0
	dist = np.sqrt(dX2 + dY2)
	if dist > 0:
	scale = 1.0 / dist
	landmarks *= scale

	# Convert to list of tuples
	return [(round(float(l[0]), 5), round(float(l[1]), 5), round(float(l[2]), 5))
	for l in landmarks]


	def create_face_mesh(image_path: Union[str, Path]) -> Tuple[Optional[List[Keypoint]], Optional[np.ndarray]]:
	"""
	Process image to get face mesh data using MediaPipe
	Returns: keypoints, img_bgr or None if failed

	(Copied from create.py to ensure consistent preprocessing)
	"""
	max_width_or_height = 512

	mp_face_mesh = mp.solutions.face_mesh

	# Initialize face mesh
	with mp_face_mesh.FaceMesh(
	static_image_mode=True,
	max_num_faces=1,
	refine_landmarks=True,
	min_detection_confidence=0.5) as face_mesh:

	# Read image from file
	img_bgr = cv2.imread(str(image_path))

	if img_bgr is None:
	print(f"Error: Could not read image: {image_path}")
	return None, None

	# Downscale large images to speed up inference (keep aspect ratio)
	h, w = img_bgr.shape[:2]
	longest = max(h, w)
	if longest > max_width_or_height:
	scale = max_width_or_height / float(longest)
	new_w = max(1, int(round(w * scale)))
	new_h = max(1, int(round(h * scale)))
	img_bgr = cv2.resize(img_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)

	# Convert BGR to RGB for MediaPipe processing
	img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

	# Process the image
	results = face_mesh.process(img_rgb)

	if not results.multi_face_landmarks:
	print(f"Error: No face detected in: {image_path}")
	return None, None

	keypoints = []
	for landmark in results.multi_face_landmarks[0].landmark:
	keypoints.append({
	"x": round(landmark.x, 5),
	"y": round(landmark.y, 5),
	"z": round(landmark.z, 5)
	})
	return keypoints, img_bgr


	def load_model_resources() -> Tuple[Any, Any]:
	"""Load the trained model and label encoder."""
	if not MODEL_FILE.exists():
	raise FileNotFoundError(f"Model file not found at {MODEL_FILE}. Please run create_model.py first.")

	if not LABEL_ENCODER_FILE.exists():
	raise FileNotFoundError(f"Label encoder file not found at {LABEL_ENCODER_FILE}. Please run create_model.py first.")

	print(f"Loading model from {MODEL_FILE}...")
	with open(MODEL_FILE, 'rb') as f:
	model = pickle.load(f)

	print(f"Loading label encoder from {LABEL_ENCODER_FILE}...")
	with open(LABEL_ENCODER_FILE, 'rb') as f:
	label_encoder = pickle.load(f)

	return model, label_encoder


	def predict_face_shape(image_path: Union[str, Path]) -> Optional[str]:
	"""
	Main function to predict face shape for a given image.
	"""
	# 1. Load Model
	try:
	model, label_encoder = load_model_resources()
	except Exception as e:
	print(f"Failed to load model resources: {e}")
	return None

	# 2. Process Image (Extract Landmarks)
	print(f"Processing image: {image_path}")
	keypoints, img_bgr = create_face_mesh(image_path)

	if keypoints is None:
	print("Could not extract landmarks. Exiting.")
	return None

	# 3. Normalize Landmarks
	h, w = img_bgr.shape[:2]
	normalized_kpts = normalize_landmarks(keypoints, w, h)

	# 4. Prepare Features (Flatten and drop Z)
	# The model expects a flattened array of [x1, y1, x2, y2, ...]
	flattened_features: List[float] = []
	for kp in normalized_kpts:
	flattened_features.extend([kp[0], kp[1]]) # x, y only

	# Reshape for sklearn (1 sample, N features)
	features_array = np.array([flattened_features])

	# 5. Predict
	print("Running prediction...")
	# Get probabilities
	probas = model.predict_proba(features_array)[0]
	# Get prediction
	prediction_idx = model.predict(features_array)[0]
	predicted_label = label_encoder.inverse_transform([prediction_idx])[0]

	# 6. Show Results
	print("\n" + "="*30)
	print(f"PREDICTED FACE SHAPE: {predicted_label.upper()}")
	print("="*30)

	print("\nConfidence Scores:")
	# Sort probabilities
	class_indices = np.argsort(probas)[::-1]
	for i in class_indices:
	class_name = label_encoder.classes_[i]
	score = probas[i]
	print(f" {class_name}: {score:.4f}")

	return predicted_label


	def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
	"""
	Parse command-line arguments.

	Note: Default behavior remains to run against `sample_image.jpg` when no args are provided.
	"""
	parser = argparse.ArgumentParser(description="Predict face shape from an image using a trained sklearn model.")
	parser.add_argument(
	"image",
	nargs="?",
	default="sample_image.jpg",
	help="Path to the input image (default: sample_image.jpg).",
	)
	return parser.parse_args(argv)


	if __name__ == "__main__":
	args = parse_args()
	predict_face_shape(args.image)