import cv2
import numpy as np
import base64
def resize_image(image: np.ndarray, target_width=320, target_height=240) -> np.ndarray:
    """
    Resize `image` to exactly (target_width, target_height) pixels.

    Aspect ratio is NOT preserved: the pipeline requirement is a fixed
    320x240 input before processing, so a plain fixed-size resize is used.
    """
    dsize = (target_width, target_height)  # cv2 expects (width, height)
    return cv2.resize(image, dsize)
def decode_base64_image(base64_string: str) -> np.ndarray:
    """
    Decode a base64-encoded image (optionally a data-URL) into a BGR array.

    A "data:image/...;base64,XXXX" prefix, if present, is stripped before
    decoding. Returns the result of cv2.imdecode (None for invalid image data).
    """
    if "," in base64_string:
        # Drop the data-URL header; keep the segment right after the first comma.
        base64_string = base64_string.split(",")[1]
    raw_bytes = base64.b64decode(base64_string)
    pixel_buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)
def align_face(img, landmarks):
    """
    Align a face to the canonical ArcFace 112x112 template.

    Parameters
    ----------
    img : np.ndarray
        Source image (BGR) containing the face.
    landmarks : array-like of shape (5, 2)
        Five facial points in pixel coordinates, in this order:
        left eye, right eye, nose tip, left mouth corner, right mouth corner.
        Lists of [x, y] pairs are accepted and converted.

    Returns
    -------
    np.ndarray
        The 112x112 aligned face crop.

    Raises
    ------
    ValueError
        If `landmarks` is not shaped (5, 2) or the similarity transform
        cannot be estimated.
    """
    # Standard ArcFace destination points for a 112x112 crop.
    src = np.array([
        [30.2946, 51.6963],   # left eye
        [65.5318, 51.5014],   # right eye
        [48.0252, 71.7366],   # nose tip
        [33.5493, 92.3655],   # left mouth corner
        [62.7299, 92.2041],   # right mouth corner
    ], dtype=np.float32)

    # Accept plain lists as documented; previously landmarks.shape crashed on them.
    dst = np.asarray(landmarks, dtype=np.float32)
    if dst.shape != (5, 2):
        # NOTE: MediaPipe Face Detection (BlazeFace) yields 6 keypoints
        # (right eye, left eye, nose tip, mouth *center*, ears) and has no
        # mouth corners, so it cannot drive this 5-point alignment directly.
        # Use a 5-point detector (or MediaPipe Face Mesh) upstream instead.
        raise ValueError(f"expected landmarks of shape (5, 2), got {dst.shape}")

    # LMEDS is robust to a single noisy landmark (RANSAC would also work).
    tform = cv2.estimateAffinePartial2D(dst, src, method=cv2.LMEDS)[0]
    if tform is None:
        # Degenerate landmark configuration (e.g. collinear/duplicate points).
        raise ValueError("could not estimate similarity transform from landmarks")
    return cv2.warpAffine(img, tform, (112, 112))
def compute_cosine_similarity(embed1, embed2):
    """
    Cosine similarity between two embedding vectors.

    Inputs may be lists or arrays of any shape; both are flattened first.
    Returns 0.0 when either vector has zero magnitude (undefined similarity).
    """
    v1 = np.array(embed1).flatten()
    v2 = np.array(embed2).flatten()
    mag1, mag2 = np.linalg.norm(v1), np.linalg.norm(v2)
    # Guard the degenerate case instead of dividing by zero.
    if mag1 == 0 or mag2 == 0:
        return 0.0
    return np.dot(v1, v2) / (mag1 * mag2)
|