Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import base64 | |
def resize_image(image: np.ndarray, target_width=320, target_height=240) -> np.ndarray:
    """Resize *image* to exactly ``target_width`` x ``target_height`` pixels.

    The image is stretched to the requested size, so the aspect ratio is not
    preserved. The defaults implement the stated requirement of resizing
    images to 320x240 before processing.
    """
    # cv2.resize expects the output size as (width, height).
    output_size = (target_width, target_height)
    resized = cv2.resize(image, output_size)
    return resized
def decode_base64_image(base64_string: str) -> np.ndarray:
    """Decode a base64-encoded image (optionally a data URI) with OpenCV.

    Args:
        base64_string: Raw base64 text, or a data URI such as
            ``data:image/png;base64,<payload>``. When a comma is present,
            everything up to and including the first comma is treated as
            the header and dropped.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.
        NOTE(review): OpenCV documents that imdecode yields an empty result
        (``None`` in Python) when the bytes are not a decodable image, so
        callers should check for that.

    Raises:
        ValueError: If the payload decodes to zero bytes. Malformed base64
            raises ``binascii.Error`` from ``base64.b64decode``, which is a
            ``ValueError`` subclass.
    """
    # Split only on the first comma so just the data-URI header is removed.
    _header, separator, payload = base64_string.partition(",")
    if not separator:
        # No comma at all: the whole string is the payload.
        payload = base64_string

    image_bytes = base64.b64decode(payload)
    if not image_bytes:
        # Fail early with a clear message instead of handing an empty
        # buffer to cv2.imdecode.
        raise ValueError("base64 string contains no image data")

    nparr = np.frombuffer(image_bytes, np.uint8)
    return cv2.imdecode(nparr, cv2.IMREAD_COLOR)
def align_face(img, landmarks):
    """Warp a face to the 112x112 ArcFace layout using facial landmarks.

    Args:
        img: Image array passed to ``cv2.warpAffine``.
        landmarks: List of ``[x, y]`` pairs or array of shape ``(5, 2)`` in
            the order Left Eye, Right Eye, Nose, Left Mouth, Right Mouth.
            A ``(6, 2)`` input in MediaPipe Face Detection order
            (0: Right Eye, 1: Left Eye, 2: Nose, 3: Mouth Center,
            4: Right Ear, 5: Left Ear) is also accepted.
            Presumably the points are pixel coordinates of ``img`` --
            TODO confirm against the caller.

    Returns:
        The 112x112 image produced by ``cv2.warpAffine``.

    Raises:
        ValueError: If ``landmarks`` is not shaped ``(5, 2)`` or ``(6, 2)``,
            or if no transform could be estimated from the points.
    """
    # Standard 5 points for ArcFace (112x112)
    template = np.array(
        [
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041],
        ],
        dtype=np.float32,
    )

    # Accept plain lists as the docstring promises, not only ndarrays.
    points = np.asarray(landmarks, dtype=np.float32)
    if points.ndim != 2 or points.shape[1] != 2 or points.shape[0] not in (5, 6):
        raise ValueError(
            f"landmarks must have shape (5, 2) or (6, 2), got {points.shape}"
        )

    if points.shape[0] == 6:
        # MediaPipe Face Detection provides only a mouth centre, not the two
        # mouth corners (Face Mesh has them but is heavier). Use eyes, nose
        # and mouth centre, pairing the mouth centre with the midpoint of
        # the template's mouth corners; the ear points are ignored.
        # NOTE(review): assumes MediaPipe's index 0/1 eyes line up with the
        # template's first/second points -- verify visually on a sample.
        points = points[:4]
        mouth_center = template[3:5].mean(axis=0, keepdims=True)
        template = np.vstack([template[:3], mouth_center])

    # Similarity transform (rotation, uniform scale, translation) taking the
    # detected points onto the template.
    tform = cv2.estimateAffinePartial2D(points, template, method=cv2.LMEDS)[0]  # or RANSAC
    if tform is None:
        raise ValueError(
            "could not estimate an alignment transform from the landmarks"
        )

    warped = cv2.warpAffine(img, tform, (112, 112))
    return warped
def compute_cosine_similarity(embed1, embed2):
    """Return the cosine similarity of two embeddings as a Python float.

    Args:
        embed1: Array-like embedding; flattened to one dimension.
        embed2: Array-like embedding; flattened to one dimension.

    Returns:
        ``dot(e1, e2) / (|e1| * |e2|)``, or ``0.0`` when either vector has
        zero norm.

    Raises:
        ValueError: Propagated from ``np.dot`` when the flattened vectors
            have different lengths.
    """
    # Cast to float64 first: np.dot on small integer dtypes (e.g. uint8)
    # wraps around, which would silently corrupt the similarity.
    e1 = np.asarray(embed1, dtype=np.float64).ravel()
    e2 = np.asarray(embed2, dtype=np.float64).ravel()

    norm1 = np.linalg.norm(e1)
    norm2 = np.linalg.norm(e2)
    if norm1 == 0 or norm2 == 0:
        # Cosine similarity is undefined for a zero vector; report 0.0.
        return 0.0

    # Convert so both branches return the same plain float type.
    return float(np.dot(e1, e2) / (norm1 * norm2))