# Source snapshot: commit 83d5d1c, file size 7,665 bytes.
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import torchvision.models as models
from PIL import Image
import onnxruntime as ort
from huggingface_hub import hf_hub_download
# ==========================================
# REPOSITORY CONFIGURATION
# ==========================================
# Hugging Face Hub repository that both model files are downloaded from.
REPO_ID = "biometric-ai-lab/Face_Recognition"
# File name of the PyTorch checkpoint holding the recognition weights.
RECOG_FILENAME = "pytorch_model.bin"
# File name of the ONNX face-detection model.
YOLO_FILENAME = "yolov8s-face-lindevs.onnx"
# ==========================================
# 1. MODEL ARCHITECTURE (identical to the training code)
# ==========================================
class FaceRecognitionModel(nn.Module):
    """Wide-ResNet-101-2 trunk followed by a 512-d, L2-normalised embedding head.

    The attribute names ``backbone`` and ``embed`` are part of the checkpoint
    format (they prefix the state-dict keys), so they must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # Build the trunk without pretrained weights: the trained checkpoint
        # is loaded on top of it afterwards.
        trunk = models.wide_resnet101_2(weights=None)
        # Drop the classification layer so the trunk emits raw pooled features.
        trunk.fc = nn.Identity()
        self.backbone = trunk

        # Projection head: 2048 pooled features -> 512-d embedding.
        head_layers = [
            nn.Linear(2048, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
        ]
        self.embed = nn.Sequential(*head_layers)

    def forward(self, img):
        """Return the unit-length (L2 over dim 1) embedding for a batch ``img``."""
        return F.normalize(self.embed(self.backbone(img)), p=2, dim=1)
# ==========================================
# 2. YOLO DETECTOR (mirrors the reference detection logic)
# ==========================================
class YOLOFaceDetector:
    """Run a YOLO face model through ONNX Runtime and crop the largest face.

    Args:
        model_path: Path to the ONNX model file.
        conf_threshold: Detections whose score (column 4 of each prediction
            row) is not strictly greater than this value are ignored.
        input_size: Side length of the square image fed to the network.
            Defaults to 640, the value previously hard-coded.
    """

    def __init__(self, model_path, conf_threshold=0.5, input_size=640):
        self.session = ort.InferenceSession(
            model_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [output.name for output in self.session.get_outputs()]
        self.conf_threshold = conf_threshold
        self.input_size = input_size

    @staticmethod
    def _select_largest_box(predictions, conf_threshold, img_width, img_height, input_size):
        """Pick the largest confident box, in original-image pixel coordinates.

        Args:
            predictions: 2-D array whose rows start with
                ``x_center, y_center, width, height, confidence`` expressed in
                network-input pixels.
            conf_threshold: Minimum (exclusive) confidence for a row to count.
            img_width: Width of the original image in pixels.
            img_height: Height of the original image in pixels.
            input_size: Side length of the square network input.

        Returns:
            ``(x1, y1, x2, y2)`` as Python ints clamped to the image, or
            ``None`` when no confident box overlaps the image.
        """
        preds = np.asarray(predictions)
        if preds.size == 0:
            return None

        confident = preds[preds[:, 4] > conf_threshold]
        if confident.shape[0] == 0:
            return None

        # Rescale from the square network input back to the original image.
        cx = confident[:, 0] * img_width / input_size
        cy = confident[:, 1] * img_height / input_size
        bw = confident[:, 2] * img_width / input_size
        bh = confident[:, 3] * img_height / input_size

        # Truncate toward zero (same as int()) and clamp BOTH corners to the
        # image. Clamping only one side per corner lets a box that lies fully
        # outside the image end up with inverted coordinates.
        x1 = np.clip((cx - bw / 2).astype(np.int64), 0, img_width)
        y1 = np.clip((cy - bh / 2).astype(np.int64), 0, img_height)
        x2 = np.clip((cx + bw / 2).astype(np.int64), 0, img_width)
        y2 = np.clip((cy + bh / 2).astype(np.int64), 0, img_height)

        # Floor each side at zero before multiplying: two negative sides would
        # otherwise multiply into a bogus positive area.
        areas = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)

        # argmax returns the first maximum, i.e. the earliest row wins ties.
        best = int(np.argmax(areas))
        if areas[best] <= 0:
            return None
        return (int(x1[best]), int(y1[best]), int(x2[best]), int(y2[best]))

    def detect_extract_face(self, image_pil, expand_ratio=0.0):
        """Detect faces and return a crop of the largest one.

        Args:
            image_pil: Input PIL image. Any mode is accepted; an RGB copy is
                used for detection while the crop is taken from ``image_pil``
                itself.
            expand_ratio: When non-zero, the box is padded on every side by
                ``expand_ratio * max(box_width, box_height)`` pixels (clamped
                to the image bounds).

        Returns:
            The cropped PIL image, or ``image_pil`` unchanged (after printing
            a warning) when no face is detected.
        """
        # The detector needs exactly three RGB channels; RGBA / L / P inputs
        # are converted on a copy so the caller's image is left alone.
        rgb_image = image_pil if image_pil.mode == "RGB" else image_pil.convert("RGB")
        img_width, img_height = rgb_image.size

        # Preprocess: resize -> scale to [0, 1] -> CHW -> add batch dimension.
        # Note: YOLO training typically uses RGB, so no channel swap is needed.
        resized = cv2.resize(np.array(rgb_image), (self.input_size, self.input_size))
        normalized = resized.astype(np.float32) / 255.0
        img_batch = np.expand_dims(np.transpose(normalized, (2, 0, 1)), axis=0)

        # Inference
        outputs = self.session.run(self.output_names, {self.input_name: img_batch})
        predictions = outputs[0]
        if len(predictions.shape) == 3:
            # Drop the batch dimension and transpose so each row is one candidate.
            predictions = predictions[0].T

        # Post-process: keep the largest confident face.
        best_face = self._select_largest_box(
            predictions, self.conf_threshold, img_width, img_height, self.input_size
        )

        if best_face is None:
            print("⚠️ Warning: No face detected. Using full image.")
            return image_pil

        x1, y1, x2, y2 = best_face

        # Optional padding around the detected box.
        if expand_ratio != 0:
            pad = int(expand_ratio * max(x2 - x1, y2 - y1))
            x1 = max(0, x1 - pad)
            y1 = max(0, y1 - pad)
            x2 = min(img_width, x2 + pad)
            y2 = min(img_height, y2 + pad)

        # Crop from the original PIL image to keep the best quality.
        return image_pil.crop((x1, y1, x2, y2))
# ==========================================
# 3. FACE ANALYSIS WRAPPER
# ==========================================
class FaceAnalysis:
    """End-to-end face verification: download models, detect, embed, compare.

    Args:
        device: Torch device string for the recognition model. When falsy,
            ``'cuda'`` is used if available, otherwise ``'cpu'``.

    Raises:
        RuntimeError: If either model file cannot be downloaded from the Hub.
    """

    def __init__(self, device=None):
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"🚀 Initializing Face Analysis on {self.device}...")

        # 1. Download (or reuse cached) model files.
        try:
            print(f"📥 Checking models from {REPO_ID}...")
            recog_path = hf_hub_download(repo_id=REPO_ID, filename=RECOG_FILENAME)
            yolo_path = hf_hub_download(repo_id=REPO_ID, filename=YOLO_FILENAME)
        except Exception as e:
            # Chain the original exception so its traceback is preserved.
            raise RuntimeError(
                f"❌ Failed to download models. Check internet or Repo ID.\nError: {e}"
            ) from e

        # 2. Init YOLO
        self.yolo = YOLOFaceDetector(yolo_path, conf_threshold=0.5)

        # 3. Init recognition model
        self.model = FaceRecognitionModel().to(self.device)

        # NOTE(security): torch.load unpickles the file. The checkpoint comes
        # from a remote repository, so consider passing weights_only=True once
        # the checkpoint is confirmed to contain only tensors.
        checkpoint = torch.load(recog_path, map_location=self.device)
        if 'model' in checkpoint:
            self.model.load_state_dict(checkpoint['model'])
        else:
            # Fallback when the file holds the bare state dict.
            self.model.load_state_dict(checkpoint)
        self.model.eval()

        # 4. Transform (identical to the original inference transform)
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])
        print("✅ System Ready!")

    def process_image(self, image_source, expand_ratio=0.0):
        """Return the face embedding for one image.

        Args:
            image_source: A file path (``str`` or ``os.PathLike``), a PIL
                image, or a NumPy array. Arrays are passed through
                ``cv2.COLOR_BGR2RGB``, so they are treated as 3-channel BGR.
            expand_ratio: Padding ratio forwarded to the face detector.

        Returns:
            The tensor produced by the recognition model for the single
            cropped face (batch dimension of 1), computed under
            ``torch.no_grad()``.

        Raises:
            FileNotFoundError: If a path is given and does not exist.
            ValueError: If ``image_source`` is of an unsupported type.
        """
        # Load the image as RGB.
        if isinstance(image_source, (str, os.PathLike)):
            if not os.path.exists(image_source):
                raise FileNotFoundError(f"Image not found: {image_source}")
            # The context manager closes the file handle promptly; convert()
            # returns an independent in-memory copy.
            with Image.open(image_source) as opened:
                img_pil = opened.convert('RGB')
        elif isinstance(image_source, Image.Image):
            img_pil = image_source.convert('RGB')
        elif isinstance(image_source, np.ndarray):
            img_pil = Image.fromarray(cv2.cvtColor(image_source, cv2.COLOR_BGR2RGB))
        else:
            raise ValueError("Input must be filepath, PIL Image, or Numpy Array")

        # 1. YOLO detect & crop
        face_crop = self.yolo.detect_extract_face(img_pil, expand_ratio=expand_ratio)

        # 2. Transform & embed
        img_tensor = self.transform(face_crop).unsqueeze(0).to(self.device)
        with torch.no_grad():
            embedding = self.model(img_tensor)
        return embedding

    def compare(self, img1, img2, threshold=0.45, expand_ratio=0.01):
        """Compare two images.

        Args:
            img1: First image (any type accepted by ``process_image``).
            img2: Second image (any type accepted by ``process_image``).
            threshold: The pair is reported as the same person when the
                cosine similarity is strictly greater than this value.
            expand_ratio: Padding ratio for the face crop; the default of
                0.01 matches the demo code.

        Returns:
            ``(similarity, is_same)``: the cosine similarity as a Python float
            and the boolean verdict.
        """
        emb1 = self.process_image(img1, expand_ratio)
        emb2 = self.process_image(img2, expand_ratio)

        # Cosine similarity
        similarity = F.cosine_similarity(emb1, emb2).item()
        is_same = similarity > threshold
        return similarity, is_same