Spaces:
Sleeping
Sleeping
| import torch | |
| import torchvision | |
| import numpy as np | |
| import argparse | |
| import copy | |
| import cv2 | |
| import os | |
| from contextlib import nullcontext | |
| from huggingface_hub import hf_hub_download | |
| from facenet_pytorch import MTCNN | |
| from models import MobileGenerator, MobileNetV3MultiTask | |
class Face:
    """A face described by five landmarks plus a derived square bounding box.

    The landmarks are unpacked as two eye points, a nose point and two mouth
    points, in that order (the detector in this file passes left eye, right
    eye, nose, left mouth, right mouth). From them the instance derives:

    * ``bbox``  -- ``(x, y, w, h)`` of a square box with integer coordinates,
    * ``theta`` -- angle of the eye-to-eye vector in radians (``np.arctan2``).
    """

    def __init__(self, keypoint: list[tuple[int, int]]):
        """Build the face geometry from five ``(x, y)`` landmarks.

        Each landmark may be a tuple, a list or a NumPy array.

        Raises:
            ValueError: if ``keypoint`` does not contain exactly five points.
        """
        self._fit(keypoint)

    def _fit(self, keypoint) -> None:
        """Store ``keypoint`` and recompute ``bbox`` and ``theta`` from it."""
        self.keypoint = keypoint
        # Coerce every landmark to an array so that plain tuples/lists support
        # the vector arithmetic below. Arrays pass through with their dtype
        # untouched, so results for array input are unchanged.
        e0, e1, _nose, m0, m1 = (np.asarray(point) for point in keypoint)

        eye_vec = e1 - e0
        eye_mid = 0.5 * (e0 + e1)
        # Vector from the mouth midpoint to the eye midpoint.
        mouth_to_eyes = eye_mid - 0.5 * (m0 + m1)
        # Box centre: the eye midpoint shifted by 10% of that vector, away
        # from the eyes and towards the mouth.
        center = eye_mid - 0.1 * mouth_to_eyes
        cx, cy = int(center[0]), int(center[1])

        # Side length: the larger of 4x the eye distance and 3.6x the
        # eye-to-mouth distance, truncated to an integer.
        side = int(max(4.0 * np.linalg.norm(eye_vec),
                       3.6 * np.linalg.norm(mouth_to_eyes)))

        # bbox: (x, y, w, h)
        self.bbox = (cx - side // 2, cy - side // 2, side, side)
        self.theta = np.arctan2(eye_vec[1], eye_vec[0])

    def get_center(self):
        """Return the integer ``(x, y)`` centre of ``bbox``."""
        x, y, w, h = self.bbox
        return x + w // 2, y + h // 2

    def get_size(self):
        """Return the width of ``bbox`` (the box is square)."""
        return self.bbox[2]

    def set_attributes(self, age: int, gender: str):
        """Attach ``age`` and ``gender`` to this face; values are stored as given."""
        self.age = age
        self.gender = gender

    def update(self, keypoint: list[tuple[int, int]]):
        """Replace the landmarks and recompute ``bbox`` and ``theta``.

        Attributes set through ``set_attributes`` are left untouched.
        """
        self._fit(keypoint)

    def calc_iou(self, other) -> float:
        """Return the intersection-over-union of this box and ``other.bbox``.

        Returns ``0.0`` when the union has zero area.
        """
        ax, ay, aw, ah = self.bbox
        bx, by, bw, bh = other.bbox
        left = max(ax, bx)
        top = max(ay, by)
        right = min(ax + aw, bx + bw)
        bottom = min(ay + ah, by + bh)
        # Clamp each extent at zero so disjoint boxes give an empty intersection.
        inter_area = max(0, right - left) * max(0, bottom - top)
        union_area = aw * ah + bw * bh - inter_area
        if union_area == 0:
            return 0.0
        return inter_area / union_area
class FaceSet:
    """List of tracked faces with a parallel "consecutive misses" counter.

    ``faces[k]`` and ``nonused_counter[k]`` always describe the same face.
    """

    # Loaded once, when the class body executes: "latent_ids.npz" is fetched
    # from the Hub repository named by the HF_GEN_REPO_ID environment
    # variable, authenticating with HF_HUB_TOKEN.
    latent_ids = np.load(
        hf_hub_download(
            repo_id=os.getenv("HF_GEN_REPO_ID"),
            filename="latent_ids.npz",
            token=os.getenv("HF_HUB_TOKEN"),
        )
    )

    def __init__(self):
        """Create an empty set."""
        self.faces = []
        self.nonused_counter = []

    def append(self, face: Face):
        """Add ``face`` with its miss counter starting at zero."""
        self.faces.append(face)
        self.nonused_counter.append(0)

    def set_attributes(self, i: int, age: int, gender: str):
        """Store ``age``/``gender`` on face ``i`` and pick its latent id.

        NOTE: despite the annotations, both arguments are indexed with
        ``[0]`` below, so they must be sequences whose first element is the
        value. The 80/"M" combination is remapped to 70 by writing into
        ``age`` in place; the same object was just stored on the face, so the
        face sees the remapped value too.
        """
        target = self.faces[i]
        target.set_attributes(age, gender)
        if age[0] == 80 and gender[0] == "M":
            age[0] = 70
        latent_key = f"{age[0]}_{gender[0]}_jp"
        target.latent_id = self.latent_ids[latent_key]

    def __len__(self) -> int:
        """Return the number of tracked faces."""
        return len(self.faces)

    def __getitem__(self, idx: int) -> Face:
        """Return the face at position ``idx``."""
        return self.faces[idx]

    def __iter__(self):
        """Iterate over the tracked faces in their current order."""
        return iter(self.faces)

    def update(self, other, reset_nonused_threshold: int):
        """Merge a new batch of detections into the tracked faces.

        Each face in ``other`` is compared with every tracked face. If the
        best IoU exceeds 0.3, that tracked face takes the new landmarks and
        its miss counter is reset; otherwise the detection is appended as a
        new face. Tracked faces not touched by this batch have their counter
        incremented. Finally the faces are reordered by descending counter,
        and those whose counter has reached ``reset_nonused_threshold`` are
        dropped.
        """
        touched = set()
        for candidate in other:
            best_iou, best_idx = 0, -1
            # self.faces may have grown during this loop, so later
            # candidates can also match faces appended earlier in the batch.
            for idx, tracked in enumerate(self.faces):
                overlap = candidate.calc_iou(tracked)
                if overlap > best_iou:
                    best_iou, best_idx = overlap, idx

            if best_iou > 0.3:
                self.faces[best_idx].update(candidate.keypoint)
                self.nonused_counter[best_idx] = 0
            else:
                self.append(candidate)
                best_idx = len(self.faces) - 1
            touched.add(best_idx)

        for idx in range(len(self.faces)):
            if idx not in touched:
                self.nonused_counter[idx] += 1

        # Reorder by descending miss count and drop expired faces in one
        # pass, keeping the two lists aligned.
        order = np.argsort(self.nonused_counter)[::-1]
        survivors = [
            (self.faces[j], self.nonused_counter[j])
            for j in order
            if self.nonused_counter[j] < reset_nonused_threshold
        ]
        self.faces = [face for face, _ in survivors]
        self.nonused_counter = [count for _, count in survivors]
class FaceCropper:
    """Detects faces, cuts aligned face crops, and pastes crops back."""

    def __init__(self):
        """Create the MTCNN detector and the feathered blending mask."""
        self.size = 256
        self.crop_size = 224
        self.detector = MTCNN(
            select_largest=False,
            keep_all=True,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )
        # Alpha mask for pasting a crop back: opaque except for an 8-pixel
        # border, then softened with a 31x31 Gaussian blur.
        mask = np.zeros((self.crop_size, self.crop_size), dtype=np.uint8)
        mask[8:-8, 8:-8] = 255
        self.mask = cv2.GaussianBlur(mask, (31, 31), 0)

    def _alignment_matrix(self, cx, cy, theta, s) -> np.ndarray:
        """Return the 2x3 affine matrix that maps the face into the crop.

        Rotates by ``theta`` (radians, converted to degrees for OpenCV) and
        scales by ``self.size / s * 1.14`` about ``(cx, cy)``, then translates
        so that point lands on the centre of the crop.
        """
        matrix = cv2.getRotationMatrix2D(
            (cx, cy), np.degrees(theta), self.size / s * 1.14
        )
        half = self.crop_size // 2
        matrix[0, 2] += half - cx
        matrix[1, 2] += half - cy
        return matrix

    def detect_keypoints(self, image: np.ndarray) -> FaceSet:
        """Run the detector on ``image`` and wrap each result in a ``Face``.

        Returns an empty ``FaceSet`` when the detector reports no landmarks.
        """
        _, _, points = self.detector.detect(image, landmarks=True)
        faces = FaceSet()
        if points is None:
            return faces
        for landmarks in points:
            # The first five rows are used in this order:
            # left eye, right eye, nose, left mouth, right mouth.
            faces.append(Face(keypoint=[landmarks[k] for k in range(5)]))
        return faces

    def crop_and_resize(self, image: np.ndarray, face: Face) -> np.ndarray:
        """Return a ``crop_size`` x ``crop_size`` crop of ``image`` aligned to ``face``."""
        cx, cy = face.get_center()
        matrix = self._alignment_matrix(cx, cy, face.theta, face.get_size())
        return cv2.warpAffine(
            image, matrix, (self.crop_size, self.crop_size), flags=cv2.INTER_LINEAR
        )

    def invert_image(self, image: np.ndarray, cropped: np.ndarray, face: Face) -> np.ndarray:
        """Warp ``cropped`` back to the pose of ``face`` and blend it into ``image``.

        Only the window ``[cx - s, cx + s] x [cy - s, cy + s]`` (clipped to
        the image) is processed. If that window is empty, ``image`` itself is
        returned; otherwise a modified copy is returned and ``image`` is left
        untouched.
        """
        cx, cy = face.get_center()
        s = face.get_size()
        x0 = max(0, int(np.floor(cx - s)))
        y0 = max(0, int(np.floor(cy - s)))
        x1 = min(image.shape[1], int(np.ceil(cx + s)))
        y1 = min(image.shape[0], int(np.ceil(cy + s)))
        if x0 >= x1 or y0 >= y1:
            return image

        window = image[y0:y1, x0:x1]
        window_size = (x1 - x0, y1 - y0)
        # Build the forward matrix in window-local coordinates, then invert
        # it to map crop pixels back into the window.
        forward = self._alignment_matrix(cx - x0, cy - y0, face.theta, s)
        backward = cv2.invertAffineTransform(forward)

        restored = cv2.warpAffine(cropped, backward, window_size, flags=cv2.INTER_LINEAR)
        alpha = cv2.warpAffine(self.mask, backward, window_size)
        # Adds a trailing axis so the mask broadcasts over the channel axis.
        alpha = alpha.astype(np.float32)[:, :, None] / 255.0
        blended = (
            window.astype(np.float32) * (1 - alpha)
            + restored.astype(np.float32) * alpha
        )

        result = image.copy()
        result[y0:y1, x0:x1] = blended.astype(np.uint8)
        return result
class FaceSwapper:
    """Wraps the generator and the age/gender classifier used for swapping."""

    def __init__(self, model_path: str, classifier_checkpoint: str):
        """Load both networks and put them in eval mode on the chosen device.

        Args:
            model_path: file holding the generator's state dict.
            classifier_checkpoint: file holding a dict whose
                ``"model_state_dict"`` entry is the classifier's state dict.
        """
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        # NOTE(security): weights_only=False lets torch.load unpickle
        # arbitrary objects. Only load checkpoints from a trusted source.
        self.generator = MobileGenerator(input_nc=3, output_nc=3, latent_dim=512, n_blocks=6)
        self.generator.load_state_dict(
            torch.load(model_path, map_location=torch.device("cpu"), weights_only=False)
        )
        self.generator.to(self.device).eval()

        self.classifier = MobileNetV3MultiTask(
            model_name="mobilenetv3_small_100", num_age_classes=10, num_gender_classes=2
        )
        self.classifier.to(self.device).eval()
        self.classifier.load_state_dict(
            torch.load(
                classifier_checkpoint, map_location=torch.device("cpu"), weights_only=False
            )["model_state_dict"]
        )

        # Per-channel normalisation constants, kept on the CPU and shaped to
        # broadcast over (N, 3, H, W) tensors.
        self.mean = torch.tensor([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    def _autocast(self):
        """Return fp16 autocast on CUDA, or a no-op context on other devices."""
        if self.device.type == "cuda":
            return torch.autocast("cuda", dtype=torch.float16)
        return nullcontext()

    def np2tensor(self, imgs: "np.ndarray | list[np.ndarray]") -> torch.Tensor:
        """Convert one image, or a list of images, to a normalised CPU tensor.

        Anything that is not a ``list`` is treated as a single image. Images
        are stacked, scaled by 1/255, moved from (N, H, W, C) to
        (N, C, H, W) and normalised with ``self.mean`` / ``self.std``.
        """
        if not isinstance(imgs, list):
            imgs = [imgs]
        batch = np.stack(imgs, axis=0).astype(np.float32) / 255
        tensor = torch.from_numpy(batch).permute(0, 3, 1, 2)
        return (tensor - self.mean) / self.std

    def tensor2np(self, imgs: torch.Tensor) -> np.ndarray:
        """Undo ``np2tensor``: return a uint8 array laid out as (N, H, W, C).

        The input is moved to the CPU first because ``self.mean`` and
        ``self.std`` live there. Values are clipped to [0, 1] before scaling
        to 0..255.
        """
        restored = imgs.detach().cpu() * self.std + self.mean
        array = restored.permute(0, 2, 3, 1).numpy()
        array = np.clip(array, 0, 1)
        return (array * 255).astype(np.uint8)

    def classify(self, img: "np.ndarray | list[np.ndarray]") -> list[tuple[list, list]]:
        """Predict age and gender for each input image.

        Returns:
            One ``([age, age_prob], [gender, gender_prob])`` pair per image.
            ``age`` is the arg-max class index times 10, ``gender`` is ``"F"``
            for class 0 and ``"M"`` otherwise, and each ``*_prob`` is the
            softmax probability of the chosen class. The inner containers are
            lists, so callers can modify them in place.
        """
        with torch.no_grad(), self._autocast():
            img_tensor = self.np2tensor(img).to(self.device)
            ages, genders = self.classifier(img_tensor)
            ages = torch.softmax(ages, dim=1)
            genders = torch.softmax(genders, dim=1)

        attributes = []
        for age_probs, gender_probs in zip(ages, genders):
            age = age_probs.argmax().item() * 10
            gender = "F" if gender_probs.argmax().item() == 0 else "M"
            attributes.append(
                ([age, age_probs.max().item()], [gender, gender_probs.max().item()])
            )
        return attributes

    def swap(self, img_att: np.ndarray, latent_ids: list[np.ndarray]) -> np.ndarray:
        """Run the generator on ``img_att`` conditioned on ``latent_ids``.

        ``latent_ids`` are stacked row-wise with ``np.vstack``. The generator
        output is returned as uint8 images via ``tensor2np``.
        """
        with torch.no_grad(), self._autocast():
            img_tensor = self.np2tensor(img_att).to(self.device)
            latents = torch.from_numpy(np.vstack(latent_ids)).to(self.device)
            output = self.generator(img_tensor, latents)
        return self.tensor2np(output.to("cpu"))