lhx05 committed on Apr 16

Commit

fb24bef

verified ·

1 Parent(s): a2f2478

Upload CVLFace experiment code

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

cvlface/research/recognition/code/run_v1/README.md +0 -0
cvlface/research/recognition/code/run_v1/aligners/__init__.py +25 -0
cvlface/research/recognition/code/run_v1/aligners/base/__init__.py +60 -0
cvlface/research/recognition/code/run_v1/aligners/base/utils.py +91 -0
cvlface/research/recognition/code/run_v1/aligners/configs/dfa.yaml +10 -0
cvlface/research/recognition/code/run_v1/aligners/configs/none.yaml +3 -0
cvlface/research/recognition/code/run_v1/aligners/configs/retinaface.yaml +3 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/__init__.py +117 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/aligner_helper.py +97 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/__init__.py +27 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/config.py +18 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/__init__.py +2 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/functions/prior_box.py +140 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/modules/__init__.py +3 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/modules/multibox_loss.py +144 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/__init__.py +0 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/net.py +132 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/retinaface.py +142 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/preprocessor.py +93 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/utils/box_utils.py +239 -0
cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/utils/model_utils.py +36 -0
cvlface/research/recognition/code/run_v1/aligners/none/__init__.py +20 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/__init__.py +246 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/aligner_helper.py +97 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/__init__.py +28 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/config.py +18 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/__init__.py +2 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/functions/prior_box.py +140 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/modules/__init__.py +3 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/modules/multibox_loss.py +144 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/__init__.py +0 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/net.py +132 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/retinaface.py +123 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/preprocessor.py +93 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/utils/box_utils.py +239 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/utils/model_utils.py +36 -0
cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface_pipeline.py +247 -0
cvlface/research/recognition/code/run_v1/base.yaml +12 -0
cvlface/research/recognition/code/run_v1/classifiers/__init__.py +31 -0
cvlface/research/recognition/code/run_v1/classifiers/base/__init__.py +87 -0
cvlface/research/recognition/code/run_v1/classifiers/base/utils.py +91 -0
cvlface/research/recognition/code/run_v1/classifiers/configs/fc.yaml +4 -0
cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc.yaml +4 -0
cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_freeze.yaml +4 -0
cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_sample10.yaml +4 -0
cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_sample10_freeze.yaml +4 -0
cvlface/research/recognition/code/run_v1/classifiers/fc/__init__.py +55 -0
cvlface/research/recognition/code/run_v1/classifiers/fc/fc.py +67 -0
cvlface/research/recognition/code/run_v1/classifiers/partial_fc/__init__.py +39 -0
cvlface/research/recognition/code/run_v1/classifiers/partial_fc/partial_fc.py +289 -0

cvlface/research/recognition/code/run_v1/README.md ADDED Viewed

File without changes

cvlface/research/recognition/code/run_v1/aligners/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from .base import BaseAligner
+def get_aligner(aligner_cfg):
+    if aligner_cfg.name == 'none':
+        from .none import NoneAligner
+        aligner = NoneAligner.from_config(aligner_cfg)
+    elif aligner_cfg.name == 'retinaface_aligner':
+        from .retinaface_aligner import RetinaFaceAligner
+        aligner = RetinaFaceAligner.from_config(aligner_cfg)
+    elif aligner_cfg.name == 'differentiable_face_aligner':
+        from .differentiable_face_aligner import DifferentiableFaceAligner
+        aligner = DifferentiableFaceAligner.from_config(aligner_cfg)
+    else:
+        raise ValueError(f"Unknown classifier: {aligner_cfg.name}")
+    if aligner_cfg.start_from:
+        aligner.load_state_dict_from_path(aligner_cfg.start_from)
+    if aligner_cfg.freeze:
+        for param in aligner.parameters():
+            param.requires_grad = False
+    return aligner

cvlface/research/recognition/code/run_v1/aligners/base/__init__.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import os
+from typing import Union
+import torch
+from torch import device
+from .utils import get_parameter_device, get_parameter_dtype, save_state_dict_and_config, load_state_dict_from_path
+class BaseAligner(torch.nn.Module):
+    def __init__(self, config=None):
+        super().__init__()
+        self.config = config
+    @classmethod
+    def from_config(cls, config) -> "BaseAligner":
+        raise NotImplementedError('from_config must be implemented in subclass')
+    def make_train_transform(self):
+        raise NotImplementedError('from_config must be implemented in subclass')
+    def make_test_transform(self):
+        raise NotImplementedError('from_config must be implemented in subclass')
+    def forward(self, x):
+        raise NotImplementedError('from_config must be implemented in subclass')
+    def save_pretrained(
+        self,
+        save_dir: Union[str, os.PathLike],
+        name: str = 'model.pt',
+        rank: int = 0,
+    ):
+        save_path = os.path.join(save_dir, name)
+        if rank == 0:
+            save_state_dict_and_config(self.state_dict(), self.config, save_path)
+    def load_state_dict_from_path(self, pretrained_model_path):
+        state_dict = load_state_dict_from_path(pretrained_model_path)
+        result = self.load_state_dict(state_dict)
+        print(f"Loaded pretrained aligner from {pretrained_model_path}")
+    @property
+    def device(self) -> device:
+        return get_parameter_device(self)
+    @property
+    def dtype(self) -> torch.dtype:
+        return get_parameter_dtype(self)
+    def num_parameters(self, only_trainable: bool = False) -> int:
+        return sum(p.numel() for p in self.parameters() if p.requires_grad or not only_trainable)
+    def has_trainable_params(self):
+        for param in self.parameters():
+            if param.requires_grad:
+                return True
+        return False
+    def has_params(self):
+        return len(list(self.parameters())) > 0

cvlface/research/recognition/code/run_v1/aligners/base/utils.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import itertools
+from typing import List, Optional, Tuple, Union
+import safetensors
+import torch
+from torch import Tensor
+import os
+from pathlib import Path
+from omegaconf import DictConfig, OmegaConf
+def get_parameter_device(parameter: torch.nn.Module):
+    try:
+        parameters_and_buffers = itertools.chain(parameter.parameters(), parameter.buffers())
+        return next(parameters_and_buffers).device
+    except StopIteration:
+        # For torch.nn.DataParallel compatibility in PyTorch 1.5
+        def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
+            tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+            return tuples
+        gen = parameter._named_members(get_members_fn=find_tensor_attributes)
+        first_tuple = next(gen)
+        return first_tuple[1].device
+def get_parameter_dtype(parameter: torch.nn.Module):
+    try:
+        params = tuple(parameter.parameters())
+        if len(params) > 0:
+            return params[0].dtype
+        buffers = tuple(parameter.buffers())
+        if len(buffers) > 0:
+            return buffers[0].dtype
+    except StopIteration:
+        # For torch.nn.DataParallel compatibility in PyTorch 1.5
+        def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
+            tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
+            return tuples
+        gen = parameter._named_members(get_members_fn=find_tensor_attributes)
+        first_tuple = next(gen)
+        return first_tuple[1].dtype
+def get_parent_directory(save_path: Union[str, os.PathLike]) -> Path:
+    path_obj = Path(save_path)
+    return path_obj.parent
+def get_base_name(save_path: Union[str, os.PathLike]) -> str:
+    path_obj = Path(save_path)
+    return path_obj.name
+def load_state_dict_from_path(path: Union[str, os.PathLike]):
+    # Load a state dict from a path.
+    if 'safetensors' in path:
+        state_dict = safetensors.torch.load_file(path)
+    else:
+        state_dict = torch.load(path, map_location="cpu")
+    return state_dict
+def replace_extension(path, new_extension):
+    if not new_extension.startswith('.'):
+        new_extension = '.' + new_extension
+    return os.path.splitext(path)[0] + new_extension
+def make_config_path(save_path):
+    config_path = replace_extension(save_path, '.yaml')
+    return config_path
+def save_config(config, config_path):
+    assert isinstance(config, dict) or isinstance(config, DictConfig)
+    os.makedirs(get_parent_directory(config_path), exist_ok=True)
+    if isinstance(config, dict):
+        config = OmegaConf.create(config)
+    OmegaConf.save(config, config_path)
+def save_state_dict_and_config(state_dict, config, save_path):
+    os.makedirs(get_parent_directory(save_path), exist_ok=True)
+    # save config dict
+    config_path = make_config_path(save_path)
+    save_config(config, config_path)
+    # Save the model
+    if 'safetensors' in save_path:
+        safetensors.torch.save_file(state_dict, save_path, metadata={"format": "pt"})
+    else:
+        torch.save(state_dict, save_path)

cvlface/research/recognition/code/run_v1/aligners/configs/dfa.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+name: differentiable_face_aligner
+arch: 'mobile0.25'
+start_from: '../../../../pretrained_models/alignment/dfa_mobilenet/mobilenet0.25.pth'
+freeze: True
+input_padding_ratio: 0 # ratio of padding added around the input before resize
+input_padding_val: 'zero'
+input_size: 160  # resize the input to this size
+output_size: 112  # size of the output of aligner
+color_space: 'RGB'  # color space of the input image

cvlface/research/recognition/code/run_v1/aligners/configs/none.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+name: none
+start_from: ''
+freeze: False

cvlface/research/recognition/code/run_v1/aligners/configs/retinaface.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+name: retinaface
+start_from: ''
+freeze: True

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/__init__.py ADDED Viewed

	@@ -0,0 +1,117 @@

+from ..base import BaseAligner
+from torchvision import transforms
+from .dfa import get_landmark_predictor, get_preprocessor
+from . import aligner_helper
+import torch
+import torch.nn.functional as F
+import numpy as np
+class DifferentiableFaceAligner(BaseAligner):
+    '''
+    A differentiable face aligner that aligns the image with one face to a canonical position.
+    The aligner is based on the following paper (check out supplementary material for more details):
+    @inproceedings{kim2024kprpe,
+       title={KeyPoint Relative Position Encoding for Face Recognition},
+       author={Kim, Minchul and Su, Yiyang and Liu, Feng and Liu, Xiaoming},
+       booktitle={CVPR},
+       year={2024}
+    }
+    '''
+    def __init__(self, net, prior_box, preprocessor, config):
+        # net: landmark network, called as net(x_bgr, prior_box) in forward
+        # prior_box: must expose image_size matching the preprocessed input
+        # preprocessor: callable applied to the raw batch; exposes .padding
+        # config: read for input_size / output_size in forward
+        super(DifferentiableFaceAligner, self).__init__()
+        self.net = net
+        self.prior_box = prior_box
+        self.preprocessor = preprocessor
+        self.config = config
+    @classmethod
+    def from_config(cls, config):
+        # Builds the network, priors and preprocessor from config and returns
+        # the aligner in eval mode. Reads config.arch, input_size,
+        # input_padding_ratio, input_padding_val and freeze.
+        net, prior_box = get_landmark_predictor(network=config.arch,
+                                                use_aggregator=True,
+                                                input_size=config.input_size)
+        preprocessor = get_preprocessor(output_size=config.input_size,
+                                        padding=config.input_padding_ratio,
+                                        padding_val=config.input_padding_val)
+        if config.freeze:
+            for param in net.parameters():
+                param.requires_grad = False
+        model = cls(net, prior_box, preprocessor, config)
+        model.eval()
+        return model
+    def forward(self, x, padding_ratio_override=None):
+        # Aligns a batch of images.
+        # x: 4-D tensor with 3 channels in dim 1 (asserted below).
+        # padding_ratio_override: replaces the preprocessor's padding ratio
+        #   when not None.
+        # Returns (aligned_x, landmarks, aligned_ldmks, score, thetas,
+        # normalized_bbox). landmarks and normalized_bbox are None when the
+        # input is not square.
+        # NOTE(review): landmarks look like normalised [0, 1] coordinates
+        # (the helper multiplies them by the image size) - confirm.
+        # input size check
+        assert x.shape[1] == 3
+        assert x.ndim == 4
+        assert isinstance(x, torch.Tensor)
+        is_square = x.shape[2] == x.shape[3]
+        x = self.preprocessor(x, padding_ratio_override=padding_ratio_override)
+        assert self.prior_box.image_size == x.shape[2:]
+        # make image into BGR
+        x_bgr = x.flip(1)
+        result = self.net(x_bgr, self.prior_box)
+        orig_pred_ldmks, bbox, cls = aligner_helper.split_network_output(result)
+        # keep every class column except the first one as the score
+        score = torch.nn.Softmax(dim=-1)(cls)[:,1:]
+        reference_ldmk = aligner_helper.reference_landmark()
+        input_size = self.config.input_size
+        output_size = self.config.output_size
+        # fit a per-image transform from predicted to reference landmarks,
+        # then convert it into a theta for affine_grid
+        cv2_tfms = aligner_helper.get_cv2_affine_from_landmark(orig_pred_ldmks, reference_ldmk, input_size, input_size)
+        thetas = aligner_helper.cv2_param_to_torch_theta(cv2_tfms, input_size, input_size, output_size, output_size)
+        thetas = thetas.to(orig_pred_ldmks.device)
+        # output_size is rebound here from an int to the full (N, 3, H, W) size
+        output_size = torch.Size((len(thetas), 3, output_size, output_size))
+        grid = F.affine_grid(thetas, output_size, align_corners=True)
+        aligned_x = F.grid_sample(x + 1, grid, align_corners=True) - 1  # +1, -1 for making padding pixel 0
+        aligned_ldmks = aligner_helper.adjust_ldmks(orig_pred_ldmks.view(-1, 5, 2), thetas)
+        orig_pred_ldmks = orig_pred_ldmks.view(-1, 5, 2)
+        # bbox (xmin, ymin, xmax, ymax)
+        normalized_bbox = bbox / torch.tensor([[x_bgr.size(3), x_bgr.size(2)] * 2]).to(bbox.device)
+        if padding_ratio_override is None:
+            padding_ratio = self.preprocessor.padding
+        else:
+            padding_ratio = padding_ratio_override
+        if padding_ratio > 0:
+            # unpad the landmark so that it is in the original image coordinate
+            scale = 1 / (1 + (2 * padding_ratio))
+            pad_inv_theta = torch.from_numpy(np.array([[1 / scale, 0, 0], [0, 1 / scale, 0]]))
+            pad_inv_theta = pad_inv_theta.unsqueeze(0).float().to(self.device).repeat(orig_pred_ldmks.size(0), 1, 1)
+            # append a column of ones so the 2x3 theta can be applied by matmul
+            unpad_ldmk_pred = torch.concat([orig_pred_ldmks.view(-1, 5, 2),
+                                            torch.ones((orig_pred_ldmks.size(0), 5, 1)).to(self.device)], dim=-1)
+            # map [0, 1] -> [-1, 1], apply the inverse padding scale, map back
+            unpad_ldmk_pred = (((unpad_ldmk_pred) * 2 - 1) @ pad_inv_theta.mT) / 2 + 0.5
+            unpad_ldmk_pred = unpad_ldmk_pred.view(orig_pred_ldmks.size(0), -1).detach()
+            unpad_ldmk_pred = unpad_ldmk_pred.view(-1, 5, 2)
+            if not is_square:
+                unpad_ldmk_pred = None  # cannot use this if the input is not square because preprocessor changes input
+                normalized_bbox = None  # cannot use this if the input is not square because preprocessor changes input
+            return aligned_x, unpad_ldmk_pred, aligned_ldmks, score, thetas, normalized_bbox
+        if not is_square:
+            orig_pred_ldmks = None  # cannot use this if the input is not square because preprocessor changes input
+            normalized_bbox = None  # cannot use this if the input is not square because preprocessor changes input
+        return aligned_x, orig_pred_ldmks, aligned_ldmks, score, thetas, normalized_bbox
+    def make_train_transform(self):
+        # ToTensor followed by per-channel (x - 0.5) / 0.5 normalisation
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+        return transform
+    def make_test_transform(self):
+        # identical to make_train_transform
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+        return transform

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/aligner_helper.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import torch
+import numpy as np
+import cv2
+from skimage import transform as trans
+import cv2
+def split_network_output(align_out):
+    anchor_bbox_pred, anchor_cls_pred, anchor_ldmk_pred, merged, _ = align_out
+    bbox, cls, ldmk = torch.split(merged, [4, 2, 10], dim=1)
+    return ldmk, bbox, cls
+def get_cv2_affine_from_landmark(ldmks, reference_ldmk, image_width, image_height, ):
+    assert ldmks.ndim == 2  # batchdim
+    assert ldmks.shape[1] == 10
+    assert isinstance(ldmks, torch.Tensor)
+    assert reference_ldmk.ndim == 2
+    assert reference_ldmk.shape[0] == 5
+    assert reference_ldmk.shape[1] == 2
+    assert isinstance(reference_ldmk, np.ndarray)
+    to_img_size = np.array([[[image_width, image_height]]])
+    ldmks = ldmks.view(ldmks.shape[0], 5, 2).detach().cpu().numpy()
+    ldmks = ldmks * to_img_size
+    transforms = []
+    for ldmk in ldmks:
+        tform = trans.SimilarityTransform()
+        tform.estimate(ldmk, reference_ldmk)
+        M = tform.params[0:2, :]
+        transforms.append(M)
+    transforms = np.stack(transforms, axis=0)
+    return transforms
+def cv2_param_to_torch_theta(cv2_tfms, image_width, image_height, output_width, output_height):
+    # https://github.com/wuneng/WarpAffine2GridSample
+    """4.Affine Transformation Matrix to theta"""
+    assert cv2_tfms.ndim == 3  # N, 2, 3
+    assert cv2_tfms.shape[1] == 2
+    assert cv2_tfms.shape[2] == 3
+    srcs = np.array([[0, 0], [0, 1], [1, 1]], dtype=np.float32)
+    srcs = np.expand_dims(srcs, axis=0).repeat(cv2_tfms.shape[0], axis=0)
+    dsts = np.matmul(srcs, cv2_tfms[:, :, :2].transpose(0, 2, 1)) + cv2_tfms[:, :, 2:3].transpose(0, 2, 1)
+    # normalize to [-1, 1]
+    srcs = srcs / np.array([[[image_width, image_height]]]) * 2 - 1
+    dsts = dsts / np.array([[[output_width, output_height]]]) * 2 - 1
+    thetas = []
+    for src, dst in zip(srcs, dsts):
+        theta = trans.estimate_transform("affine", src=dst, dst=src).params[:2]
+        thetas.append(theta)
+    thetas = np.stack(thetas, axis=0)
+    thetas = torch.from_numpy(thetas).float()
+    return thetas
+def adjust_ldmks(ldmks, thetas):
+    inv_thetas = inv_matrix(thetas).to(ldmks.device).float()
+    _ldmks = torch.cat([ldmks, torch.ones((ldmks.shape[0], 5, 1)).to(ldmks.device)], dim=2)
+    ldmk_aligned = (((_ldmks) * 2 - 1) @ inv_thetas.permute(0,2,1)) / 2 + 0.5
+    return ldmk_aligned
+def inv_matrix(theta):
+    # torch batched version
+    assert theta.ndim == 3
+    a, b, t1 = theta[:, 0,0], theta[:, 0,1], theta[:, 0,2]
+    c, d, t2 = theta[:, 1,0], theta[:, 1,1], theta[:, 1,2]
+    det = a * d - b * c
+    inv_det = 1.0 / det
+    inv_mat = torch.stack([
+        torch.stack([d * inv_det, -b * inv_det, (b * t2 - d * t1) * inv_det], dim=1),
+        torch.stack([-c * inv_det, a * inv_det, (c * t1 - a * t2) * inv_det], dim=1)
+    ], dim=1)
+    return inv_mat
+def reference_landmark():
+    return np.array([[38.29459953, 51.69630051],
+                     [73.53179932, 51.50139999],
+                     [56.02519989, 71.73660278],
+                     [41.54930115, 92.3655014],
+                     [70.72990036, 92.20410156]])
+def draw_ldmk(img, ldmk):
+    if ldmk is None:
+        return img
+    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
+    img = img.copy()
+    for i in range(5):
+        color = colors[i]
+        cv2.circle(img, (int(ldmk[i*2] * img.shape[1]), int(ldmk[i*2+1] * img.shape[0])), 1, color, 4)
+    return img

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/__init__.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from .models.retinaface import RetinaFace
+from .utils.model_utils import load_model
+from .config import cfg_mnet, cfg_re50
+from .layers.functions.prior_box import PriorBox
+from .preprocessor import Preprocessor
+def get_landmark_predictor(network='mobile0.25', use_aggregator=True, input_size=160):
+    cfg = None
+    if network == "mobile0.25":
+        cfg = cfg_mnet
+    elif network == "resnet50":
+        cfg = cfg_re50
+    net = RetinaFace(cfg=cfg, phase = 'test', use_aggregator=use_aggregator)
+    priorbox = PriorBox(image_size=(input_size, input_size),
+                        min_sizes=[[64, 80], [96, 112], [128, 144]],
+                        steps=[8, 16, 32],
+                        clip=False,
+                        variances=[0.1, 0.2],)
+    # aligner = Aligner(net, priorbox, input_size, output_size=output_size)
+    # return aligner
+    return net, priorbox
+def get_preprocessor(output_size=160, padding=0.0, padding_val='zero'):
+    return Preprocessor(output_size=output_size, padding=padding, padding_val=padding_val)

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/config.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# config.py
+# Backbone configs; get_landmark_predictor (dfa/__init__.py) passes one of
+# them to RetinaFace(cfg=...) depending on the requested network name.
+# Config selected for network == "mobile0.25".
+cfg_mnet = {
+    'name': 'mobilenet0.25',
+    'pretrain': True,
+    # NOTE(review): presumably backbone layer name -> output index; confirm in models/retinaface.py
+    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
+    'in_channel': 32,
+    'out_channel': 64
+}
+# Config selected for network == "resnet50".
+cfg_re50 = {
+    'name': 'Resnet50',
+    'pretrain': True,
+    # NOTE(review): presumably backbone layer name -> output index; confirm in models/retinaface.py
+    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
+    'in_channel': 256,
+    'out_channel': 256
+}

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .functions import *
2	+ from .modules import *

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/functions/prior_box.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import torch
+from itertools import product as product
+from math import ceil
+class PriorBox(object):
+    def __init__(self,
+                 image_size,
+                 min_sizes=[[64, 80], [96, 112], [128, 144]],
+                 steps=[8,16,32],
+                 clip=False,
+                 variances=[0.1, 0.2],
+                 ):
+        super(PriorBox, self).__init__()
+        self.min_sizes = min_sizes
+        self.steps = steps
+        self.clip = clip
+        self.variances = variances
+        self.image_size = image_size
+        self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
+        with torch.no_grad():
+            self.priors = self.forward()
+    def forward(self):
+        anchors = []
+        for k, f in enumerate(self.feature_maps):
+            min_sizes = self.min_sizes[k]
+            for i, j in product(range(f[0]), range(f[1])):
+                for min_size in min_sizes:
+                    s_kx = min_size / self.image_size[1]
+                    s_ky = min_size / self.image_size[0]
+                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
+                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
+                    for cy, cx in product(dense_cy, dense_cx):
+                        anchors += [cx, cy, s_kx, s_ky]
+        # back to torch land
+        output = torch.Tensor(anchors).view(-1, 4)
+        # import pandas as pd
+        # pd.DataFrame(output.numpy()).to_csv('/mckim/temp/temp.csv')
+        if self.clip:
+            output.clamp_(max=1, min=0)
+        return output
+    def encode(self, matched):
+        """Encode the variances from the priorbox layers into the ground truth boxes
+        we have matched (based on jaccard overlap) with the prior boxes.
+        """
+        self.priors = self.priors.to(matched.device)
+        # dist b/t match center and prior's center
+        g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - self.priors[:, :2]
+        # encode variance
+        g_cxcy /= (self.variances[0] * self.priors[:, 2:])
+        # match wh / prior wh
+        g_wh = (matched[:, 2:] - matched[:, :2]) / self.priors[:, 2:]
+        g_wh = torch.log(g_wh) / self.variances[1]
+        # return target for smooth_l1_loss
+        return torch.cat([g_cxcy, g_wh], 1)  # [num_priors,4]
+    def encode_landm(self, matched):
+        """Encode the variances from the priorbox layers into the ground truth boxes
+        we have matched (based on jaccard overlap) with the prior boxes.
+        """
+        self.priors = self.priors.to(matched.device)
+        # dist b/t match center and prior's center
+        matched = torch.reshape(matched, (matched.size(0), 5, 2))
+        priors_cx = self.priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_cy = self.priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_w = self.priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_h = self.priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
+        g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+        # encode variance
+        g_cxcy /= (self.variances[0] * priors[:, :, 2:])
+        # g_cxcy /= priors[:, :, 2:]
+        g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
+        # return target for smooth_l1_loss
+        return g_cxcy
+    # Adapted from https://github.com/Hakuyume/chainer-ssd
+    def decode(self, loc):
+        """Decode locations from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(loc.device)
+        boxes = torch.cat((
+            self.priors[:, :2] + loc[:, :2] * self.variances[0] * self.priors[:, 2:],
+            self.priors[:, 2:] * torch.exp(loc[:, 2:] * self.variances[1])), 1)
+        boxes[:, :2] -= boxes[:, 2:] / 2
+        boxes[:, 2:] += boxes[:, :2]
+        return boxes
+    def decode_landm(self, pre):
+        """Decode landm from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(pre.device)
+        landms = torch.cat((self.priors[:, :2] + pre[:, :2] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 2:4] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 4:6] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 6:8] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 8:10] * self.variances[0] * self.priors[:, 2:],
+                            ), dim=1)
+        return landms
+    def decode_batch(self, loc):
+        """Decode locations from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(loc.device)
+        assert loc.ndim == 3
+        priors = self.priors.unsqueeze(0).expand(loc.size(0), -1, -1)
+        boxes = torch.cat((
+            priors[:, :, :2] + loc[:, :, :2] * self.variances[0] * priors[:, :, 2:],
+            priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * self.variances[1])), -1)
+        boxes[:, :, :2] -= boxes[:, :, 2:] / 2
+        boxes[:, :, 2:] += boxes[:, :, :2]
+        return boxes
+    def decode_landm_batch(self, prediction):
+        """Decode landm from prediction using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        assert prediction.ndim == 3
+        self.priors = self.priors.to(prediction.device)
+        priors = self.priors.unsqueeze(0).expand(prediction.size(0), -1, -1)
+        landms = torch.cat((priors[:, :, :2] + prediction[:, :, :2] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 2:4] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 4:6] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 6:8] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 8:10] * self.variances[0] * priors[:, :, 2:],
+                            ), dim=-1)
+        return landms

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/modules/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .multibox_loss import MultiBoxLoss
2	+
3	+ __all__ = ['MultiBoxLoss']

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/layers/modules/multibox_loss.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from ...utils.box_utils import match, log_sum_exp
class MultiBoxLoss(nn.Module):
    """SSD-style multibox loss with an additional landmark regression term.

    Compute targets:
        1) Prior boxes are matched to ground-truth boxes by ``match`` which
           fills per-prior confidence, box and landmark targets.
        2) Hard negative mining keeps only the highest-loss negatives, at most
           ``neg_pos`` negatives per positive prior.

    Objective (see https://arxiv.org/pdf/1512.02325.pdf):
        Smooth-L1 loss for boxes and landmarks and cross-entropy for the
        confidences. Each term is normalised by its number of positive
        priors and returned separately, so weighting is left to the caller.

    Only ``num_classes``, ``overlap_thresh`` and ``neg_pos`` are read by
    ``forward``; the other constructor arguments are stored unchanged.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap

    @staticmethod
    def _aggregated_losses(aggs, targets):
        """Return the auxiliary losses for the aggregated prediction, or None.

        ``aggs`` is sliced as [:, :4] box, [:, 4:6] class logits and [:, 6:]
        landmarks. The stacked targets are squeezed on dim 1, so this
        presumably expects exactly one annotation row per image (TODO confirm).
        """
        if aggs is None:
            return None
        stacked_target = torch.stack(targets, dim=0).squeeze(1)
        labels = stacked_target[:, -1]

        # Landmarks are supervised only where the label is strictly positive.
        has_landm = labels > 0
        tgt_ldmk = stacked_target[:, 4:14][has_landm]
        agg_loss_landm = F.smooth_l1_loss(aggs[:, 6:][has_landm], tgt_ldmk, reduction='sum')
        # max(..., 1) avoids a 0/0 = NaN loss when no row qualifies.
        agg_loss_landm = agg_loss_landm / max(len(tgt_ldmk), 1)

        # Boxes are supervised for every non-zero label.
        has_box = labels != 0
        tgt_bbox = stacked_target[:, :4][has_box]
        agg_loss_box = F.smooth_l1_loss(aggs[:, :4][has_box], tgt_bbox, reduction='sum')
        agg_loss_box = agg_loss_box / max(len(tgt_bbox), 1)

        tgt_cls = (labels > 0).long()
        agg_loss_cls = F.cross_entropy(aggs[:, 4:6], tgt_cls, reduction='sum')
        agg_loss_cls = agg_loss_cls / max(len(tgt_cls), 1)

        return {
            'agg_loss_landm': agg_loss_landm,
            'agg_loss_box': agg_loss_box,
            'agg_loss_cls': agg_loss_cls
        }

    def forward(self, predictions, priorbox, targets):
        """Compute the multibox loss.

        Args:
            predictions (tuple): ``(loc, conf, landm, aggs, thetas)`` with
                loc shape:   (batch_size, num_priors, 4)
                conf shape:  (batch_size, num_priors, num_classes)
                landm shape: (batch_size, num_priors, 10)
                ``aggs`` may be None; ``thetas`` is not used here.
            priorbox: object exposing ``priors`` (num_priors rows); it is also
                passed through to ``match``.
            targets: per-image tensors whose columns are read as
                [:, :4] box, [:, 4:14] landmarks and [:, -1] label.

        Returns:
            ``(loss_l, loss_c, loss_landm, aux_loss_dict)`` where
            ``aux_loss_dict`` is None when ``aggs`` is None.
        """
        loc_data, conf_data, landm_data, aggs, _thetas = predictions
        # Follow the predictions' device instead of assuming CUDA.
        device = loc_data.device
        num = loc_data.size(0)
        num_priors = priorbox.priors.size(0)

        aux_loss_dict = self._aggregated_losses(aggs, targets)

        # Match priors (default boxes) and ground-truth boxes. Buffers are
        # zero-initialised so rows that ``match`` leaves untouched are defined.
        loc_t = torch.zeros(num, num_priors, 4)
        landm_t = torch.zeros(num, num_priors, 10)
        conf_t = torch.zeros(num, num_priors, dtype=torch.long)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data
            match(self.threshold, truths, priorbox, labels, landms, loc_t, conf_t, landm_t, idx)
        loc_t = loc_t.to(device)
        conf_t = conf_t.to(device)
        landm_t = landm_t.to(device)

        # Landmark loss (Smooth L1) over priors with a strictly positive label.
        # Shape: [batch, num_priors, 10]
        pos1 = conf_t > 0
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # From here on every non-zero label counts as a positive of class 1.
        pos = conf_t != 0
        conf_t[pos] = 1

        # Localization loss (Smooth L1). Shape: [batch, num_priors, 4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss used only to rank negatives.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard negative mining: zero out positives, then rank the rest.
        loss_c[pos.view(-1, 1)] = 0
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over positives plus the mined negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Normalise by the number of positives (at least 1).
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1
        return loss_l, loss_c, loss_landm, aux_loss_dict

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/__init__.py ADDED Viewed

File without changes

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/net.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import time
+import torch
+import torch.nn as nn
+import torchvision.models._utils as _utils
+import torchvision.models as models
+import torch.nn.functional as F
+from torch.autograd import Variable
def conv_bn(inp, oup, stride = 1, leaky = 0):
    """Build a 3x3 conv (padding 1, no bias) -> BatchNorm -> LeakyReLU stack.

    ``leaky`` is the negative slope of the in-place LeakyReLU; the default 0
    makes it behave like a plain ReLU.
    """
    conv = nn.Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = nn.LeakyReLU(negative_slope=leaky, inplace=True)
    return nn.Sequential(conv, norm, act)
def conv_bn_no_relu(inp, oup, stride):
    """Build a 3x3 conv (padding 1, no bias) -> BatchNorm stack, no activation."""
    conv = nn.Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = nn.BatchNorm2d(oup)
    return nn.Sequential(conv, norm)
def conv_bn1X1(inp, oup, stride, leaky=0):
    """Build a 1x1 conv (no padding, no bias) -> BatchNorm -> LeakyReLU stack.

    ``leaky`` is the negative slope of the in-place LeakyReLU.
    """
    pointwise = nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = nn.LeakyReLU(negative_slope=leaky, inplace=True)
    return nn.Sequential(pointwise, norm, act)
def conv_dw(inp, oup, stride, leaky=0.1):
    """Build a depthwise-separable convolution block.

    A 3x3 depthwise conv (``groups=inp``) and a 1x1 pointwise conv, each
    followed by BatchNorm and an in-place LeakyReLU with slope ``leaky``.
    ``stride`` applies to the depthwise conv only.
    """
    depthwise = [
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    pointwise = [
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)
class SSH(nn.Module):
    """Context module that concatenates three parallel 3x3-conv branches.

    The branches use one, two and three stacked 3x3 convolutions and produce
    ``out_channel // 2``, ``out_channel // 4`` and ``out_channel // 4``
    channels, so the concatenated output has ``out_channel`` channels.
    """

    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        # Narrow heads use a leaky activation, wide ones a plain ReLU.
        leaky = 0.1 if out_channel <= 64 else 0
        half = out_channel // 2
        quarter = out_channel // 4

        self.conv3X3 = conv_bn_no_relu(in_channel, half, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, quarter, stride=1, leaky = leaky)
        self.conv5X5_2 = conv_bn_no_relu(quarter, quarter, stride=1)

        self.conv7X7_2 = conv_bn(quarter, quarter, stride=1, leaky = leaky)
        self.conv7x7_3 = conv_bn_no_relu(quarter, quarter, stride=1)

    def forward(self, input):
        """Run the three branches on ``input``, concatenate on dim 1, apply ReLU."""
        branch3 = self.conv3X3(input)

        # The two deeper branches share their first 3x3 convolution.
        shared = self.conv5X5_1(input)
        branch5 = self.conv5X5_2(shared)
        branch7 = self.conv7x7_3(self.conv7X7_2(shared))

        return F.relu(torch.cat([branch3, branch5, branch7], dim=1))
class FPN(nn.Module):
    """Three-level feature pyramid with top-down nearest-neighbour upsampling."""

    def __init__(self,in_channels_list,out_channels):
        super(FPN,self).__init__()
        # Narrow pyramids use a leaky activation, wide ones a plain ReLU.
        leaky = 0.1 if out_channels <= 64 else 0
        # 1x1 lateral projections, one per input level.
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky)
        # 3x3 convolutions applied after each top-down merge.
        self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky)

    def forward(self, input):
        """Fuse the first three values of the mapping ``input``.

        Returns ``[level1, level2, level3]``; level 3 is the projected input
        itself, levels 2 and 1 are merged with the upsampled level above.
        """
        feats = list(input.values())
        lateral1 = self.output1(feats[0])
        lateral2 = self.output2(feats[1])
        lateral3 = self.output3(feats[2])

        size2 = [lateral2.size(2), lateral2.size(3)]
        merged2 = self.merge2(lateral2 + F.interpolate(lateral3, size=size2, mode="nearest"))

        size1 = [lateral1.size(2), lateral1.size(3)]
        merged1 = self.merge1(lateral1 + F.interpolate(merged2, size=size1, mode="nearest"))

        return [merged1, merged2, lateral3]
class MobileNetV1(nn.Module):
    """Three-stage backbone of depthwise-separable blocks (64/128/256 channels)."""

    def __init__(self):
        super(MobileNetV1, self).__init__()
        # Each tuple is (in_channels, out_channels, stride) for one conv_dw block.
        stage1_spec = ((8, 16, 1), (16, 32, 2), (32, 32, 1), (32, 64, 2), (64, 64, 1))
        stage2_spec = ((64, 128, 2),) + ((128, 128, 1),) * 5
        stage3_spec = ((128, 256, 2), (256, 256, 1))

        # Stage 1 starts with a regular strided 3x3 convolution stem.
        stem = conv_bn(3, 8, 2, leaky = 0.1)
        self.stage1 = nn.Sequential(stem, *[conv_dw(i, o, s) for i, o, s in stage1_spec])
        self.stage2 = nn.Sequential(*[conv_dw(i, o, s) for i, o, s in stage2_spec])
        self.stage3 = nn.Sequential(*[conv_dw(i, o, s) for i, o, s in stage3_spec])

    def forward(self, x):
        """Apply stage1, stage2 and stage3 in order and return the final map."""
        for stage in (self.stage1, self.stage2, self.stage3):
            x = stage(x)
        return x

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/models/retinaface.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import torch
+import torch.nn as nn
+import torchvision.models._utils as _utils
+import torch.nn.functional as F
+from .net import MobileNetV1 as MobileNetV1
+from .net import FPN as FPN
+from .net import SSH as SSH
+from timm.models import mlp_mixer
class ClassHead(nn.Module):
    """1x1-conv head emitting two class logits per anchor and location."""

    def __init__(self,inchannels=512,num_anchors=3):
        super(ClassHead,self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self,x):
        """Map a (B, C, H, W) feature map to (B, H*W*num_anchors, 2) logits."""
        logits = self.conv1x1(x)
        # Move channels last so each row of the view holds one anchor's logits.
        channels_last = logits.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 2)
class BboxHead(nn.Module):
    """1x1-conv head emitting four box-regression values per anchor and location."""

    def __init__(self,inchannels=512,num_anchors=3):
        super(BboxHead,self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self,x):
        """Map a (B, C, H, W) feature map to (B, H*W*num_anchors, 4) values."""
        deltas = self.conv1x1(x)
        # Move channels last so each row of the view holds one anchor's values.
        channels_last = deltas.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 4)
class LandmarkHead(nn.Module):
    """1x1-conv head emitting ten landmark values per anchor and location."""

    def __init__(self,inchannels=512,num_anchors=3):
        super(LandmarkHead,self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self,x):
        """Map a (B, C, H, W) feature map to (B, H*W*num_anchors, 10) values."""
        offsets = self.conv1x1(x)
        # Move channels last so each row of the view holds one anchor's values.
        channels_last = offsets.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 10)
class RetinaFace(nn.Module):
    """Detector with box, class and landmark heads over an FPN + SSH neck.

    With ``use_aggregator=True`` an MLP-Mixer stack additionally predicts one
    softmax weight per prior and returns the weighted sum of all decoded
    predictions.
    """

    def __init__(self, cfg = None, phase = 'train', use_aggregator=False):
        """
        :param cfg: Network related settings. Keys read here: 'name',
            'pretrain' (Resnet50 only), 'return_layers', 'in_channel',
            'out_channel'.
        :param phase: 'train' returns raw class logits from ``forward``; any
            other value returns softmax probabilities.
        :param use_aggregator: build and use the prediction aggregator.
        :raises ValueError: if ``cfg`` is None or names an unsupported backbone.
        """
        super(RetinaFace,self).__init__()
        self.phase = phase
        if cfg is None:
            raise ValueError('RetinaFace requires a cfg dict, got None')
        if cfg['name'] == 'mobilenet0.25':
            # NOTE: cfg['pretrain'] is not consulted for this backbone; no
            # checkpoint is loaded here.
            backbone = MobileNetV1()
        elif cfg['name'] == 'Resnet50':
            import torchvision.models as models
            backbone = models.resnet50(pretrained=cfg['pretrain'])
        else:
            # Fail early instead of handing None to IntermediateLayerGetter.
            raise ValueError(
                "Unsupported backbone {!r}; expected 'mobilenet0.25' or 'Resnet50'".format(cfg['name'])
            )
        self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])

        in_channels_stage2 = cfg['in_channel']
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]
        out_channels = cfg['out_channel']
        self.fpn = FPN(in_channels_list,out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

        self.use_aggregator = use_aggregator
        if self.use_aggregator:
            # 16 = 4 box + 2 class + 10 landmark values per prior (see forward).
            # 1050 is presumably the number of priors at the expected input
            # size -- TODO confirm against the prior-box configuration.
            modules = [mlp_mixer.MixerBlock(16, 1050) for _ in range(3)]
            modules.append(nn.Linear(16, 1))
            self.aggregator = nn.Sequential(*modules)
        else:
            self.aggregator = None

    def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Return one ClassHead per pyramid level."""
        return nn.ModuleList(ClassHead(inchannels, anchor_num) for _ in range(fpn_num))

    def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Return one BboxHead per pyramid level."""
        return nn.ModuleList(BboxHead(inchannels, anchor_num) for _ in range(fpn_num))

    def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Return one LandmarkHead per pyramid level."""
        return nn.ModuleList(LandmarkHead(inchannels, anchor_num) for _ in range(fpn_num))

    def forward(self, inputs, priorbox):
        """Run detection on ``inputs``.

        ``priorbox`` is only used when the aggregator is enabled, to decode
        boxes and landmarks via ``decode_batch`` / ``decode_landm_batch``.

        Returns ``(bbox_regressions, classifications, ldm_regressions, agg,
        theta)``; ``agg`` is None without the aggregator and ``theta`` is
        always None. Outside the 'train' phase the classifications are
        softmax-normalised over the last dimension.
        """
        out = self.body(inputs)

        # FPN
        fpn = self.fpn(out)

        # SSH
        features = [self.ssh1(fpn[0]), self.ssh2(fpn[1]), self.ssh3(fpn[2])]

        # One head per level; predictions are concatenated along the prior axis.
        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)

        agg = None
        theta = None
        if self.use_aggregator:
            decoded_bbox = priorbox.decode_batch(bbox_regressions)
            decoded_ldmk = priorbox.decode_landm_batch(ldm_regressions)
            combined = torch.cat([decoded_bbox, classifications, decoded_ldmk], dim=2)
            # Softmax over the prior axis turns the scores into convex weights.
            weight = F.softmax(self.aggregator(combined), dim=1)
            agg = torch.sum(weight * combined, dim=1)

        if self.phase == 'train':
            output = (bbox_regressions, classifications, ldm_regressions, agg, theta)
        else:
            output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions, agg, theta)
        return output

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/preprocessor.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import torch.nn.functional as F
class Preprocessor():
    """Square, pad and resize image batches to ``output_size`` x ``output_size``.

    Accepts float32 or uint8 tensors shaped (3, H, W) or (B, 3, H, W).
    ``padding`` is a ratio of the squared image side added on every border.
    ``padding_val`` is 'zero' or 'mean'; for float32 input 'zero' fills with
    -1.0 (presumably because float images are normalised to [-1, 1] -- TODO
    confirm), for uint8 input it fills with 0.
    """

    def __init__(self, output_size=160, padding=0.0, padding_val='zero'):
        self.output_size = output_size
        self.padding = padding
        self.padding_val = padding_val

    def _fill_value(self, imgs):
        """Return the scalar used to fill padded pixels of ``imgs``.

        Raises ValueError for unsupported dtypes or ``padding_val`` settings.
        """
        is_float = imgs.dtype == torch.float32
        if not is_float and imgs.dtype != torch.uint8:
            raise ValueError('imgs.dtype must be torch.float32 or torch.uint8')
        if self.padding_val == 'zero':
            return -1.0 if is_float else 0
        if self.padding_val == 'mean':
            # mean() is not defined for integer tensors, so average in float.
            mean = imgs.float().mean().item()
            return mean if is_float else int(round(mean))
        raise ValueError('padding_val must be "zero" or "mean"')

    def preprocess_batched(self, imgs, padding_ratio_override=None):
        """Preprocess a (B, C, H, W) batch; the result keeps the input dtype.

        ``padding_ratio_override`` replaces ``self.padding`` when not None.
        """
        padding_val = self._fill_value(imgs)
        square_imgs = self.make_square_img_batched(imgs, padding_val=padding_val)

        padding = self.padding if padding_ratio_override is None else padding_ratio_override
        padded_imgs = self.make_padded_img_batched(square_imgs, padding=padding, padding_val=padding_val)

        size = (self.output_size, self.output_size)
        if imgs.dtype == torch.float32:
            return F.interpolate(padded_imgs, size=size, mode='bilinear', align_corners=True)

        # uint8: interpolate in float, then clamp and cast back.
        resized_imgs = F.interpolate(padded_imgs.to(torch.float32), size=size, mode='bilinear', align_corners=True)
        resized_imgs = torch.clip(resized_imgs, 0, 255)
        return resized_imgs.to(torch.uint8)

    def make_square_img_batched(self, imgs, padding_val):
        """Pad the shorter spatial side evenly so that H == W."""
        assert imgs.ndim == 4
        h, w = imgs.shape[2:]
        if h > w:
            diff = (h - w)
            pad_left = diff // 2
            pad_right = diff - pad_left
            imgs = F.pad(imgs, (pad_left, pad_right, 0, 0), value=padding_val)
        elif w > h:
            diff = (w - h)
            pad_top = diff // 2
            pad_bottom = diff - pad_top
            imgs = F.pad(imgs, (0, 0, pad_top, pad_bottom), value=padding_val)
        assert imgs.shape[2] == imgs.shape[3]
        return imgs

    def make_padded_img_batched(self, imgs, padding, padding_val):
        """Add ``int(side * padding)`` pixels of ``padding_val`` on every border."""
        if padding == 0:
            return imgs
        assert imgs.ndim == 4
        h, w = imgs.shape[2:]
        pad_h = int(h * padding)
        pad_w = int(w * padding)
        imgs = F.pad(imgs, (pad_w, pad_w, pad_h, pad_h), value=padding_val)
        return imgs

    def __call__(self, input, padding_ratio_override=None):
        """Preprocess one (3, H, W) image or a (B, 3, H, W) batch.

        A 3-D input is returned as a 3-D tensor. Raises ValueError for any
        other number of dimensions.
        """
        if input.ndim == 3:
            assert input.shape[0] == 3
            batch_input = input.unsqueeze(0)
            return self.preprocess_batched(batch_input, padding_ratio_override=padding_ratio_override)[0]
        elif input.ndim == 4:
            assert input.shape[1] == 3
            return self.preprocess_batched(input, padding_ratio_override=padding_ratio_override)
        else:
            raise ValueError(f'Invalid input shape: {input.shape}')

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/utils/box_utils.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import torch
+import numpy as np
def point_form(boxes):
    """Convert center-size boxes (cx, cy, w, h) to corner form.

    Args:
        boxes: (tensor) center-size boxes, one per row.
    Return:
        (tensor) boxes as (xmin, ymin, xmax, ymax), one per row.
    """
    centers = boxes[:, :2]
    sizes = boxes[:, 2:]
    top_left = centers - sizes/2
    bottom_right = centers + sizes/2
    return torch.cat((top_left, bottom_right), 1)
def center_size(boxes):
    """Convert corner-form boxes (xmin, ymin, xmax, ymax) to center-size form.

    Args:
        boxes: (tensor) corner-form boxes, one per row.
    Return:
        (tensor) boxes as (cx, cy, w, h), one per row.
    """
    # Both halves must be wrapped in one tuple: torch.cat takes a sequence of
    # tensors followed by the dimension.
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,  # cx, cy
                      boxes[:, 2:] - boxes[:, :2]), 1)    # w, h
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of corner-form boxes.

    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    # [A,1,2] against [1,B,2] broadcasts to [A,B,2].
    a_min, a_max = box_a[:, :2].unsqueeze(1), box_a[:, 2:].unsqueeze(1)
    b_min, b_max = box_b[:, :2].unsqueeze(0), box_b[:, 2:].unsqueeze(0)
    lower = torch.max(a_min, b_min)
    upper = torch.min(a_max, b_max)
    # Non-overlapping pairs have a negative extent; clamp it to zero.
    extent = torch.clamp(upper - lower, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (IoU) of two sets of boxes.

        IoU(A, B) = |A ∩ B| / (area(A) + area(B) - |A ∩ B|)

    Args:
        box_a: (tensor) corner-form boxes, Shape: [num_a,4]
        box_b: (tensor) corner-form boxes, Shape: [num_b,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    inter = intersect(box_a, box_b)
    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]
    area_a = (width_a * height_a).unsqueeze(1).expand_as(inter)  # [A,B]
    area_b = (width_b * height_b).unsqueeze(0).expand_as(inter)  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]
def matrix_iou(a, b):
    """
    Return the pairwise IoU of corner-form box arrays a and b (numpy version).
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    # Pairs that do not overlap on both axes contribute zero area.
    overlaps = (top_left < bottom_right).all(axis=2)
    area_i = np.prod(bottom_right - top_left, axis=2) * overlaps
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union = area_a[:, np.newaxis] + area_b - area_i
    return area_i / union
def matrix_iof(a, b):
    """
    Return the pairwise intersection of a and b divided by the area of a
    (numpy version); the divisor is floored at 1.
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    # Pairs that do not overlap on both axes contribute zero area.
    overlaps = (top_left < bottom_right).all(axis=2)
    area_i = np.prod(bottom_right - top_left, axis=2) * overlaps
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denom = np.maximum(area_a[:, np.newaxis], 1)
    return area_i / denom
def match(threshold, truths, priorbox, labels, landms, loc_t, conf_t, landm_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the boxes and landmarks, and write the targets for batch
    element ``idx`` into ``loc_t``, ``conf_t`` and ``landm_t`` in place.

    Args:
        threshold: (float) Priors whose best overlap is below this value are
            labelled background (0).
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priorbox: object exposing ``priors`` (center-size prior boxes) plus
            ``encode`` and ``encode_landm``.
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched labels for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        None. The results are written into ``loc_t[idx]``, ``conf_t[idx]``
        and ``landm_t[idx]``.
    """
    # jaccard index, Shape: [num_obj, num_priors]
    overlaps = jaccard(
        truths,
        point_form(priorbox.priors)
    )
    # (Bipartite Matching)
    # best prior for each ground truth, Shape: [num_obj, 1]
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)

    # Ignore hard ground truths: those whose best prior overlaps less than 0.2.
    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
    if best_prior_idx_filter.shape[0] <= 0:
        # Nothing to match: mark every prior of this image as background and
        # zero all three targets so none is left with stale contents.
        loc_t[idx] = 0
        conf_t[idx] = 0
        landm_t[idx] = 0
        return

    # best ground truth for each prior, Shape: [1, num_priors]
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
    best_truth_idx.squeeze_(0)
    best_truth_overlap.squeeze_(0)
    best_prior_idx.squeeze_(1)
    best_prior_idx_filter.squeeze_(1)
    best_prior_overlap.squeeze_(1)
    # Overlap 2 exceeds any real IoU, so these priors always pass the threshold.
    best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2)  # ensure best prior
    # TODO refactor: index  best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.size(0)):     # decide which gt box this prior predicts
        best_truth_idx[best_prior_idx[j]] = j
    matches = truths[best_truth_idx]            # Shape: [num_priors,4]  matched gt box per prior
    conf = labels[best_truth_idx]               # Shape: [num_priors]    matched gt label per prior
    conf[best_truth_overlap < threshold] = 0    # overlap below threshold -> background / negative
    loc = priorbox.encode(matches)

    matches_landm = landms[best_truth_idx]
    landm = priorbox.encode_landm(matches_landm)
    loc_t[idx] = loc    # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    landm_t[idx] = landm
def log_sum_exp(x):
    """Return log(sum(exp(x))) over dim 1, keeping that dim.

    Used to compute the unaveraged confidence loss across all examples in a
    batch.

    The reduction is stabilised per row. Shifting every row by the single
    global maximum underflows rows far below it and yields ``-inf``.

    Args:
        x (tensor): conf_preds from conf layers, reduced over dim 1.
    Return:
        (tensor) same shape as ``x`` except that dim 1 has size 1.
    """
    return torch.logsumexp(x, 1, keepdim=True)
+# Original author: Francisco Massa:
+# https://github.com/fmassa/object-detection.torch
+# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        ``(keep, count)``: ``keep`` is a long tensor of length
        ``scores.size(0)`` whose first ``count`` entries are the indices of the
        kept boxes in descending score order; the remaining entries are 0.
    """
    keep = torch.zeros(scores.size(0), dtype=torch.long)
    if boxes.numel() == 0:
        # Same (keep, count) shape as the normal path so callers can unpack.
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view

        # Intersection of box i with every remaining candidate.
        xx1 = torch.clamp(x1[idx], min=x1[i].item())
        yy1 = torch.clamp(y1[idx], min=y1[i].item())
        xx2 = torch.clamp(x2[idx], max=x2[i].item())
        yy2 = torch.clamp(y2[idx], max=y2[i].item())
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        # IoU = i / (area(a) + area(b) - i)
        rem_areas = area[idx]
        union = (rem_areas - inter) + area[i]
        IoU = inter / union

        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

cvlface/research/recognition/code/run_v1/aligners/differentiable_face_aligner/dfa/utils/model_utils.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import torch
def remove_prefix(state_dict, prefix):
    ''' Return a copy of state_dict with `prefix` stripped from keys that start with it.

    Old style model is stored with all names of parameters sharing common prefix 'module.'
    '''
    print('remove prefix \'{}\''.format(prefix))
    renamed = {}
    for key, value in state_dict.items():
        if key.startswith(prefix):
            # Drop only the leading occurrence of the prefix.
            key = key.split(prefix, 1)[-1]
        renamed[key] = value
    return renamed
def check_keys(model, pretrained_state_dict):
    """Print how the checkpoint keys line up with the model's state-dict keys.

    Returns True; asserts that at least one checkpoint key is also a model key.
    """
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    shared = model_keys.intersection(ckpt_keys)
    only_in_ckpt = ckpt_keys.difference(model_keys)
    only_in_model = model_keys.difference(ckpt_keys)
    print('Missing keys:{}'.format(len(only_in_model)))
    print('Unused checkpoint keys:{}'.format(len(only_in_ckpt)))
    print('Used keys:{}'.format(len(shared)))
    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True
def load_model(model, pretrained_path, load_to_cpu):
    """Load the checkpoint at ``pretrained_path`` into ``model`` and return it.

    The checkpoint may be a raw state dict or a dict with a "state_dict"
    entry. A leading 'module.' is stripped from every key and the weights
    are loaded with ``strict=False``.

    NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        # Keep storages where they are deserialised (CPU).
        map_location = lambda storage, loc: storage
    else:
        device = torch.cuda.current_device()
        map_location = lambda storage, loc: storage.cuda(device)
    pretrained_dict = torch.load(pretrained_path, map_location=map_location)

    if "state_dict" in pretrained_dict.keys():
        weights = pretrained_dict['state_dict']
    else:
        weights = pretrained_dict
    pretrained_dict = remove_prefix(weights, 'module.')

    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model

cvlface/research/recognition/code/run_v1/aligners/none/__init__.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from ..base import BaseAligner
class NoneAligner(BaseAligner):
    """Pass-through aligner: forward and both transforms return their input unchanged."""

    def __init__(self, config):
        super().__init__()
        self.config = config

    @classmethod
    def from_config(cls, aligner_config):
        """Build the aligner from ``aligner_config``, which is stored as ``config``."""
        return cls(aligner_config)

    def make_train_transform(self):
        """Return an identity callable for training inputs."""
        def identity(x):
            return x
        return identity

    def make_test_transform(self):
        """Return an identity callable for test inputs."""
        def identity(x):
            return x
        return identity

    def forward(self, x):
        """Return ``x`` unchanged."""
        return x

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/__init__.py ADDED Viewed

	@@ -0,0 +1,246 @@

+from ..base import BaseAligner
+from torchvision import transforms
+from .retinaface import get_landmark_predictor, get_preprocessor
+from . import aligner_helper
+import torch
+import torch.nn.functional as F
+import numpy as np
+class RetinaFaceAligner(BaseAligner):
+    """
+    A non-differentiable face aligner that aligns the image with one face to a canonical position.
+    The landmark network output is post-processed with NumPy (NMS, similarity fit), and the
+    resulting affine transform is applied to the input with ``F.affine_grid`` / ``F.grid_sample``.
+    The aligner is based on the following paper:
+    ```
+    @inproceedings{deng2020retinaface,
+      title={Retinaface: Single-shot multi-level face localisation in the wild},
+      author={Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos},
+      booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
+      pages={5203--5212},
+      year={2020}
+    }
+    ```
+    """
+    def __init__(self, net, prior_box, preprocessor, config):
+        # net: landmark/box predictor called as net(image, prior_box) in forward().
+        # prior_box: anchor helper handed to the network and to postprocess().
+        # preprocessor: callable applied to the input batch before detection.
+        # config: read in forward() for input_size and output_size.
+        super(RetinaFaceAligner, self).__init__()
+        self.net = net
+        self.prior_box = prior_box
+        self.preprocessor = preprocessor
+        self.config = config
+    @classmethod
+    def from_config(cls, config):
+        """Build the aligner from ``config``.
+        Reads ``arch``, ``input_size``, ``input_padding_ratio``, ``input_padding_val`` and
+        ``freeze`` from ``config``. The returned model is switched to eval mode.
+        """
+        net, prior_box = get_landmark_predictor(network=config.arch,
+                                                input_size=config.input_size)
+        preprocessor = get_preprocessor(output_size=config.input_size,
+                                        padding=config.input_padding_ratio,
+                                        padding_val=config.input_padding_val)
+        if config.freeze:
+            # Exclude the detector weights from gradient updates.
+            for param in net.parameters():
+                param.requires_grad = False
+        model = cls(net, prior_box, preprocessor, config)
+        model.eval()
+        return model
+    def forward(self, x, padding_ratio_override=None):
+        """Detect the top-scoring face in each image and warp it to the reference landmarks.
+        Args:
+            x: 4-D tensor with 3 channels on dim 1 (asserted below). It is un-normalised with
+                the 0.5/0.5 convention before being fed to the detector.
+            padding_ratio_override: forwarded to the preprocessor; when not None it also
+                replaces ``self.preprocessor.padding`` for the landmark un-padding step.
+        Returns:
+            Tuple ``(aligned_x, ldmks, aligned_ldmks, score, thetas, normalized_bbox)``.
+            ``ldmks`` are the predicted landmarks reshaped to (-1, 5, 2), un-padded when the
+            padding ratio is positive. ``ldmks`` and ``normalized_bbox`` are None when the
+            input is not square.
+        """
+        # input size check
+        assert x.shape[1] == 3
+        assert x.ndim == 4
+        assert isinstance(x, torch.Tensor)
+        is_square = x.shape[2] == x.shape[3]
+        x = self.preprocessor(x, padding_ratio_override=padding_ratio_override)
+        assert self.prior_box.image_size == x.shape[2:]
+        # make image into BGR
+        x_bgr = x.flip(1)
+        input_img = normalize_for_net(unnormalize(x_bgr))
+        result = self.net(input_img, self.prior_box)
+        batch_loc, batch_conf, batch_landms = result
+        # Split along the batch dimension so each image is post-processed on its own.
+        batch_loc = torch.split(batch_loc, 1, dim=0)
+        batch_conf = torch.split(batch_conf, 1, dim=0)
+        batch_landms = torch.split(batch_landms, 1, dim=0)
+        nms_ldmks = []
+        nms_scores = []
+        nms_bbox = []
+        for loc, conf, landms, in zip(batch_loc, batch_conf, batch_landms):
+            dets = postprocess(self.prior_box, loc, conf, landms, confidence_threshold=0.0, nms_threshold=0.4)
+            # Keep only the highest-scoring detection of this image.
+            bbox, score, ldmks = parse_one_det_result(dets)
+            # Bring pixel landmarks back to normalised coordinates.
+            # NOTE(review): postprocess unpacks image_size as (height, width) while landmarks are
+            # scaled as (x, y) pairs; the index order here only matters for non-square sizes.
+            ldmks = ldmks / np.array( [self.prior_box.image_size[0], self.prior_box.image_size[1]] * 5)
+            nms_ldmks.append(ldmks)
+            nms_scores.append(score)
+            nms_bbox.append(bbox)
+        # self.device is not defined in this class; presumably provided by BaseAligner - TODO confirm.
+        orig_pred_ldmks = torch.from_numpy(np.array(nms_ldmks)).to(self.device).float()
+        score = torch.from_numpy(np.array(nms_scores)).to(self.device).float().unsqueeze(-1)
+        bbox = torch.from_numpy(np.array(nms_bbox)).to(self.device).float()
+        reference_ldmk = aligner_helper.reference_landmark()
+        input_size = self.config.input_size
+        output_size = self.config.output_size
+        # Fit one similarity transform per image, then convert it to an affine_grid theta.
+        cv2_tfms = aligner_helper.get_cv2_affine_from_landmark(orig_pred_ldmks, reference_ldmk, input_size, input_size)
+        thetas = aligner_helper.cv2_param_to_torch_theta(cv2_tfms, input_size, input_size, output_size, output_size)
+        thetas = thetas.to(orig_pred_ldmks.device)
+        # From here on output_size is the full (N, 3, H, W) size expected by affine_grid.
+        output_size = torch.Size((len(thetas), 3, output_size, output_size))
+        grid = F.affine_grid(thetas, output_size, align_corners=True)
+        aligned_x = F.grid_sample(x + 1, grid, align_corners=True) - 1  # +1, -1 for making padding pixel 0
+        aligned_ldmks = aligner_helper.adjust_ldmks(orig_pred_ldmks.view(-1, 5, 2), thetas)
+        orig_pred_ldmks = orig_pred_ldmks.view(-1, 5, 2)
+        # bbox (xmin, ymin, xmax, ymax)
+        normalized_bbox = bbox / torch.tensor([[input_img.size(3), input_img.size(2)] * 2]).to(bbox.device)
+        if padding_ratio_override is None:
+            padding_ratio = self.preprocessor.padding
+        else:
+            padding_ratio = padding_ratio_override
+        if padding_ratio > 0:
+            # unpad the landmark so that it is in the original image coordinate
+            scale = 1 / (1 + (2 * padding_ratio))
+            pad_inv_theta = torch.from_numpy(np.array([[1 / scale, 0, 0], [0, 1 / scale, 0]]))
+            pad_inv_theta = pad_inv_theta.unsqueeze(0).float().to(self.device).repeat(orig_pred_ldmks.size(0), 1, 1)
+            # Homogeneous coordinates: append a column of ones to every landmark.
+            unpad_ldmk_pred = torch.concat([orig_pred_ldmks.view(-1, 5, 2),
+                                            torch.ones((orig_pred_ldmks.size(0), 5, 1)).to(self.device)], dim=-1)
+            # Map [0, 1] -> [-1, 1], apply the inverse padding scale, then map back to [0, 1].
+            unpad_ldmk_pred = (((unpad_ldmk_pred) * 2 - 1) @ pad_inv_theta.mT) / 2 + 0.5
+            unpad_ldmk_pred = unpad_ldmk_pred.view(orig_pred_ldmks.size(0), -1).detach()
+            unpad_ldmk_pred = unpad_ldmk_pred.view(-1, 5, 2)
+            if not is_square:
+                unpad_ldmk_pred = None  # cannot use this if the input is not square because the preprocessor changes the input
+                normalized_bbox = None  # cannot use this if the input is not square because the preprocessor changes the input
+            return aligned_x, unpad_ldmk_pred, aligned_ldmks, score, thetas, normalized_bbox
+        if not is_square:
+            orig_pred_ldmks = None  # cannot use this if the input is not square because the preprocessor changes the input
+            normalized_bbox = None  # cannot use this if the input is not square because the preprocessor changes the input
+        return aligned_x, orig_pred_ldmks, aligned_ldmks, score, thetas, normalized_bbox
+    def make_train_transform(self):
+        """Return the training transform: ``ToTensor`` followed by 0.5/0.5 normalisation."""
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+        return transform
+    def make_test_transform(self):
+        """Return the evaluation transform (same pipeline as the training transform)."""
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+        return transform
+def normalize(image):
+    image = image / 255.
+    image = (image - 0.5) / 0.5
+    return image
+def unnormalize(image):
+    image = image * 0.5 + 0.5
+    image = image * 255.
+    return image
+def normalize_for_net(bgr_image_0_255):
+    # bgr_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
+    return bgr_image_0_255 - torch.tensor([104, 117, 123])[None, :, None, None].to(bgr_image_0_255.device)
+def postprocess(priorbox, loc, conf, landms, confidence_threshold, nms_threshold):
+    device = loc.device
+    im_height, im_width = priorbox.image_size
+    scale = torch.Tensor([im_width, im_height, im_width, im_height])
+    scale = scale.to(device)
+    boxes = priorbox.decode(loc.data.squeeze(0))
+    boxes = boxes * scale
+    boxes = boxes.cpu().numpy()
+    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
+    landms = priorbox.decode_landm(landms.data.squeeze(0))
+    scale1 = torch.Tensor([im_width, im_height, im_width, im_height,
+                           im_width, im_height, im_width, im_height,
+                           im_width, im_height])
+    scale1 = scale1.to(device)
+    landms = landms * scale1
+    landms = landms.cpu().numpy()
+    # ignore low scores
+    inds = np.where(scores > confidence_threshold)[0]
+    if len(inds) == 0:
+        inds = np.where(scores >= 0)[0]
+    boxes = boxes[inds]
+    landms = landms[inds]
+    scores = scores[inds]
+    # keep top-K before NMS
+    order = scores.argsort()[::-1]
+    # order = scores.argsort()[::-1][:args.top_k]
+    boxes = boxes[order]
+    landms = landms[order]
+    scores = scores[order]
+    # do NMS
+    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+    keep = py_cpu_nms(dets, nms_threshold)
+    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
+    dets = dets[keep, :]
+    landms = landms[keep]
+    # keep top-K faster NMS
+    # dets = dets[:args.keep_top_k, :]
+    # landms = landms[:args.keep_top_k, :]
+    dets = np.concatenate((dets, landms), axis=1)
+    return dets
+def py_cpu_nms(dets,
+               thresh):
+    """
+    Pure Python NMS baseline.
+    """
+    x1 = dets[:, 0]
+    y1 = dets[:, 1]
+    x2 = dets[:, 2]
+    y2 = dets[:, 3]
+    scores = dets[:, 4]
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+        inds = np.where(ovr <= thresh)[0]
+        order = order[inds + 1]
+    return keep
+def parse_one_det_result(dets):
+    dets_sorted = dets[dets[:, 4].argsort()[::-1]]
+    result = dets_sorted[0]
+    bbox = result[:4]
+    score = result[4]
+    ldmks = result[5:]
+    return bbox, score, ldmks

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/aligner_helper.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import torch
+import numpy as np
+import cv2
+from skimage import transform as trans
+import cv2
+def split_network_output(align_out):
+    anchor_bbox_pred, anchor_cls_pred, anchor_ldmk_pred, merged, _ = align_out
+    bbox, cls, ldmk = torch.split(merged, [4, 2, 10], dim=1)
+    return ldmk, bbox, cls
+def get_cv2_affine_from_landmark(ldmks, reference_ldmk, image_width, image_height, ):
+    assert ldmks.ndim == 2  # batchdim
+    assert ldmks.shape[1] == 10
+    assert isinstance(ldmks, torch.Tensor)
+    assert reference_ldmk.ndim == 2
+    assert reference_ldmk.shape[0] == 5
+    assert reference_ldmk.shape[1] == 2
+    assert isinstance(reference_ldmk, np.ndarray)
+    to_img_size = np.array([[[image_width, image_height]]])
+    ldmks = ldmks.view(ldmks.shape[0], 5, 2).detach().cpu().numpy()
+    ldmks = ldmks * to_img_size
+    transforms = []
+    for ldmk in ldmks:
+        tform = trans.SimilarityTransform()
+        tform.estimate(ldmk, reference_ldmk)
+        M = tform.params[0:2, :]
+        transforms.append(M)
+    transforms = np.stack(transforms, axis=0)
+    return transforms
+def cv2_param_to_torch_theta(cv2_tfms, image_width, image_height, output_width, output_height):
+    # https://github.com/wuneng/WarpAffine2GridSample
+    """4.Affine Transformation Matrix to theta"""
+    assert cv2_tfms.ndim == 3  # N, 2, 3
+    assert cv2_tfms.shape[1] == 2
+    assert cv2_tfms.shape[2] == 3
+    srcs = np.array([[0, 0], [0, 1], [1, 1]], dtype=np.float32)
+    srcs = np.expand_dims(srcs, axis=0).repeat(cv2_tfms.shape[0], axis=0)
+    dsts = np.matmul(srcs, cv2_tfms[:, :, :2].transpose(0, 2, 1)) + cv2_tfms[:, :, 2:3].transpose(0, 2, 1)
+    # normalize to [-1, 1]
+    srcs = srcs / np.array([[[image_width, image_height]]]) * 2 - 1
+    dsts = dsts / np.array([[[output_width, output_height]]]) * 2 - 1
+    thetas = []
+    for src, dst in zip(srcs, dsts):
+        theta = trans.estimate_transform("affine", src=dst, dst=src).params[:2]
+        thetas.append(theta)
+    thetas = np.stack(thetas, axis=0)
+    thetas = torch.from_numpy(thetas).float()
+    return thetas
+def adjust_ldmks(ldmks, thetas):
+    inv_thetas = inv_matrix(thetas).to(ldmks.device).float()
+    _ldmks = torch.cat([ldmks, torch.ones((ldmks.shape[0], 5, 1)).to(ldmks.device)], dim=2)
+    ldmk_aligned = (((_ldmks) * 2 - 1) @ inv_thetas.permute(0,2,1)) / 2 + 0.5
+    return ldmk_aligned
+def inv_matrix(theta):
+    # torch batched version
+    assert theta.ndim == 3
+    a, b, t1 = theta[:, 0,0], theta[:, 0,1], theta[:, 0,2]
+    c, d, t2 = theta[:, 1,0], theta[:, 1,1], theta[:, 1,2]
+    det = a * d - b * c
+    inv_det = 1.0 / det
+    inv_mat = torch.stack([
+        torch.stack([d * inv_det, -b * inv_det, (b * t2 - d * t1) * inv_det], dim=1),
+        torch.stack([-c * inv_det, a * inv_det, (c * t1 - a * t2) * inv_det], dim=1)
+    ], dim=1)
+    return inv_mat
+def reference_landmark():
+    return np.array([[38.29459953, 51.69630051],
+                     [73.53179932, 51.50139999],
+                     [56.02519989, 71.73660278],
+                     [41.54930115, 92.3655014],
+                     [70.72990036, 92.20410156]])
+def draw_ldmk(img, ldmk):
+    if ldmk is None:
+        return img
+    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
+    img = img.copy()
+    for i in range(5):
+        color = colors[i]
+        cv2.circle(img, (int(ldmk[i*2] * img.shape[1]), int(ldmk[i*2+1] * img.shape[0])), 1, color, 4)
+    return img

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/__init__.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from .models.retinaface import RetinaFace
+from .utils.model_utils import load_model
+from .config import cfg_mnet, cfg_re50
+from .layers.functions.prior_box import PriorBox
+from .preprocessor import Preprocessor
+def get_landmark_predictor(network='mobile0.25', input_size=160):
+    cfg = None
+    if network == "mobile0.25":
+        cfg = cfg_mnet
+    elif network == "resnet50":
+        cfg = cfg_re50
+    net = RetinaFace(cfg=cfg, phase = 'test')
+    priorbox = PriorBox(image_size=(input_size, input_size),
+                        # min_sizes=[[64, 80], [96, 112], [128, 144]],
+                        min_sizes=[[16, 32], [64, 128], [256, 512]],
+                        steps=[8, 16, 32],
+                        clip=False,
+                        variances=[0.1, 0.2],)
+    # aligner = Aligner(net, priorbox, input_size, output_size=output_size)
+    # return aligner
+    return net, priorbox
+def get_preprocessor(output_size=160, padding=0.0, padding_val='zero'):
+    return Preprocessor(output_size=output_size, padding=padding, padding_val=padding_val)

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/config.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# config.py
+cfg_mnet = {
+    'name': 'mobilenet0.25',
+    'pretrain': True,
+    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
+    'in_channel': 32,
+    'out_channel': 64
+}
+cfg_re50 = {
+    'name': 'Resnet50',
+    'pretrain': True,
+    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
+    'in_channel': 256,
+    'out_channel': 256
+}

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .functions import *
2	+ from .modules import *

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/functions/prior_box.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import torch
+from itertools import product as product
+from math import ceil
+class PriorBox(object):
+    def __init__(self,
+                 image_size,
+                 min_sizes=[[64, 80], [96, 112], [128, 144]],
+                 steps=[8,16,32],
+                 clip=False,
+                 variances=[0.1, 0.2],
+                 ):
+        super(PriorBox, self).__init__()
+        self.min_sizes = min_sizes
+        self.steps = steps
+        self.clip = clip
+        self.variances = variances
+        self.image_size = image_size
+        self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
+        with torch.no_grad():
+            self.priors = self.forward()
+    def forward(self):
+        anchors = []
+        for k, f in enumerate(self.feature_maps):
+            min_sizes = self.min_sizes[k]
+            for i, j in product(range(f[0]), range(f[1])):
+                for min_size in min_sizes:
+                    s_kx = min_size / self.image_size[1]
+                    s_ky = min_size / self.image_size[0]
+                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
+                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
+                    for cy, cx in product(dense_cy, dense_cx):
+                        anchors += [cx, cy, s_kx, s_ky]
+        # back to torch land
+        output = torch.Tensor(anchors).view(-1, 4)
+        # import pandas as pd
+        # pd.DataFrame(output.numpy()).to_csv('/mckim/temp/temp.csv')
+        if self.clip:
+            output.clamp_(max=1, min=0)
+        return output
+    def encode(self, matched):
+        """Encode the variances from the priorbox layers into the ground truth boxes
+        we have matched (based on jaccard overlap) with the prior boxes.
+        """
+        self.priors = self.priors.to(matched.device)
+        # dist b/t match center and prior's center
+        g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - self.priors[:, :2]
+        # encode variance
+        g_cxcy /= (self.variances[0] * self.priors[:, 2:])
+        # match wh / prior wh
+        g_wh = (matched[:, 2:] - matched[:, :2]) / self.priors[:, 2:]
+        g_wh = torch.log(g_wh) / self.variances[1]
+        # return target for smooth_l1_loss
+        return torch.cat([g_cxcy, g_wh], 1)  # [num_priors,4]
+    def encode_landm(self, matched):
+        """Encode the variances from the priorbox layers into the ground truth boxes
+        we have matched (based on jaccard overlap) with the prior boxes.
+        """
+        self.priors = self.priors.to(matched.device)
+        # dist b/t match center and prior's center
+        matched = torch.reshape(matched, (matched.size(0), 5, 2))
+        priors_cx = self.priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_cy = self.priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_w = self.priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors_h = self.priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+        priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
+        g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+        # encode variance
+        g_cxcy /= (self.variances[0] * priors[:, :, 2:])
+        # g_cxcy /= priors[:, :, 2:]
+        g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
+        # return target for smooth_l1_loss
+        return g_cxcy
+    # Adapted from https://github.com/Hakuyume/chainer-ssd
+    def decode(self, loc):
+        """Decode locations from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(loc.device)
+        boxes = torch.cat((
+            self.priors[:, :2] + loc[:, :2] * self.variances[0] * self.priors[:, 2:],
+            self.priors[:, 2:] * torch.exp(loc[:, 2:] * self.variances[1])), 1)
+        boxes[:, :2] -= boxes[:, 2:] / 2
+        boxes[:, 2:] += boxes[:, :2]
+        return boxes
+    def decode_landm(self, pre):
+        """Decode landm from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(pre.device)
+        landms = torch.cat((self.priors[:, :2] + pre[:, :2] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 2:4] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 4:6] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 6:8] * self.variances[0] * self.priors[:, 2:],
+                            self.priors[:, :2] + pre[:, 8:10] * self.variances[0] * self.priors[:, 2:],
+                            ), dim=1)
+        return landms
+    def decode_batch(self, loc):
+        """Decode locations from predictions using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        self.priors = self.priors.to(loc.device)
+        assert loc.ndim == 3
+        priors = self.priors.unsqueeze(0).expand(loc.size(0), -1, -1)
+        boxes = torch.cat((
+            priors[:, :, :2] + loc[:, :, :2] * self.variances[0] * priors[:, :, 2:],
+            priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * self.variances[1])), -1)
+        boxes[:, :, :2] -= boxes[:, :, 2:] / 2
+        boxes[:, :, 2:] += boxes[:, :, :2]
+        return boxes
+    def decode_landm_batch(self, prediction):
+        """Decode landm from prediction using priors to undo
+        the encoding we did for offset regression at train time.
+        """
+        assert prediction.ndim == 3
+        self.priors = self.priors.to(prediction.device)
+        priors = self.priors.unsqueeze(0).expand(prediction.size(0), -1, -1)
+        landms = torch.cat((priors[:, :, :2] + prediction[:, :, :2] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 2:4] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 4:6] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 6:8] * self.variances[0] * priors[:, :, 2:],
+                            priors[:, :, :2] + prediction[:, :, 8:10] * self.variances[0] * priors[:, :, 2:],
+                            ), dim=-1)
+        return landms

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/modules/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .multibox_loss import MultiBoxLoss
2	+
3	+ __all__ = ['MultiBoxLoss']

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/layers/modules/multibox_loss.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from ...utils.box_utils import match, log_sum_exp
+class MultiBoxLoss(nn.Module):
+    """SSD Weighted Loss Function
+    Compute Targets:
+        1) Produce Confidence Target Indices by matching  ground truth boxes
+           with (default) 'priorboxes' that have jaccard index > threshold parameter
+           (default threshold: 0.5).
+        2) Produce localization target by 'encoding' variance into offsets of ground
+           truth boxes and their matched  'priorboxes'.
+        3) Hard negative mining to filter the excessive number of negative examples
+           that comes with using a large number of default bounding boxes.
+           (default negative:positive ratio 3:1)
+    Objective Loss:
+        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
+        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
+        weighted by α which is set to 1 by cross val.
+        Args:
+            c: class confidences,
+            l: predicted boxes,
+            g: ground truth boxes
+            N: number of matched default boxes
+        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
+    """
+    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
+        super(MultiBoxLoss, self).__init__()
+        self.num_classes = num_classes
+        self.threshold = overlap_thresh
+        self.background_label = bkg_label
+        self.encode_target = encode_target
+        self.use_prior_for_matching = prior_for_matching
+        self.do_neg_mining = neg_mining
+        self.negpos_ratio = neg_pos
+        self.neg_overlap = neg_overlap
+    def forward(self, predictions, priorbox, targets):
+        """Multibox Loss
+        Args:
+            predictions (tuple): A tuple containing loc preds, conf preds,
+            and prior boxes from SSD net.
+                conf shape: torch.size(batch_size,num_priors,num_classes)
+                loc shape: torch.size(batch_size,num_priors,4)
+                priors shape: torch.size(num_priors,4)
+            ground_truth (tensor): Ground truth boxes and labels for a batch,
+                shape: [batch_size,num_objs,5] (last idx is the label).
+        """
+        loc_data, conf_data, landm_data, aggs, thetas = predictions
+        num = loc_data.size(0)
+        num_priors = (priorbox.priors.size(0))
+        if aggs is not None:
+            stacked_target = torch.stack(targets, dim=0).squeeze(1)
+            pos_idx = stacked_target[:, -1] > 0
+            agg_ldmk = aggs[:, 6:][pos_idx]
+            tgt_ldmk = stacked_target[:, 4:14][pos_idx]
+            agg_loss_landm = F.smooth_l1_loss(agg_ldmk, tgt_ldmk, reduction='sum') / len(tgt_ldmk)
+            pos_idx = stacked_target[:, -1] != 0
+            agg_bbox = aggs[:, :4][pos_idx]
+            tgt_bbox = stacked_target[:, :4][pos_idx]
+            agg_loss_box = F.smooth_l1_loss(agg_bbox, tgt_bbox, reduction='sum') / len(tgt_bbox)
+            agg_cls = aggs[:, 4:6]
+            tgt_cls = (stacked_target[:, -1] > 0).long()
+            agg_loss_cls = F.cross_entropy(agg_cls, tgt_cls, reduction='sum') / len(tgt_cls)
+            aux_loss_dict = {
+                'agg_loss_landm': agg_loss_landm,
+                'agg_loss_box': agg_loss_box,
+                'agg_loss_cls': agg_loss_cls
+            }
+        else:
+            aux_loss_dict = None
+        # match priors (default boxes) and ground truth boxes
+        loc_t = torch.Tensor(num, num_priors, 4)
+        landm_t = torch.Tensor(num, num_priors, 10)
+        conf_t = torch.LongTensor(num, num_priors)
+        for idx in range(num):
+            truths = targets[idx][:, :4].data
+            labels = targets[idx][:, -1].data
+            landms = targets[idx][:, 4:14].data
+            match(self.threshold, truths, priorbox, labels, landms, loc_t, conf_t, landm_t, idx)
+        loc_t = loc_t.cuda()
+        conf_t = conf_t.cuda()
+        landm_t = landm_t.cuda()
+        zeros = torch.tensor(0).cuda()
+        # landm Loss (Smooth L1)
+        # Shape: [batch,num_priors,10]
+        pos1 = conf_t > zeros
+        num_pos_landm = pos1.long().sum(1, keepdim=True)
+        N1 = max(num_pos_landm.data.sum().float(), 1)
+        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
+        landm_p = landm_data[pos_idx1].view(-1, 10)
+        landm_t = landm_t[pos_idx1].view(-1, 10)
+        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
+        pos = conf_t != zeros
+        conf_t[pos] = 1
+        # Localization Loss (Smooth L1)
+        # Shape: [batch,num_priors,4]
+        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
+        loc_p = loc_data[pos_idx].view(-1, 4)
+        loc_t = loc_t[pos_idx].view(-1, 4)
+        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
+        # Compute max conf across batch for hard negative mining
+        batch_conf = conf_data.view(-1, self.num_classes)
+        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
+        # Hard Negative Mining
+        loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
+        loss_c = loss_c.view(num, -1)
+        _, loss_idx = loss_c.sort(1, descending=True)
+        _, idx_rank = loss_idx.sort(1)
+        num_pos = pos.long().sum(1, keepdim=True)
+        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
+        neg = idx_rank < num_neg.expand_as(idx_rank)
+        # Confidence Loss Including Positive and Negative Examples
+        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
+        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
+        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
+        targets_weighted = conf_t[(pos+neg).gt(0)]
+        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
+        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
+        N = max(num_pos.data.sum().float(), 1)
+        loss_l /= N
+        loss_c /= N
+        loss_landm /= N1
+        return loss_l, loss_c, loss_landm, aux_loss_dict

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/__init__.py ADDED Viewed

File without changes

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/net.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import time
+import torch
+import torch.nn as nn
+import torchvision.models._utils as _utils
+import torchvision.models as models
+import torch.nn.functional as F
+from torch.autograd import Variable
+def conv_bn(inp, oup, stride = 1, leaky = 0):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope=leaky, inplace=True)
+    )
+def conv_bn_no_relu(inp, oup, stride):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        nn.BatchNorm2d(oup),
+    )
+def conv_bn1X1(inp, oup, stride, leaky=0):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope=leaky, inplace=True)
+    )
+def conv_dw(inp, oup, stride, leaky=0.1):
+    return nn.Sequential(
+        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+        nn.BatchNorm2d(inp),
+        nn.LeakyReLU(negative_slope= leaky,inplace=True),
+        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope= leaky,inplace=True),
+    )
+class SSH(nn.Module):
+    def __init__(self, in_channel, out_channel):
+        super(SSH, self).__init__()
+        assert out_channel % 4 == 0
+        leaky = 0
+        if (out_channel <= 64):
+            leaky = 0.1
+        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)
+        self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky)
+        self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+        self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky)
+        self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+    def forward(self, input):
+        conv3X3 = self.conv3X3(input)
+        conv5X5_1 = self.conv5X5_1(input)
+        conv5X5 = self.conv5X5_2(conv5X5_1)
+        conv7X7_2 = self.conv7X7_2(conv5X5_1)
+        conv7X7 = self.conv7x7_3(conv7X7_2)
+        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
+        out = F.relu(out)
+        return out
+class FPN(nn.Module):
+    def __init__(self,in_channels_list,out_channels):
+        super(FPN,self).__init__()
+        leaky = 0
+        if (out_channels <= 64):
+            leaky = 0.1
+        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky)
+        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky)
+        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky)
+        self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky)
+        self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky)
+    def forward(self, input):
+        # names = list(input.keys())
+        input = list(input.values())
+        output1 = self.output1(input[0])
+        output2 = self.output2(input[1])
+        output3 = self.output3(input[2])
+        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
+        output2 = output2 + up3
+        output2 = self.merge2(output2)
+        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
+        output1 = output1 + up2
+        output1 = self.merge1(output1)
+        out = [output1, output2, output3]
+        return out
+class MobileNetV1(nn.Module):
+    def __init__(self):
+        super(MobileNetV1, self).__init__()
+        self.stage1 = nn.Sequential(
+            conv_bn(3, 8, 2, leaky = 0.1),    # 3
+            conv_dw(8, 16, 1),   # 7
+            conv_dw(16, 32, 2),  # 11
+            conv_dw(32, 32, 1),  # 19
+            conv_dw(32, 64, 2),  # 27
+            conv_dw(64, 64, 1),  # 43
+        )
+        self.stage2 = nn.Sequential(
+            conv_dw(64, 128, 2),  # 43 + 16 = 59
+            conv_dw(128, 128, 1), # 59 + 32 = 91
+            conv_dw(128, 128, 1), # 91 + 32 = 123
+            conv_dw(128, 128, 1), # 123 + 32 = 155
+            conv_dw(128, 128, 1), # 155 + 32 = 187
+            conv_dw(128, 128, 1), # 187 + 32 = 219
+        )
+        self.stage3 = nn.Sequential(
+            conv_dw(128, 256, 2), # 219 +3 2 = 241
+            conv_dw(256, 256, 1), # 241 + 64 = 301
+        )
+    def forward(self, x):
+        x = self.stage1(x)
+        x = self.stage2(x)
+        x = self.stage3(x)
+        return x

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/models/retinaface.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import torch
+import torch.nn as nn
+import torchvision.models._utils as _utils
+import torch.nn.functional as F
+from .net import MobileNetV1 as MobileNetV1
+from .net import FPN as FPN
+from .net import SSH as SSH
+from timm.models import mlp_mixer
class ClassHead(nn.Module):
    """1x1-conv head producing two scores per anchor at every location."""

    def __init__(self,inchannels=512,num_anchors=3):
        super().__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, 2 * num_anchors,
                                 kernel_size=1, stride=1, padding=0)

    def forward(self,x):
        """Return scores reshaped to ``(batch, H * W * num_anchors, 2)``."""
        scores = self.conv1x1(x)
        # Move channels last so each anchor's pair of scores is contiguous.
        channels_last = scores.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 2)
class BboxHead(nn.Module):
    """1x1-conv head producing four box-regression values per anchor."""

    def __init__(self,inchannels=512,num_anchors=3):
        super().__init__()
        self.conv1x1 = nn.Conv2d(inchannels, 4 * num_anchors,
                                 kernel_size=1, stride=1, padding=0)

    def forward(self,x):
        """Return regressions reshaped to ``(batch, H * W * num_anchors, 4)``."""
        deltas = self.conv1x1(x)
        # Move channels last so each anchor's four values are contiguous.
        channels_last = deltas.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 4)
class LandmarkHead(nn.Module):
    """1x1-conv head producing ten landmark-regression values per anchor."""

    def __init__(self,inchannels=512,num_anchors=3):
        super().__init__()
        self.conv1x1 = nn.Conv2d(inchannels, 10 * num_anchors,
                                 kernel_size=1, stride=1, padding=0)

    def forward(self,x):
        """Return regressions reshaped to ``(batch, H * W * num_anchors, 10)``."""
        offsets = self.conv1x1(x)
        # Move channels last so each anchor's ten values are contiguous.
        channels_last = offsets.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 10)
class RetinaFace(nn.Module):
    """Face detector: backbone -> FPN -> three SSH modules -> per-level heads."""

    def __init__(self, cfg = None, phase = 'train'):
        """
        :param cfg:  Network related settings. The keys read here are
            ``name``, ``return_layers``, ``in_channel``, ``out_channel`` and,
            for the ``Resnet50`` backbone, ``pretrain``.
        :param phase: train or test. Any value other than ``'train'`` makes
            ``forward`` apply a softmax to the classification scores.
        :raises ValueError: if ``cfg`` is missing or names an unsupported
            backbone.
        """
        super(RetinaFace,self).__init__()
        if cfg is None:
            raise ValueError("RetinaFace requires a cfg dict, got None")
        self.phase = phase

        backbone_name = cfg['name']
        if backbone_name == 'mobilenet0.25':
            # cfg['pretrain'] is not consulted for this backbone: no backbone
            # checkpoint is loaded here.
            backbone = MobileNetV1()
        elif backbone_name == 'Resnet50':
            import torchvision.models as models
            backbone = models.resnet50(pretrained=cfg['pretrain'])
        else:
            raise ValueError(
                "Unsupported backbone {!r}; expected 'mobilenet0.25' or "
                "'Resnet50'".format(backbone_name))

        # Expose the backbone layers named in cfg['return_layers'].
        self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])

        base_channels = cfg['in_channel']
        in_channels_list = [base_channels * 2, base_channels * 4, base_channels * 8]
        out_channels = cfg['out_channel']

        self.fpn = FPN(in_channels_list,out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        # One head of each kind per pyramid level. The attribute names are
        # part of the state-dict keys and must not change.
        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=out_channels)
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=out_channels)
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=out_channels)

    def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Build ``fpn_num`` classification heads."""
        return nn.ModuleList(
            [ClassHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Build ``fpn_num`` box-regression heads."""
        return nn.ModuleList(
            [BboxHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2):
        """Build ``fpn_num`` landmark-regression heads."""
        return nn.ModuleList(
            [LandmarkHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def forward(self, inputs, priorbox=None):
        """Run detection on a batch of images.

        :param inputs: image batch passed to the backbone.
        :param priorbox: accepted for call compatibility; not used here.
        :return: ``(bbox_regressions, classifications, ldm_regressions)``,
            each concatenated over the three pyramid levels along dim 1.
            Outside the ``'train'`` phase the classifications are
            softmax-normalised over the last dimension.
        """
        pyramid = self.fpn(self.body(inputs))
        features = [
            self.ssh1(pyramid[0]),
            self.ssh2(pyramid[1]),
            self.ssh3(pyramid[2]),
        ]

        bbox_regressions = torch.cat(
            [head(feat) for head, feat in zip(self.BboxHead, features)], dim=1)
        classifications = torch.cat(
            [head(feat) for head, feat in zip(self.ClassHead, features)], dim=1)
        ldm_regressions = torch.cat(
            [head(feat) for head, feat in zip(self.LandmarkHead, features)], dim=1)

        if self.phase != 'train':
            classifications = F.softmax(classifications, dim=-1)
        return (bbox_regressions, classifications, ldm_regressions)

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/preprocessor.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import torch.nn.functional as F
class Preprocessor():
    """Pad image tensors to a square, add an optional border, then resize.

    Inputs must be ``torch.float32`` or ``torch.uint8`` tensors shaped
    ``(3, H, W)`` or ``(N, 3, H, W)``; the output keeps the input dtype.
    """

    def __init__(self, output_size=160, padding=0.0, padding_val='zero'):
        """
        Args:
            output_size: side length of the square output image.
            padding: border added on every side, as a fraction of the
                squared image's height/width.
            padding_val: ``'zero'`` or ``'mean'``. ``'zero'`` fills with
                -1.0 for float32 input and 0 for uint8 input (presumably
                float images are normalised to [-1, 1]; confirm with the
                caller). ``'mean'`` fills with the mean of the whole batch.
        """
        self.output_size = output_size
        self.padding = padding
        self.padding_val = padding_val

    def _resolve_padding_value(self, imgs):
        """Return the scalar fill value for ``imgs`` according to ``padding_val``.

        Raises:
            ValueError: for an unsupported dtype or ``padding_val`` setting.
        """
        is_float = imgs.dtype == torch.float32
        if not is_float and imgs.dtype != torch.uint8:
            raise ValueError('imgs.dtype must be torch.float32 or torch.uint8')
        if self.padding_val == 'zero':
            return -1.0 if is_float else 0
        if self.padding_val == 'mean':
            # Tensor.mean() is not defined for integer dtypes, so average in
            # float; .item() hands F.pad a plain Python number.
            mean = imgs.float().mean().item()
            # uint8 images get an integer fill (fraction truncated).
            return mean if is_float else int(mean)
        raise ValueError('padding_val must be "zero" or "mean"')

    def preprocess_batched(self, imgs, padding_ratio_override=None):
        """Square, pad and resize a ``(N, C, H, W)`` batch.

        Args:
            imgs: float32 or uint8 image batch.
            padding_ratio_override: if not None, used instead of
                ``self.padding`` for this call.

        Returns:
            Tensor of shape ``(N, C, output_size, output_size)`` with the
            same dtype as ``imgs``.

        Raises:
            ValueError: for an unsupported dtype or ``padding_val`` setting.
        """
        padding_val = self._resolve_padding_value(imgs)
        square_imgs = self.make_square_img_batched(imgs, padding_val=padding_val)

        padding = self.padding if padding_ratio_override is None else padding_ratio_override
        padded_imgs = self.make_padded_img_batched(square_imgs, padding=padding,
                                                   padding_val=padding_val)

        size = (self.output_size, self.output_size)
        if imgs.dtype == torch.float32:
            return F.interpolate(padded_imgs, size=size, mode='bilinear', align_corners=True)

        # uint8: interpolate in float, then clip and cast back.
        resized_imgs = F.interpolate(padded_imgs.to(torch.float32), size=size,
                                     mode='bilinear', align_corners=True)
        return torch.clip(resized_imgs, 0, 255).to(torch.uint8)

    def make_square_img_batched(self, imgs, padding_val):
        """Pad the shorter spatial side of a 4-D batch so that H == W.

        The padding is split evenly; any odd pixel goes to the right/bottom.
        """
        assert imgs.ndim == 4
        h, w = imgs.shape[2:]
        if h > w:
            diff = h - w
            pad_left = diff // 2
            pad_right = diff - pad_left
            imgs = F.pad(imgs, (pad_left, pad_right, 0, 0), value=padding_val)
        elif w > h:
            diff = w - h
            pad_top = diff // 2
            pad_bottom = diff - pad_top
            imgs = F.pad(imgs, (0, 0, pad_top, pad_bottom), value=padding_val)
        assert imgs.shape[2] == imgs.shape[3]
        return imgs

    def make_padded_img_batched(self, imgs, padding, padding_val):
        """Add a border of ``int(side * padding)`` pixels on every side."""
        if padding == 0:
            return imgs
        assert imgs.ndim == 4
        h, w = imgs.shape[2:]
        pad_h = int(h * padding)
        pad_w = int(w * padding)
        return F.pad(imgs, (pad_w, pad_w, pad_h, pad_h), value=padding_val)

    def __call__(self, input, padding_ratio_override=None):
        """Preprocess a single ``(3, H, W)`` image or a ``(N, 3, H, W)`` batch.

        Returns a tensor with the same number of dimensions as ``input``.

        Raises:
            ValueError: if ``input`` is neither 3-D nor 4-D.
        """
        if input.ndim == 3:
            assert input.shape[0] == 3
            batch_input = input.unsqueeze(0)
            return self.preprocess_batched(
                batch_input, padding_ratio_override=padding_ratio_override)[0]
        if input.ndim == 4:
            assert input.shape[1] == 3
            return self.preprocess_batched(
                input, padding_ratio_override=padding_ratio_override)
        raise ValueError(f'Invalid input shape: {input.shape}')

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/utils/box_utils.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import torch
+import numpy as np
def point_form(boxes):
    """Convert centre-size boxes to corner form.

    Args:
        boxes: (tensor) boxes as (cx, cy, w, h), one per row.
    Return:
        (tensor) the same boxes as (xmin, ymin, xmax, ymax).
    """
    centers = boxes[:, :2]
    half_extent = boxes[:, 2:]/2
    top_left = centers - half_extent
    bottom_right = centers + half_extent
    return torch.cat((top_left, bottom_right), 1)
def center_size(boxes):
    """Convert corner-form boxes to centre-size form.

    Args:
        boxes: (tensor) boxes as (xmin, ymin, xmax, ymax), one per row.
    Return:
        boxes: (tensor) the same boxes as (cx, cy, w, h).
    """
    # Both halves must sit in one tuple: torch.cat takes a sequence of
    # tensors followed by the dimension to concatenate along.
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,  # cx, cy
                      boxes[:, 2:] - boxes[:, :2]), 1)    # w, h
def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-form boxes.

    Both inputs are expanded (as views, without copying) to [A,B,2] so that
    every box in ``box_a`` is compared with every box in ``box_b``.

    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    pair_shape = (box_a.size(0), box_b.size(0), 2)
    # Lower-right corner of the overlap: element-wise min of the two maxima.
    lower_right = torch.min(box_a[:, 2:].unsqueeze(1).expand(*pair_shape),
                            box_b[:, 2:].unsqueeze(0).expand(*pair_shape))
    # Upper-left corner of the overlap: element-wise max of the two minima.
    upper_left = torch.max(box_a[:, :2].unsqueeze(1).expand(*pair_shape),
                           box_b[:, :2].unsqueeze(0).expand(*pair_shape))
    # Disjoint boxes give a negative extent; clamp it to zero area.
    extent = torch.clamp(lower_right - upper_left, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
def jaccard(box_a, box_b):
    """Pairwise intersection-over-union of two sets of corner-form boxes.

    IoU = |A ∩ B| / (area(A) + area(B) - |A ∩ B|)

    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    inter = intersect(box_a, box_b)

    widths_a = box_a[:, 2] - box_a[:, 0]
    heights_a = box_a[:, 3] - box_a[:, 1]
    widths_b = box_b[:, 2] - box_b[:, 0]
    heights_b = box_b[:, 3] - box_b[:, 1]

    # Broadcast the per-box areas over the [A,B] pair grid.
    area_a = (widths_a * heights_a).unsqueeze(1).expand_as(inter)
    area_b = (widths_b * heights_b).unsqueeze(0).expand_as(inter)

    return inter / (area_a + area_b - inter)
def matrix_iou(a, b):
    """
    Return the pairwise IoU of the corner-form boxes in a and b
    (numpy version, for data augmentation).
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    # Zero out pairs that do not overlap on both axes.
    overlaps = (top_left < bottom_right).all(axis=2)
    area_i = np.prod(bottom_right - top_left, axis=2) * overlaps
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union = area_a[:, np.newaxis] + area_b - area_i
    return area_i / union
def matrix_iof(a, b):
    """
    Return the pairwise intersection of a and b divided by the area of a,
    with that area floored at 1 (numpy version, for data augmentation).
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    # Zero out pairs that do not overlap on both axes.
    overlaps = (top_left < bottom_right).all(axis=2)
    area_i = np.prod(bottom_right - top_left, axis=2) * overlaps
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denominator = np.maximum(area_a[:, np.newaxis], 1)
    return area_i / denominator
def match(threshold, truths, priorbox, labels, landms, loc_t, conf_t, landm_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the matched boxes and landmarks, and write the targets
    for batch element ``idx`` into ``loc_t``, ``conf_t`` and ``landm_t``.

    Args:
        threshold: (float) The overlap threshold used when matching boxes;
            priors whose best overlap is below it are labelled background.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priorbox: object providing ``priors`` (center-size prior boxes) and
            the ``encode`` / ``encode_landm`` methods used to build targets.
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        None. The results are written in place into ``loc_t[idx]``,
        ``conf_t[idx]`` and ``landm_t[idx]``.
    """
    # Pairwise IoU, Shape: [num_obj, num_priors].
    overlaps = jaccard(
        truths,
        point_form(priorbox.priors)
    )
    # (Bipartite Matching)
    # [num_obj, 1] best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)

    # Ignore hard ground truths: those whose best prior overlaps less than 0.2.
    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
    if best_prior_idx_filter.shape[0] <= 0:
        # Nothing matchable in this image: every prior is background. The
        # landmark targets are cleared too so that no stale values remain.
        loc_t[idx] = 0
        conf_t[idx] = 0
        landm_t[idx] = 0
        return

    # [1, num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
    best_truth_idx.squeeze_(0)
    best_truth_overlap.squeeze_(0)
    best_prior_idx.squeeze_(1)
    best_prior_idx_filter.squeeze_(1)
    best_prior_overlap.squeeze_(1)
    # Give the best prior of every valid ground truth an overlap of 2 so it
    # always survives the threshold test below.
    best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2)  # ensure best prior
    # TODO refactor: index  best_prior_idx with long tensor
    # Ensure every gt matches with its prior of max overlap. Kept as a loop:
    # when several ground truths share a best prior, the last one wins.
    for j in range(best_prior_idx.size(0)):
        best_truth_idx[best_prior_idx[j]] = j
    matches = truths[best_truth_idx]            # Shape: [num_priors,4], the gt box assigned to each prior
    conf = labels[best_truth_idx]               # Shape: [num_priors], the gt label assigned to each prior
    conf[best_truth_overlap < threshold] = 0    # label as background (negative sample)
    loc = priorbox.encode(matches)

    matches_landm = landms[best_truth_idx]
    landm = priorbox.encode_landm(matches_landm)
    loc_t[idx] = loc    # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    landm_t[idx] = landm
def log_sum_exp(x):
    """Compute log(sum(exp(x))) over dimension 1, keeping that dimension.

    The global maximum of ``x`` is subtracted before exponentiating and
    added back afterwards, which keeps the exponentials from overflowing.

    Args:
        x (tensor): conf_preds from conf layers
    """
    shift = x.data.max()
    summed = torch.exp(x - shift).sum(1, keepdim=True)
    return torch.log(summed) + shift
+# Original author: Francisco Massa:
+# https://github.com/fmassa/object-detection.torch
+# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        ``(keep, count)``: ``keep`` is a long tensor of length
        ``scores.size(0)`` whose first ``count`` entries are the indices of
        the kept boxes (highest score first); the remaining entries are 0.
        For empty input ``count`` is 0.
    """
    keep = torch.zeros(scores.size(0), dtype=torch.long)
    if boxes.numel() == 0:
        # Same (keep, count) shape as the normal path so callers can always
        # unpack the result.
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]       # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view

        # Corners of the overlap between box i and every remaining box.
        xx1 = torch.max(torch.index_select(x1, 0, idx), x1[i])
        yy1 = torch.max(torch.index_select(y1, 0, idx), y1[i])
        xx2 = torch.min(torch.index_select(x2, 0, idx), x2[i])
        yy2 = torch.min(torch.index_select(y2, 0, idx), y2[i])

        # Negative extents mean no overlap.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # areas of the remaining boxes
        union = (rem_areas - inter) + area[i]
        IoU = inter / union

        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface/utils/model_utils.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import torch
def remove_prefix(state_dict, prefix):
    ''' Return a copy of state_dict in which keys starting with prefix have that
    prefix removed once (old style checkpoints share a common 'module.' prefix). '''
    print('remove prefix \'{}\''.format(prefix))

    def strip(key):
        if key.startswith(prefix):
            return key.split(prefix, 1)[-1]
        return key

    renamed = {}
    for key, value in state_dict.items():
        renamed[strip(key)] = value
    return renamed
def check_keys(model, pretrained_state_dict):
    """Print how the checkpoint keys line up with the model's and return True.

    Fails an assertion when the checkpoint and the model share no key at all.
    """
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())

    shared = model_keys.intersection(ckpt_keys)
    only_in_ckpt = ckpt_keys.difference(model_keys)
    only_in_model = model_keys.difference(ckpt_keys)

    print('Missing keys:{}'.format(len(only_in_model)))
    print('Unused checkpoint keys:{}'.format(len(only_in_ckpt)))
    print('Used keys:{}'.format(len(shared)))
    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True
def load_model(model, pretrained_path, load_to_cpu):
    """Load a checkpoint into ``model`` (non-strict) and return the model.

    The checkpoint may hold the weights directly or under a ``state_dict``
    entry; a leading ``module.`` is removed from every key before loading.
    With ``load_to_cpu`` false, storages are moved to the current CUDA device.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))

    if load_to_cpu:
        def relocate(storage, loc):
            return storage
    else:
        device = torch.cuda.current_device()

        def relocate(storage, loc):
            return storage.cuda(device)

    checkpoint = torch.load(pretrained_path, map_location=relocate)
    has_wrapper = "state_dict" in checkpoint.keys()
    weights = checkpoint['state_dict'] if has_wrapper else checkpoint
    weights = remove_prefix(weights, 'module.')

    check_keys(model, weights)
    model.load_state_dict(weights, strict=False)
    return model

cvlface/research/recognition/code/run_v1/aligners/retinaface_aligner/retinaface_pipeline.py ADDED Viewed

	@@ -0,0 +1,247 @@

+import torch
+import numpy as np
+import cv2
+from .retinaface.utils.model_utils import load_model
+from .retinaface.layers.functions.prior_box import PriorBox
+from .retinaface.models.retinaface import RetinaFace
+import torch.nn.functional as F
# Settings for the 'mobilenet0.25' backbone.
cfg_mnet = dict(
    name='mobilenet0.25',
    gpu_train=True,
    ngpu=1,
    epoch=250,
    decay1=190,
    decay2=220,
    # image_size=640,
    pretrain=True,
    return_layers={'stage1': 1, 'stage2': 2, 'stage3': 3},
    in_channel=32,
    out_channel=64,
)

# Settings for the 'Resnet50' backbone.
cfg_re50 = dict(
    name='Resnet50',
    gpu_train=True,
    ngpu=4,
    epoch=100,
    decay1=70,
    decay2=90,
    # image_size=840,
    pretrain=True,
    return_layers={'layer2': 1, 'layer3': 2, 'layer4': 3},
    in_channel=256,
    out_channel=256,
)
def load_retinface_model(network='resnet50', trained_model_path=''):
    """Build a RetinaFace network in test phase, load its weights and freeze it.

    Args:
        network: ``"mobile0.25"`` or ``"resnet50"``; selects the config dict.
        trained_model_path: checkpoint path handed to ``load_model`` (the
            weights are loaded to CPU).

    Returns:
        The network in eval mode with ``requires_grad`` disabled on every
        parameter.

    Raises:
        ValueError: if ``network`` is not one of the supported names.
    """
    if network == "mobile0.25":
        cfg = cfg_mnet
    elif network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail here with a clear message instead of handing cfg=None to the
        # model constructor.
        raise ValueError(
            "Unknown network {!r}; expected 'mobile0.25' or 'resnet50'".format(network))

    # net and model
    net = RetinaFace(cfg=cfg, phase = 'test')
    net = load_model(net, trained_model_path, True)
    net.eval()

    # freeze grad
    for param in net.parameters():
        param.requires_grad = False
    return net
class RetinaFacePipeline(torch.nn.Module):
    """Runs a RetinaFace network and returns the top detection's landmarks."""

    def __init__(self, net, priorbox, input_size, device='cuda'):
        """
        Args:
            net: detection network called on the mean-subtracted BGR batch.
            priorbox: prior-box helper; ``image_size`` is read here, and
                ``decode`` / ``decode_landm`` are used by ``postprocess``.
            input_size: side length that ``prealign_preprocess`` pads to.
            device: device the inputs and outputs of ``forward`` are moved to.
        """
        super().__init__()
        self.net = net
        self.priorbox = priorbox
        self.input_size = input_size
        self.output_size = 112
        self.device = device

    def normalize(self, image):
        """Map pixel values from [0, 255] to [-1, 1]."""
        image = image / 255.
        image = (image - 0.5) / 0.5
        return image

    def unnormalize(self, image):
        """Inverse of ``normalize``: map values from [-1, 1] to [0, 255]."""
        image = image * 0.5 + 0.5
        image = image * 255.
        return image

    def normalize_for_net(self, bgr_image_0_255):
        """Subtract the per-channel constants (104, 117, 123) from a 0-255 BGR batch."""
        channel_mean = torch.tensor([104, 117, 123])[None, :, None, None].to(self.device)
        return bgr_image_0_255 - channel_mean

    def prealign_preprocess(self, images, value=0.0):
        """Fit ``images`` into an ``input_size`` x ``input_size`` canvas.

        Images larger than ``input_size`` on either side are first resized
        (bilinear, aspect ratio kept) so the longer side becomes
        ``input_size``; the result is then padded with ``value``, split
        evenly with any odd pixel on the right/bottom.

        Args:
            images: 3-D ``(C, H, W)`` or 4-D ``(N, C, H, W)`` tensor.
            value: fill value for the padding.
        """
        assert isinstance(images, torch.Tensor)
        assert images.ndim == 4 or images.ndim == 3
        input_size = self.input_size
        data_width = images.shape[-1]
        data_height = images.shape[-2]
        if data_width > input_size or data_height > input_size:
            # The image is bigger than the input size: scale so that the
            # larger side becomes input_size without changing aspect ratio.
            scale = input_size / max(data_width, data_height)
            if images.ndim == 4:
                images = F.interpolate(input=images, scale_factor=scale,
                                        mode='bilinear', align_corners=False)
            else:
                # F.interpolate needs a batch dimension for 2-D resizing.
                images = F.interpolate(input=images.unsqueeze(0), scale_factor=scale,
                                        mode='bilinear', align_corners=False).squeeze(0)
        data_width = images.shape[-1]
        data_height = images.shape[-2]
        padding_width1 = (input_size - data_width) // 2
        padding_width2 = (input_size - data_width) - padding_width1
        padding_height1 = (input_size - data_height) // 2
        padding_height2 = (input_size - data_height) - padding_height1
        result = torch.nn.functional.pad(input=images,
                                         pad=(padding_width1, padding_width2,
                                              padding_height1, padding_height2),
                                         value=value)
        assert result.shape[-1] == input_size
        assert result.shape[-2] == input_size
        return result

    def _normalize_landmarks(self, ldmks):
        """Scale ten pixel-space landmark values (x, y pairs) to image fractions.

        ``priorbox.image_size`` is (height, width), as unpacked in
        ``postprocess``, and the landmarks are laid out as (x, y) pairs, so
        x is divided by the width and y by the height.
        """
        im_height, im_width = self.priorbox.image_size
        return ldmks / np.array([im_width, im_height] * 5)

    def forward(self, rgb_images):
        """Detect faces and return the best detection's landmarks per image.

        Args:
            rgb_images: ``(N, 3, H, W)`` RGB batch in the value range that
                ``unnormalize`` expects; ``(H, W)`` must equal
                ``priorbox.image_size``.

        Returns:
            Float tensor of shape ``(N, 10)`` on ``self.device`` holding the
            landmarks of the highest-scoring detection, normalised by the
            image size.
        """
        assert rgb_images.shape[1] == 3
        assert rgb_images.ndim == 4
        assert isinstance(rgb_images, torch.Tensor)
        assert self.priorbox.image_size == rgb_images.shape[2:]

        rgb_images = rgb_images.to(self.device)
        # The network is fed BGR: reverse the channel dimension.
        bgr_images = rgb_images.flip(1)
        input_img = self.normalize_for_net(self.unnormalize(bgr_images))
        batch_loc, batch_conf, batch_landms = self.net(input_img)

        # Post-process one image at a time.
        batch_loc = torch.split(batch_loc, 1, dim=0)
        batch_conf = torch.split(batch_conf, 1, dim=0)
        batch_landms = torch.split(batch_landms, 1, dim=0)
        all_ldmks = []
        for loc, conf, landms, in zip(batch_loc, batch_conf, batch_landms):
            dets = postprocess(self.priorbox, loc, conf, landms, confidence_threshold=0.0, nms_threshold=0.4)
            bbox, score, ldmks = parse_one_det_result(dets)
            all_ldmks.append(self._normalize_landmarks(ldmks))
        all_ldmks = torch.from_numpy(np.array(all_ldmks)).to(self.device).float()
        return all_ldmks
def postprocess(priorbox, loc, conf, landms, confidence_threshold, nms_threshold):
    """Decode one image's raw network outputs into NMS-filtered detections.

    Returns a numpy array with one row per kept detection:
    4 box coordinates, 1 score, then 10 landmark coordinates, all scaled to
    pixels using ``priorbox.image_size`` (height, width).
    """
    device = loc.device
    im_height, im_width = priorbox.image_size
    box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
    landm_scale = torch.Tensor([im_width, im_height] * 5).to(device)

    # Decode against the priors, scale to pixels, move to numpy.
    boxes = (priorbox.decode(loc.data.squeeze(0)) * box_scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = (priorbox.decode_landm(landms.data.squeeze(0)) * landm_scale).cpu().numpy()

    # Ignore low scores; if nothing passes, fall back to every score >= 0.
    selected = np.where(scores > confidence_threshold)[0]
    if len(selected) == 0:
        selected = np.where(scores >= 0)[0]
    boxes, landms, scores = boxes[selected], landms[selected], scores[selected]

    # Sort by descending score before NMS.
    ranking = scores.argsort()[::-1]
    boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

    # do NMS
    dets = np.concatenate((boxes, scores[:, np.newaxis]), axis=1)
    dets = dets.astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)

    return np.concatenate((dets[keep, :], landms[keep]), axis=1)
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    ``dets`` rows are (x1, y1, x2, y2, score). Returns the list of kept row
    indices, highest score first. Box extents use the inclusive "+ 1"
    pixel convention.
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    areas = (right - left + 1) * (bottom - top + 1)

    remaining = dets[:, 4].argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Overlap of the best box with every other remaining box.
        xx1 = np.maximum(left[best], left[rest])
        yy1 = np.maximum(top[best], top[rest])
        xx2 = np.minimum(right[best], right[rest])
        yy2 = np.minimum(bottom[best], bottom[rest])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[best] + areas[rest] - inter)

        # Drop everything that overlaps the best box too much.
        remaining = rest[np.where(ovr <= thresh)[0]]
    return keep
def parse_one_det_result(dets):
    """Split the highest-scoring detection row into (bbox, score, landmarks).

    Column 4 is the score, columns 0-3 the box and columns 5 onward the
    landmarks.
    """
    by_score_desc = dets[:, 4].argsort()[::-1]
    ranked = dets[by_score_desc]
    top = ranked[0]
    return top[:4], top[4], top[5:]
def load_retinaface_pipeline(network, trained_model_path, input_size, device):
    """Build a ready-to-use ``RetinaFacePipeline`` on ``device``.

    Args:
        network: backbone name, ``'mobile0.25'`` or ``'resnet50'``. Any other
            value falls back to ``'resnet50'``, which is what this function
            always used before the argument was honoured.
        trained_model_path: checkpoint path for the detector weights.
        input_size: side length of the square input; it defines the prior
            boxes and the pipeline's padding target.
        device: device for the network, the prior boxes and the pipeline.

    Returns:
        The constructed ``RetinaFacePipeline``.
    """
    # NOTE(review): the fallback keeps callers that pass an unrecognised name
    # working exactly as before; tighten it once all callers are checked.
    backbone = network if network in ('mobile0.25', 'resnet50') else 'resnet50'
    net = load_retinface_model(network=backbone, trained_model_path=trained_model_path)
    net = net.to(device)
    priorbox = PriorBox(image_size=(input_size, input_size),
                        min_sizes=[[16, 32], [64, 128], [256, 512]],
                        steps=[8,16,32], clip=False,
                        variances=[0.1, 0.2],
                        device=device)
    pipeline = RetinaFacePipeline(net, priorbox, input_size, device=device)
    # Follow the requested device instead of forcing the default CUDA device.
    pipeline.to(device)
    return pipeline

cvlface/research/recognition/code/run_v1/base.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+defaults:
+  - trainers : configs/default
+  - optims : configs/cosine
+  - pefts: configs/none
+  - models: vit/configs/v1_small
+  - classifiers: configs/partial_fc
+  - aligners: configs/none
+  - dataset: configs/casia
+  - data_augs: configs/v7
+  - losses: configs/adaface
+  - pipelines: configs/train_model_cls
+  - evaluations: configs/base

cvlface/research/recognition/code/run_v1/classifiers/__init__.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from . import partial_fc
+from . import fc
def get_classifier(classifier_cfg, margin_loss_fn, model_cfg, num_classes, rank, world_size):
    """Create the classifier selected by ``classifier_cfg.name``.

    Returns ``None`` (after printing a notice) when no margin loss function
    is given. Otherwise builds a ``partial_fc`` or ``fc`` classifier,
    loads weights from ``classifier_cfg.start_from`` when that is set, and
    disables gradients on all parameters when ``classifier_cfg.freeze`` is set.

    Raises:
        ValueError: if ``classifier_cfg.name`` is not a known classifier.
    """
    if margin_loss_fn is None:
        print("No margin loss function provided, classifier will not be created")
        return None

    build_args = (classifier_cfg, margin_loss_fn, model_cfg, num_classes, rank, world_size)
    if classifier_cfg.name == 'partial_fc':
        classifier = partial_fc.PartialFCClassifier.from_config(*build_args)
    elif classifier_cfg.name == 'fc':
        classifier = fc.FCClassifier.from_config(*build_args)
    else:
        raise ValueError(f"Unknown classifier: {classifier_cfg.name}")

    if classifier_cfg.start_from:
        classifier.load_state_dict_from_path(classifier_cfg.start_from)

    if classifier_cfg.freeze:
        for weight in classifier.parameters():
            weight.requires_grad = False

    return classifier

cvlface/research/recognition/code/run_v1/classifiers/base/__init__.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import os
+from typing import Union
+import torch
+from torch import device
+from .utils import get_parameter_device, get_parameter_dtype, save_state_dict_and_config, load_state_dict_from_path
+from general_utils.os_utils import natural_sort
class BaseClassifier(torch.nn.Module):
    """Common base class for classification heads.

    Subclasses implement ``from_config`` and ``forward``.  The checkpoint
    helpers below write/read one file per rank, named
    ``<stem>_rank<rank><ext>``.

    ``load_state_dict_from_path`` additionally reads ``self.rank``,
    ``self.world_size`` and (when re-partitioning) ``self.partial_fc``;
    this base class does not set them, so subclasses must.
    """

    def __init__(self, config=None):
        super(BaseClassifier, self).__init__()
        self.config = config

    @classmethod
    def from_config(cls, classifier_cfg, margin_loss_fn, model_cfg, dataset_cfg, rank, world_size) -> "BaseClassifier":
        """Build a classifier from configuration; must be overridden."""
        raise NotImplementedError('from_config must be implemented in subclass')

    def forward(self, local_embeddings, local_labels):
        """Compute the loss for a batch; must be overridden."""
        raise NotImplementedError('forward must be implemented in subclass')

    @property
    def device(self) -> device:
        """Device of the module, as reported by ``get_parameter_device``."""
        return get_parameter_device(self)

    @property
    def dtype(self) -> torch.dtype:
        """Dtype of the module, as reported by ``get_parameter_dtype``."""
        return get_parameter_dtype(self)

    def num_parameters(self, only_trainable: bool = False) -> int:
        """Return the number of parameter elements.

        With ``only_trainable=True`` only parameters whose ``requires_grad``
        is set are counted.
        """
        return sum(p.numel() for p in self.parameters() if p.requires_grad or not only_trainable)

    def has_trainable_params(self):
        """Return True if at least one parameter has ``requires_grad`` set."""
        return any(param.requires_grad for param in self.parameters())

    def save_pretrained(
        self,
        save_dir: Union[str, os.PathLike],
        name: str = 'model.pt',
        rank: int = 0,
    ):
        """Save the state dict and config as ``<stem>_rank<rank><ext>`` in ``save_dir``."""
        stem, ext = os.path.splitext(name)
        save_path = os.path.join(save_dir, f'{stem}_rank{rank}{ext}')
        save_state_dict_and_config(self.state_dict(), self.config, save_path)

    def load_state_dict_from_path(self, pretrained_model_path):
        """Load this rank's weights from a per-rank checkpoint.

        ``pretrained_model_path`` is the name WITHOUT the rank suffix (for
        example ``.../classifier.pt``); the files actually read are
        ``.../classifier_rank<k>.pt``.

        If the number of partition files equals ``self.world_size`` the file
        of ``self.rank`` is loaded directly.  Otherwise all partitions are
        concatenated and the slice
        ``[class_start, class_start + num_local)`` of ``self.partial_fc`` is
        taken, zero-padded when the checkpoint holds fewer rows.

        Raises:
            FileNotFoundError: no partition file matches the given name.
        """
        save_dir = os.path.dirname(pretrained_model_path)
        stem, ext = os.path.splitext(os.path.basename(pretrained_model_path))
        prefix = f'{stem}_rank'

        # Only files written by save_pretrained for THIS checkpoint name count
        # as partitions; other "*_rank*" files in the directory are ignored.
        partitions = {}
        for name in os.listdir(save_dir or '.'):
            if not (name.startswith(prefix) and name.endswith(ext)):
                continue
            rank_text = name[len(prefix):len(name) - len(ext)]
            if rank_text.isdigit():
                partitions[int(rank_text)] = name
        # Numeric order so that rank 10 comes after rank 9.
        all_partitions = [partitions[r] for r in sorted(partitions)]
        ckpt_worldsize = len(all_partitions)
        if ckpt_worldsize == 0:
            raise FileNotFoundError(
                f"no checkpoint partitions named '{prefix}<k>{ext}' found in '{save_dir}'")

        if self.world_size != ckpt_worldsize:
            # we need to redistribute the partialfc weights
            part_ckpts = [torch.load(os.path.join(save_dir, name), map_location='cpu') for name in all_partitions]
            total_ckpt_num_subjects = sum(ckpt['partial_fc.weight'].shape[0] for ckpt in part_ckpts)
            # The totals may differ slightly in either direction because of
            # how the last partition is sized, hence abs().
            assert abs(total_ckpt_num_subjects - self.partial_fc.num_classes) < 10, \
                (f"total_ckpt_num_subjects: {total_ckpt_num_subjects}, "
                 f"self.partial_fc.num_classes: {self.partial_fc.num_classes}. "
                 f"The number can be slightly different due to the last partition.")
            combined_weight = torch.cat([ckpt['partial_fc.weight'] for ckpt in part_ckpts], dim=0)
            # Every other key is taken from the first partition.
            state_dict = part_ckpts[0]
            class_start = self.partial_fc.class_start
            num_local = self.partial_fc.num_local
            sub_center = combined_weight[class_start:class_start + num_local, :]
            missing = num_local - sub_center.shape[0]
            if missing > 0:
                # Pad with zero rows on the checkpoint's own device/dtype so
                # the concatenation cannot mix devices.
                padding = combined_weight.new_zeros(missing, combined_weight.shape[1])
                sub_center = torch.cat([sub_center, padding], dim=0)
            state_dict['partial_fc.weight'] = sub_center
        else:
            rank_added_path = os.path.join(save_dir, f'{prefix}{self.rank}{ext}')
            state_dict = load_state_dict_from_path(rank_added_path)
        result = self.load_state_dict(state_dict, strict=False)
        print(result)

cvlface/research/recognition/code/run_v1/classifiers/base/utils.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import itertools
+from typing import List, Optional, Tuple, Union
+import safetensors
+import torch
+from torch import Tensor
+import os
+from pathlib import Path
+from omegaconf import DictConfig, OmegaConf
def get_parameter_device(parameter: torch.nn.Module):
    """Return the device of the first parameter or buffer of ``parameter``.

    When the module has neither, fall back to the first plain tensor
    attribute found on the module tree (kept for torch.nn.DataParallel
    compatibility in PyTorch 1.5).  ``StopIteration`` propagates if no
    tensor is found at all.
    """
    for tensor in itertools.chain(parameter.parameters(), parameter.buffers()):
        return tensor.device

    def _tensor_attributes(module):
        # Tensors stored as ordinary attributes, i.e. not registered.
        return [(key, value) for key, value in module.__dict__.items() if torch.is_tensor(value)]

    members = parameter._named_members(get_members_fn=_tensor_attributes)
    _, first_tensor = next(members)
    return first_tensor.device
def get_parameter_dtype(parameter: torch.nn.Module):
    """Return the dtype of the first tensor found on ``parameter``.

    Lookup order: registered parameters, then registered buffers, then any
    tensor stored as a plain attribute on the module or one of its
    submodules.  The last step used to sit in an ``except StopIteration``
    branch that could never run, so such modules reported no dtype.

    Returns:
        The ``torch.dtype`` of the first tensor found, or ``None`` when the
        module holds no tensor at all.
    """
    for tensor in itertools.chain(parameter.parameters(), parameter.buffers()):
        return tensor.dtype
    # Fallback for tensors that are not registered as parameters/buffers.
    for module in parameter.modules():
        for value in vars(module).values():
            if torch.is_tensor(value):
                return value.dtype
    return None
def get_parent_directory(save_path: Union[str, os.PathLike]) -> Path:
    """Return the directory containing ``save_path`` as a ``Path``."""
    return Path(save_path).parent
def get_base_name(save_path: Union[str, os.PathLike]) -> str:
    """Return the final path component of ``save_path``."""
    return Path(save_path).name
def load_state_dict_from_path(path: Union[str, os.PathLike]):
    """Load a state dict from ``path`` onto the CPU.

    Files ending in ``.safetensors`` are read with
    ``safetensors.torch.load_file``; everything else goes through
    ``torch.load(..., map_location="cpu")``.

    Args:
        path: checkpoint location, as ``str`` or any ``os.PathLike``.
    """
    # Normalise so that pathlib.Path arguments work with the suffix test.
    path = os.fspath(path)
    if path.endswith('.safetensors'):
        # `import safetensors` alone does not load the `torch` submodule,
        # so import the function explicitly where it is needed.
        from safetensors.torch import load_file
        return load_file(path)
    return torch.load(path, map_location="cpu")
def replace_extension(path, new_extension):
    """Return ``path`` with its extension swapped for ``new_extension``.

    ``new_extension`` may be given with or without the leading dot.
    """
    suffix = new_extension if new_extension.startswith('.') else '.' + new_extension
    stem, _ = os.path.splitext(path)
    return stem + suffix
def make_config_path(save_path):
    """Return the ``.yaml`` config path that sits next to ``save_path``."""
    return replace_extension(save_path, '.yaml')
def save_config(config, config_path):
    """Write ``config`` to ``config_path`` with OmegaConf.

    ``config`` must be a ``dict`` or a ``DictConfig``; a plain dict is
    converted with ``OmegaConf.create`` first.  The parent directory is
    created if it does not exist.
    """
    assert isinstance(config, (dict, DictConfig))
    os.makedirs(get_parent_directory(config_path), exist_ok=True)
    to_write = OmegaConf.create(config) if isinstance(config, dict) else config
    OmegaConf.save(to_write, config_path)
def save_state_dict_and_config(state_dict, config, save_path):
    """Save ``state_dict`` to ``save_path`` and ``config`` beside it.

    The config is written first, to the path returned by
    ``make_config_path(save_path)``.  Weights are written with
    ``safetensors.torch.save_file`` when ``save_path`` ends in
    ``.safetensors`` and with ``torch.save`` otherwise.

    Args:
        state_dict: mapping of tensor names to tensors.
        config: ``dict`` or ``DictConfig`` accepted by ``save_config``.
        save_path: destination file, as ``str`` or ``os.PathLike``.
    """
    # Normalise so that pathlib.Path arguments work with the suffix test.
    save_path = os.fspath(save_path)
    os.makedirs(get_parent_directory(save_path), exist_ok=True)
    # save config dict
    save_config(config, make_config_path(save_path))
    # Save the model
    if save_path.endswith('.safetensors'):
        # `import safetensors` alone does not load the `torch` submodule,
        # so import the function explicitly where it is needed.
        from safetensors.torch import save_file
        save_file(state_dict, save_path, metadata={"format": "pt"})
    else:
        torch.save(state_dict, save_path)

cvlface/research/recognition/code/run_v1/classifiers/configs/fc.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

# Classifier selector; FCClassifier.from_config accepts only 'fc'.
name: 'fc'
# NOTE(review): FCClassifier.from_config does not read sample_rate in the
# code shown with this config; only the partial_fc path forwards it.
sample_rate: 1.0
# NOTE(review): consumer not visible here; presumably a checkpoint path to
# initialise the classifier from ('' = none) - confirm.
start_from: ''
# NOTE(review): presumably turns off requires_grad on the classifier
# parameters when True - confirm against the classifier factory.
freeze: False

cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

# Classifier selector; PartialFCClassifier.from_config accepts only 'partial_fc'.
name: 'partial_fc'
# Forwarded to PartialFC_V2 as sample_rate; class-center sampling is only
# used when the value is below 1.
sample_rate: 1.0
# NOTE(review): consumer not visible here; presumably a checkpoint path to
# initialise the classifier from ('' = none) - confirm.
start_from: ''
# NOTE(review): presumably turns off requires_grad on the classifier
# parameters when True - confirm against the classifier factory.
freeze: False

cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_freeze.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

# Classifier selector; PartialFCClassifier.from_config accepts only 'partial_fc'.
name: 'partial_fc'
# Forwarded to PartialFC_V2 as sample_rate; class-center sampling is only
# used when the value is below 1.
sample_rate: 1.0
# NOTE(review): consumer not visible here; presumably a checkpoint path to
# initialise the classifier from ('' = none) - confirm.
start_from: ''
# NOTE(review): presumably turns off requires_grad on the classifier
# parameters when True - confirm against the classifier factory.
freeze: True

cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_sample10.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

# Classifier selector; PartialFCClassifier.from_config accepts only 'partial_fc'.
name: 'partial_fc'
# Forwarded to PartialFC_V2 as sample_rate; PartialFC_V2 keeps
# int(sample_rate * num_local) class centers per step when the value is below 1.
sample_rate: 0.1
# NOTE(review): consumer not visible here; presumably a checkpoint path to
# initialise the classifier from ('' = none) - confirm.
start_from: ''
# NOTE(review): presumably turns off requires_grad on the classifier
# parameters when True - confirm against the classifier factory.
freeze: False

cvlface/research/recognition/code/run_v1/classifiers/configs/partial_fc_sample10_freeze.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

# Classifier selector; PartialFCClassifier.from_config accepts only 'partial_fc'.
name: 'partial_fc'
# Forwarded to PartialFC_V2 as sample_rate; PartialFC_V2 keeps
# int(sample_rate * num_local) class centers per step when the value is below 1.
sample_rate: 0.1
# NOTE(review): consumer not visible here; presumably a checkpoint path to
# initialise the classifier from ('' = none) - confirm.
start_from: ''
# NOTE(review): presumably turns off requires_grad on the classifier
# parameters when True - confirm against the classifier factory.
freeze: True

cvlface/research/recognition/code/run_v1/classifiers/fc/__init__.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from ..base import BaseClassifier, load_state_dict_from_path
+from .fc import FC
+from typing import Union
+import os
class FCClassifier(BaseClassifier):
    """Classifier wrapper around a full (non-partitioned) ``FC`` head.

    ``apply_ddp`` is set to True.  Only rank 0 writes a checkpoint, and
    every rank loads the ``_rank0`` file.
    """

    def __init__(self, classifier, config, rank, world_size):
        super().__init__()
        self.classifier = classifier
        self.config = config
        self.rank = rank
        self.world_size = world_size
        self.apply_ddp = True

    @classmethod
    def from_config(cls, classifier_cfg, margin_loss_fn, model_cfg, num_classes, rank, world_size):
        """Build an ``FCClassifier`` (in eval mode) from configuration.

        Raises ``NotImplementedError`` unless ``classifier_cfg.name`` is 'fc'.
        """
        if classifier_cfg.name != 'fc':
            raise NotImplementedError
        head = FC(
            margin_loss=margin_loss_fn,
            embedding_size=model_cfg.output_dim,
            num_classes=num_classes,
        )
        instance = cls(head, classifier_cfg, rank, world_size)
        instance.eval()
        return instance

    def forward(self, local_embeddings, local_labels):
        """Return the loss computed by the wrapped ``FC`` head."""
        return self.classifier(local_embeddings, local_labels)

    def save_pretrained(
        self,
        save_dir: Union[str, os.PathLike],
        name: str = 'classifier.pt',
        rank: int = 0,
    ):
        """Save the classifier; ranks other than 0 do nothing."""
        if rank != 0:
            return
        super().save_pretrained(save_dir, name, rank)

    def load_state_dict_from_path(self, pretrained_model_path):
        """Load the ``_rank0`` checkpoint that belongs to ``pretrained_model_path``."""
        directory, file_name = os.path.split(pretrained_model_path)
        stem, ext = os.path.splitext(file_name)
        rank0_path = os.path.join(directory, stem + '_rank0' + ext)
        weights = load_state_dict_from_path(rank0_path)
        result = self.load_state_dict(weights, strict=False)
        print('classifier loading result', result)

cvlface/research/recognition/code/run_v1/classifiers/fc/fc.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from typing import Callable
+import torch
+from torch import distributed
+from torch.nn.functional import linear, normalize
+from losses.margin_loss import CombinedMarginLoss
+from losses.adaface import AdaFaceLoss
class FC(torch.nn.Module):
    """Full fully-connected margin-softmax head.

    Holds one weight row per class, turns L2-normalised embeddings and
    weights into cosine logits, applies the configured margin function and
    returns the cross-entropy loss.
    """

    def __init__(
        self,
        margin_loss: Callable,
        embedding_size: int,
        num_classes: int,
    ):
        """
        Parameters:
        -----------
        margin_loss: Callable
            Margin function applied to the cosine logits; ``forward``
            supports ``CombinedMarginLoss`` and ``AdaFaceLoss`` instances.
        embedding_size: int
            The dimension of embedding, required
        num_classes: int
            Total number of classes, required

        Raises:
        -------
        TypeError
            If ``margin_loss`` is not callable.
        """
        super(FC, self).__init__()
        # Fail with a clear message; a bare `raise` here has no active
        # exception to re-raise.
        if not callable(margin_loss):
            raise TypeError(
                f"margin_loss must be callable, got {type(margin_loss).__name__}")
        self.cross_entropy = torch.nn.CrossEntropyLoss()
        self.embedding_size = embedding_size
        self.num_classes = num_classes
        self.weight = torch.nn.Parameter(torch.normal(0, 0.01, (self.num_classes, embedding_size)))
        # margin_loss
        self.margin_softmax = margin_loss
        if isinstance(margin_loss, AdaFaceLoss):
            # Statistics passed to and refreshed by AdaFaceLoss on each forward.
            self.register_buffer('batch_mean', torch.ones(1)*(20))
            self.register_buffer('batch_std', torch.ones(1)*100)

    def forward(
        self,
        local_embeddings: torch.Tensor,
        local_labels: torch.Tensor,
    ):
        """
        Parameters:
        ----------
        local_embeddings: torch.Tensor
            feature embeddings; the norm is taken along dim 1.
        local_labels: torch.Tensor
            class labels passed to the margin function and the loss.
        Returns:
        -------
        loss: torch.Tensor
            cross-entropy loss of the margin-adjusted logits.
        Raises:
        -------
        ValueError
            If the margin function is neither ``CombinedMarginLoss`` nor
            ``AdaFaceLoss``.
        """
        embeddings = local_embeddings
        labels = local_labels
        weight = self.weight
        # Clamp the norm so a zero embedding cannot cause division by zero.
        norms = embeddings.norm(p=2, dim=1, keepdim=True).clamp_min(1e-8)
        norm_embeddings = embeddings / norms
        norm_weight_activated = normalize(weight)
        logits = linear(norm_embeddings, norm_weight_activated)
        logits = logits.clamp(-1, 1)
        if isinstance(self.margin_softmax, CombinedMarginLoss):
            logits = self.margin_softmax(logits=logits, labels=labels)
        elif isinstance(self.margin_softmax, AdaFaceLoss):
            logits, batch_mean, batch_std = self.margin_softmax(logits=logits, labels=labels, norms=norms,
                                                                batch_mean=self.batch_mean,
                                                                batch_std=self.batch_std)
            # Store the statistics returned by the margin function back into the buffers.
            self.batch_mean.data = batch_mean.data
            self.batch_std.data = batch_std.data
        else:
            raise ValueError(
                f'FC margin_softmax has unsupported type {type(self.margin_softmax).__name__}')
        loss = self.cross_entropy(logits, labels)
        return loss

cvlface/research/recognition/code/run_v1/classifiers/partial_fc/__init__.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from ..base import BaseClassifier
+from .partial_fc import PartialFC_V2
class PartialFCClassifier(BaseClassifier):
    """Classifier wrapper around a ``PartialFC_V2`` head.

    The head is stored as ``self.partial_fc`` and ``apply_ddp`` is set to
    False.
    """

    def __init__(self, classifier, config, rank, world_size):
        super().__init__()
        self.partial_fc = classifier
        self.config = config
        self.rank = rank
        self.world_size = world_size
        self.apply_ddp = False

    @classmethod
    def from_config(cls, classifier_cfg, margin_loss_fn, model_cfg, num_classes, rank, world_size):
        """Build a ``PartialFCClassifier`` (in eval mode) from configuration.

        Raises ``NotImplementedError`` unless ``classifier_cfg.name`` is
        'partial_fc'.
        """
        if classifier_cfg.name != 'partial_fc':
            raise NotImplementedError
        head = PartialFC_V2(
            rank=rank,
            world_size=world_size,
            margin_loss=margin_loss_fn,
            embedding_size=model_cfg.output_dim,
            num_classes=num_classes,
            sample_rate=classifier_cfg.sample_rate,
        )
        instance = cls(head, classifier_cfg, rank, world_size)
        instance.eval()
        return instance

    def forward(self, local_embeddings, local_labels):
        """Return the loss computed by the wrapped ``PartialFC_V2`` head."""
        return self.partial_fc(local_embeddings, local_labels)

cvlface/research/recognition/code/run_v1/classifiers/partial_fc/partial_fc.py ADDED Viewed

	@@ -0,0 +1,289 @@

+from typing import Callable
+import torch
+from torch import distributed
+from torch.nn.functional import linear, normalize
+from losses.margin_loss import CombinedMarginLoss
+from losses.adaface import AdaFaceLoss
class PartialFC_V2(torch.nn.Module):
    """
    https://arxiv.org/abs/2203.15565
    A distributed sparsely updating variant of the FC layer, named Partial FC (PFC).
    When sample rate less than 1, in each iteration, positive class centers and a random subset of
    negative class centers are selected to compute the margin-based softmax loss, all class
    centers are still maintained throughout the whole training process, but only a subset is
    selected and updated in each iteration.
    .. note::
        When sample rate equal to 1, Partial FC is equal to model parallelism(default sample rate is 1).
    Example:
    --------
    >>> module_pfc = PartialFC_V2(rank, world_size, margin_loss,
    ...                           embedding_size=512, num_classes=8000000, sample_rate=0.2)
    >>> for img, labels in data_loader:
    >>>     embeddings = net(img)
    >>>     loss = module_pfc(embeddings, labels)
    >>>     loss.backward()
    >>>     optimizer.step()
    """
    _version = 2

    def __init__(
        self,
        rank: int,
        world_size: int,
        margin_loss: Callable,
        embedding_size: int,
        num_classes: int,
        sample_rate: float = 1.0,
    ):
        """
        Parameters:
        -----------
        rank: int
            Rank of this process; selects which slice of classes it owns.
        world_size: int
            Number of processes the classes are split across.
        margin_loss: Callable
            Margin function applied to the cosine logits; ``forward``
            supports ``CombinedMarginLoss`` and ``AdaFaceLoss`` instances.
        embedding_size: int
            The dimension of embedding, required
        num_classes: int
            Total number of classes, required.  It is rounded up to a
            multiple of ``world_size`` and the rounded value is stored.
        sample_rate: float
            The rate of negative centers participating in the calculation, default is 1.0.

        Raises:
        -------
        TypeError
            If ``margin_loss`` is not callable.
        AssertionError
            If ``torch.distributed`` has not been initialised.
        """
        super(PartialFC_V2, self).__init__()
        # Validate the cheap argument first so a bad margin function is
        # reported clearly (a bare `raise` has nothing to re-raise).
        if not callable(margin_loss):
            raise TypeError(
                f"margin_loss must be callable, got {type(margin_loss).__name__}")
        assert (
            distributed.is_initialized()
        ), "must initialize distributed before create this"
        self.rank = rank
        self.world_size = world_size
        self.dist_cross_entropy = DistCrossEntropy()
        self.embedding_size = embedding_size
        self.sample_rate: float = sample_rate
        # make num_class divisible by self.world_size for ddp
        _num_classes = num_classes // self.world_size * self.world_size
        if _num_classes < num_classes:
            _num_classes = _num_classes + self.world_size
        num_classes = _num_classes
        self.num_classes: int = num_classes
        # After the rounding above the remainder is zero, so every rank owns
        # num_classes // world_size classes; the remainder terms are kept for
        # parity with the general partition formula.
        self.num_local: int = num_classes // self.world_size + int(
            self.rank < num_classes % self.world_size
        )
        self.class_start: int = num_classes // self.world_size * self.rank + min(
            self.rank, num_classes % self.world_size
        )
        self.num_sample: int = int(self.sample_rate * self.num_local)
        self.last_batch_size: int = 0
        self.is_updated: bool = True
        self.init_weight_update: bool = True
        self.weight = torch.nn.Parameter(torch.normal(0, 0.01, (self.num_local, embedding_size)))
        # margin_loss
        self.margin_softmax = margin_loss
        if isinstance(margin_loss, AdaFaceLoss):
            # Statistics passed to and refreshed by AdaFaceLoss on each forward.
            self.register_buffer('batch_mean', torch.ones(1)*(20))
            self.register_buffer('batch_std', torch.ones(1)*100)

    def sample(self, labels, index_positive):
        """
            Select the class centers used in this iteration.
            This function changes the value of ``labels`` in place: positive
            labels are remapped to their position inside the sampled index.
            Parameters:
            -----------
            labels: torch.Tensor
                labels already shifted by ``class_start``; entries outside
                this rank's range are expected to be masked by ``index_positive``.
            index_positive: torch.Tensor
                boolean mask of the labels owned by this rank.
            Returns:
            --------
            torch.Tensor
                rows of ``self.weight`` for the sampled class indices; the
                indices are also stored in ``self.weight_index``.
        """
        with torch.no_grad():
            positive = torch.unique(labels[index_positive], sorted=True).cuda()
            if self.num_sample - positive.size(0) >= 0:
                # Give positives a score above any random draw so top-k
                # always keeps them, then fill up with random negatives.
                perm = torch.rand(size=[self.num_local]).cuda()
                perm[positive] = 2.0
                index = torch.topk(perm, k=self.num_sample)[1].cuda()
                index = index.sort()[0].cuda()
            else:
                # More positives than the sampling budget: keep all positives.
                index = positive
            self.weight_index = index
            labels[index_positive] = torch.searchsorted(index, labels[index_positive])
        return self.weight[self.weight_index]

    def forward(
        self,
        local_embeddings: torch.Tensor,
        local_labels: torch.Tensor,
    ):
        """
        Parameters:
        ----------
        local_embeddings: torch.Tensor
            feature embeddings on each GPU(Rank).
        local_labels: torch.Tensor
            labels on each GPU(Rank).  Note that it is squeezed in place.
        Returns:
        -------
        loss: torch.Tensor
            distributed cross-entropy loss of the margin-adjusted logits.
        Raises:
        -------
        AssertionError
            If the batch size differs from the one seen on the first call.
        ValueError
            If the margin function is neither ``CombinedMarginLoss`` nor
            ``AdaFaceLoss``.
        """
        # In-place squeeze: the caller's tensor is modified as well.
        local_labels.squeeze_()
        local_labels = local_labels.long()
        batch_size = local_embeddings.size(0)
        if self.last_batch_size == 0:
            self.last_batch_size = batch_size
        assert self.last_batch_size == batch_size, (
            f"last batch size do not equal current batch size: {self.last_batch_size} vs {batch_size}")
        # Receive buffers, one per rank, for the gathered embeddings/labels.
        _gather_embeddings = [
            torch.zeros((batch_size, self.embedding_size), dtype=local_embeddings.dtype, device=local_embeddings.device)
            for _ in range(self.world_size)
        ]
        _gather_labels = [
            torch.zeros(batch_size).long().cuda() for _ in range(self.world_size)
        ]
        # Embeddings go through AllGather so gradients flow back; labels
        # need no gradient and use the plain collective.
        _list_embeddings = AllGather(local_embeddings, *_gather_embeddings)
        distributed.all_gather(_gather_labels, local_labels)
        embeddings = torch.cat(_list_embeddings)
        labels = torch.cat(_gather_labels)
        labels = labels.view(-1, 1)
        # Labels owned by this rank become local indices; all others are -1.
        index_positive = (self.class_start <= labels) & (
            labels < self.class_start + self.num_local
        )
        labels[~index_positive] = -1
        labels[index_positive] -= self.class_start
        if self.sample_rate < 1:
            weight = self.sample(labels, index_positive)
        else:
            weight = self.weight
        # Clamp the norm so a zero embedding cannot cause division by zero.
        norms = embeddings.norm(p=2, dim=1, keepdim=True).clamp_min(1e-8)
        norm_embeddings = embeddings / norms
        norm_weight_activated = normalize(weight)
        logits = linear(norm_embeddings, norm_weight_activated)
        logits = logits.clamp(-1, 1)
        if isinstance(self.margin_softmax, CombinedMarginLoss):
            logits = self.margin_softmax(logits=logits, labels=labels)
        elif isinstance(self.margin_softmax, AdaFaceLoss):
            logits, batch_mean, batch_std = self.margin_softmax(logits=logits, labels=labels, norms=norms,
                                                                batch_mean=self.batch_mean,
                                                                batch_std=self.batch_std)
            # Store the statistics returned by the margin function back into the buffers.
            self.batch_mean.data = batch_mean.data
            self.batch_std.data = batch_std.data
        else:
            raise ValueError('partial FC margin_softmax not supported type')
        loss = self.dist_cross_entropy(logits, labels)
        return loss
class DistCrossEntropyFunc(torch.autograd.Function):
    """
    Cross-entropy loss computed in parallel: the per-row maximum and the softmax
    denominator are all-reduced across ranks so each rank can normalise its own
    slice of the logits.
    Implementation following ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf).
    NOTE: ``forward`` works in place on ``logits``; after the call the tensor holds
    softmax probabilities instead of logits.
    """
    @staticmethod
    def forward(ctx, logits: torch.Tensor, label: torch.Tensor):
        """
        Args:
            logits (torch.Tensor): logits of this rank's classes; modified in place.
                Assumed to be (batch, local_classes) - TODO confirm.
            label (torch.Tensor): column of local class indices, -1 where the
                class is not owned by this rank (as built by PartialFC_V2.forward).
        Returns:
            scalar loss: negative mean of the log target probability.
        """
        batch_size = logits.size(0)
        # for numerical stability
        max_logits, _ = torch.max(logits, dim=1, keepdim=True)
        # local to global
        distributed.all_reduce(max_logits, distributed.ReduceOp.MAX)
        logits.sub_(max_logits)
        logits.exp_()
        sum_logits_exp = torch.sum(logits, dim=1, keepdim=True)
        # local to global
        distributed.all_reduce(sum_logits_exp, distributed.ReduceOp.SUM)
        # logits now holds the softmax probabilities over all ranks' classes
        logits.div_(sum_logits_exp)
        # rows whose target class lives on this rank
        index = torch.where(label != -1)[0]
        # loss
        loss = torch.zeros(batch_size, 1, device=logits.device, dtype=logits.dtype)
        loss[index] = logits[index].gather(1, label[index])
        # every other rank contributes 0 for a row, so SUM yields the target probability
        distributed.all_reduce(loss, distributed.ReduceOp.SUM)
        ctx.save_for_backward(index, logits, label)
        # clamp before log so a zero probability cannot produce -inf
        return loss.clamp_min_(1e-30).log_().mean() * (-1)
    @staticmethod
    def backward(ctx, loss_gradient):
        """
        Args:
            loss_gradient (torch.Tensor): gradient backward by last layer
        Returns:
            gradients for each input in forward function
            `None` gradients for one-hot label
        """
        (
            index,
            logits,
            label,
        ) = ctx.saved_tensors
        batch_size = logits.size(0)
        one_hot = torch.zeros(
            size=[index.size(0), logits.size(1)], device=logits.device
        )
        one_hot.scatter_(1, label[index], 1)
        # softmax - one_hot, written in place into the saved probabilities
        logits[index] -= one_hot
        # account for the mean over the batch taken in forward
        logits.div_(batch_size)
        return logits * loss_gradient.item(), None
class DistCrossEntropy(torch.nn.Module):
    """``nn.Module`` wrapper that applies ``DistCrossEntropyFunc``."""

    def __init__(self):
        super().__init__()

    def forward(self, logit_part, label_part):
        """Return the loss for this rank's logits and labels."""
        loss = DistCrossEntropyFunc.apply(logit_part, label_part)
        return loss
class AllGatherFunc(torch.autograd.Function):
    """AllGather op with gradient backward"""
    @staticmethod
    def forward(ctx, tensor, *gather_list):
        """Gather ``tensor`` from every rank into the ``gather_list`` buffers.

        Returns the filled buffers as a tuple, one entry per rank.
        """
        gather_list = list(gather_list)
        distributed.all_gather(gather_list, tensor)
        return tuple(gather_list)
    @staticmethod
    def backward(ctx, *grads):
        """Send each gathered slot's gradient back to the rank it came from.

        ``grads`` holds one gradient per gathered output.  Returns the
        gradient for ``tensor`` followed by ``None`` for every buffer in
        ``gather_list``.
        """
        grad_list = list(grads)
        rank = distributed.get_rank()
        # gradient of the slot that holds this rank's own tensor
        grad_out = grad_list[rank]
        # One SUM-reduce per slot i with destination rank i.  grad_out is
        # grad_list[rank], so both branches pass the same tensor when i == rank.
        dist_ops = [
            distributed.reduce(grad_out, rank, distributed.ReduceOp.SUM, async_op=True)
            if i == rank
            else distributed.reduce(
                grad_list[i], i, distributed.ReduceOp.SUM, async_op=True
            )
            for i in range(distributed.get_world_size())
        ]
        # wait for all asynchronous reductions before using grad_out
        for _op in dist_ops:
            _op.wait()
        grad_out *= len(grad_list)  # cooperate with distributed loss function
        return (grad_out, *[None for _ in range(len(grad_list))])
# Functional alias used by PartialFC_V2.forward.
AllGather = AllGatherFunc.apply