File size: 4,367 Bytes
53fe336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# SPDX-FileCopyrightText: Copyright © 2025 Idiap Research Institute <contact@idiap.ch>

# SPDX-FileContributor: Francois Poh <francois.poh22@imperial.ac.uk>

# SPDX-License-Identifier: GPL-3.0-or-later

# ArtFace contains the code for the paper: https://www.idiap.ch/paper/artface/
# It provides a facial recognition model for historical portraits, and scripts to reproduce the experiments in the paper.

import os

from PIL import Image
import cv2
import numpy as np
import torch


class ImagePreprocessor:
    """Base class for callable image preprocessors.

    Calling an instance normalises the input to an RGB ``PIL.Image`` and
    hands it to :meth:`process`, which subclasses must implement.
    """

    def __init__(self):
        pass

    def __call__(self, image):
        """Load/convert ``image`` to RGB and run :meth:`process` on it.

        Args:
            image: A file path (``str`` or any ``os.PathLike`` such as
                ``pathlib.Path``) or an already-loaded ``PIL.Image.Image``.

        Returns:
            Whatever the subclass's :meth:`process` returns.

        Raises:
            TypeError: If ``image`` is neither a path nor a PIL image.
        """
        if isinstance(image, (str, os.PathLike)):
            # Image.open() is lazy and keeps the file open. convert() forces
            # the pixel data to load and returns a new image, so the file
            # handle can be closed as soon as the conversion is done.
            with Image.open(image) as opened:
                image = opened.convert("RGB")
        elif isinstance(image, Image.Image):
            image = image.convert("RGB")
        else:
            raise TypeError(
                f"Unsupported input type {type(image)}. "
                "Expected file path or PIL.Image."
            )

        return self.process(image)

    def process(self, image):
        """Transform an RGB PIL image; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses should implement this method.")


class FaceAligner(ImagePreprocessor):
    """Preprocessor that detects one face and returns an aligned crop.

    Keypoints come from the InsightFace detection model; the crop is
    produced by ``mtcnn.warp_and_crop_face`` from ``lib.face_alignment``.
    """

    def __init__(self, detector="buffalo_l", crop_size=(112, 112), padding=0):
        """Build the MTCNN helper and the InsightFace detector.

        Args:
            detector: Model pack name passed to ``FaceAnalysis(name=...)``.
            crop_size: Two values coerced to ``int`` (strings such as
                ``"224"`` are accepted) and passed to MTCNN as the crop size.
            padding: Coerced to ``float`` and passed to MTCNN.
        """
        # Coerce first so that string values (e.g. from a CLI) are accepted.
        size = tuple(int(side) for side in crop_size)
        super().__init__()

        # Imported here rather than at module level, so importing this module
        # does not require these packages.
        from lib.face_alignment import mtcnn
        from insightface.app import FaceAnalysis

        # --------------------
        # Device selection
        # --------------------
        cuda_ok = torch.cuda.is_available()
        self.use_cuda = cuda_ok

        device = "cuda:0" if cuda_ok else "cpu"
        ctx_id = 0 if cuda_ok else -1
        providers = ["CPUExecutionProvider"]
        if cuda_ok:
            # CUDA goes first in the list; the CPU provider stays listed after it.
            providers.insert(0, "CUDAExecutionProvider")

        # --------------------
        # MTCNN (landmark warping)
        # --------------------
        self.mtcnn = mtcnn.MTCNN(
            device=device,
            crop_size=size,
            padding=float(padding),
        )

        # --------------------
        # InsightFace detector
        # --------------------
        # NOTE(review): root="." presumably makes InsightFace store/look up
        # model files relative to the working directory — confirm.
        face_app = FaceAnalysis(name=detector, root=".", providers=providers)
        face_app.prepare(ctx_id=ctx_id)
        self.detector = face_app

        print(
            f"✅ FaceAligner initialized | "
            f"CUDA: {self.use_cuda} | "
            f"providers: {providers}"
        )

    def process(self, image):
        """Align the face found in ``image``.

        Args:
            image: RGB ``PIL.Image`` (as supplied by ``__call__``).

        Returns:
            The warped, cropped face as an RGB ``PIL.Image``, or ``None``
            when the detector returns no keypoints.
        """
        from lib.face_alignment import mtcnn

        # The array is converted from RGB to BGR channel order before detection.
        bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Only the keypoints are needed; at most one face is requested.
        _, kpss = self.detector.det_model.detect(bgr, max_num=1, metric="default")
        if kpss is None or len(kpss) == 0:
            return None

        # Flatten the first face's keypoints to [x0..x4, y0..y4], then pair
        # them back up as five [x, y] points.
        flat = np.concatenate([kpss[0, :, 0], kpss[0, :, 1]], axis=0)
        facial5points = [[flat[idx], flat[idx + 5]] for idx in range(5)]

        # NOTE(review): `refrence` (sic) is the attribute name read from the
        # MTCNN helper; check lib.face_alignment before "correcting" it.
        aligned_bgr = mtcnn.warp_and_crop_face(
            bgr,
            facial5points,
            self.mtcnn.refrence,
            crop_size=self.mtcnn.crop_size,
        )

        return Image.fromarray(cv2.cvtColor(aligned_bgr, cv2.COLOR_BGR2RGB))


# Registry of named preprocessor presets. Each entry maps a preset name to a
# (class, default constructor kwargs) pair; get_preprocessor() instantiates the
# class with these defaults, optionally overridden by caller-supplied arguments.
preprocessors = {
    "align": (FaceAligner, {"crop_size": (112, 112)}),
    "align-224": (FaceAligner, {"crop_size": (224, 224)}),
    "align-pad": (FaceAligner, {"crop_size": (224, 224), "padding": 0.5}),
}


def get_preprocessor(name, args=None):
    """Instantiate a registered preprocessor, optionally overriding defaults.

    Args:
        name: Key into the module-level ``preprocessors`` registry.
        args: Optional constructor overrides merged on top of the registry
            defaults. Accepted forms:

            * a ``dict`` of keyword arguments, used as-is;
            * a ``list``/``tuple`` of ``"key=value"`` strings. Surrounding
              single quotes are stripped from the value, and a value
              containing commas becomes a list of strings. Values are NOT
              converted to numbers here; they are passed on as strings.

            ``None`` (the default) means no overrides. Any other type is
            ignored, as it was before.

    Returns:
        A new instance of the registered preprocessor class.

    Raises:
        ValueError: If an entry of ``args`` lacks ``=``, or if ``name`` is
            not a registered preprocessor.
    """
    model_args = {}
    if isinstance(args, dict):
        # Copy so the caller's mapping is never mutated or aliased.
        model_args.update(args)
    elif isinstance(args, (list, tuple)):
        for arg in args:
            if "=" not in arg:
                raise ValueError(
                    f"Invalid argument format for model arguments. Expected 'key=value' pairs, got '{arg}'."
                )
            # Split on the first '=' only, so values may contain '='.
            key, value = arg.split("=", 1)
            value = value.strip("'")
            if "," in value:
                value = [v.strip("'") for v in value.split(",")]
            model_args[key] = value

    if name not in preprocessors:
        raise ValueError(
            f"Unknown preprocessor: {name}\n  Please choose from: {', '.join(preprocessors.keys())}"
        )

    preprocessor_cls, defaults = preprocessors[name]
    # Caller-supplied overrides take precedence over the preset defaults.
    return preprocessor_cls(**{**defaults, **model_args})