minchul commited on
Commit
93a2970
·
verified ·
1 Parent(s): bdd868c

Upload directory

Browse files
Files changed (1) hide show
  1. README.md +157 -0
README.md ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: mit
4
+ arxiv: 2403.14852
5
+ ---
6
+
7
+ <div align="center">
8
+ <h1>
9
+ CVLFace Pretrained Face Alignment Model (DFA RESNET50)
10
+ </h1>
11
+ </div>
12
+
13
+
14
+ <p align="center">
15
+ 🌎 <a href="https://github.com/mk-minchul/CVLface" target="_blank">GitHub</a> • 🤗 <a href="https://huggingface.co/minchul" target="_blank">Hugging Face</a>
16
+ </p>
17
+
18
+
19
+ -----
20
+
21
+
22
+ ## 1. Introduction
23
+
24
+ Model Name: DFA RESNET50
25
+
26
+ Related Paper: KeyPoint Relative Position Encoding for Face Recognition (https://arxiv.org/abs/2403.14852)
27
+
28
+ Please cite the original paper and follow the license of the training dataset.
29
+
30
+ ## 2. Quick Start
31
+
32
+ ```python
33
+ if __name__ == '__main__':
34
+
35
+ from transformers import AutoModel
36
+ from huggingface_hub import hf_hub_download
37
+ import shutil
38
+ import os
39
+
40
+
41
+ # helper function to download the model files from a Hugging Face repo
42
+ def download(repo_id, path, HF_TOKEN=None):
43
+ files_path = os.path.join(path, 'files.txt')
44
+ if not os.path.exists(files_path):
45
+ hf_hub_download(repo_id, 'files.txt', token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)
46
+ with open(os.path.join(path, 'files.txt'), 'r') as f:
47
+ files = f.read().split('\n')
48
+ for file in [f for f in files if f] + ['config.json', 'wrapper.py', 'model.safetensors']:
49
+ full_path = os.path.join(path, file)
50
+ if not os.path.exists(full_path):
51
+ hf_hub_download(repo_id, file, token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)
52
+
53
+
54
+ # helper function to load the model from a local directory
55
+ def load_model_from_local_path(path, HF_TOKEN=None):
56
+ cwd = os.getcwd()
57
+ os.chdir(path)
58
+ model = AutoModel.from_pretrained(path, trust_remote_code=True, token=HF_TOKEN)
59
+ os.chdir(cwd)
60
+ return model
61
+
62
+
63
+ # helper function to download the Hugging Face repo (if needed) and load the model
64
+ def load_model_by_repo_id(repo_id, save_path, HF_TOKEN=None, force_download=False):
65
+ if force_download:
66
+ if os.path.exists(save_path):
67
+ shutil.rmtree(save_path)
68
+ download(repo_id, save_path, HF_TOKEN)
69
+ return load_model_from_local_path(save_path, HF_TOKEN)
70
+
71
+
72
+ # load model
73
+ aligner = load_model_by_repo_id(repo_id, path, HF_TOKEN, force_download=False)
74
+
75
+ # input is an RGB image normalized to [-1, 1].
76
+ from torchvision.transforms import Compose, ToTensor, Normalize
77
+ from PIL import Image
78
+ img = Image.open('/path/to/img.png')
79
+ trans = Compose([ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
80
+ input = trans(img).unsqueeze(0) # torch.randn(1, 3, 256, 256) or any size with a single face
81
+
82
+ # predict landmarks and aligned image
83
+ aligned_x, orig_ldmks, aligned_ldmks, score, thetas, bbox = aligner(input)
84
+
85
+ # Documentation
86
+ # aligned_x: aligned face image (1, 3, 112, 112)
87
+ # orig_ldmks: predicted landmarks in the original image (1, 5, 2)
88
+ # aligned_ldmks: predicted landmarks in the aligned image (1, 5, 2)
89
+ # score: confidence score (1,)
90
+ # thetas: affine transformation matrix (1, 2, 3). See below for how to use it.
91
+ # bbox: normalized bounding box in the original image (1, 4)
92
+
93
+ # differentiable alignment
94
+ import torch.nn.functional as F
95
+ grid = F.affine_grid(thetas, (1, 3, 112, 112), align_corners=True)
96
+ manual_aligned_x = F.grid_sample(input, grid, align_corners=True)
97
+ # manual_aligned_x should be the same as aligned_x (up to small numerical differences from interpolation)
98
+ # here input can receive gradient through the grid_sample function.
99
+ ```
100
+
101
+ ## Example Outputs
102
+
103
+ <table align="center">
104
+ <tr>
105
+ <td><img src="orig.png" alt="Image 1"></td>
106
+ <td><img src="input.png" alt="Image 2"></td>
107
+ <td><img src="aligned.png" alt="Image 3"></td>
108
+ </tr>
109
+ <tr>
110
+ <td align="center">Input Image</td>
111
+ <td align="center">Input Image with Landmark</td>
112
+ <td align="center">Aligned Image with Landmark</td>
113
+ </tr>
114
+ </table>
115
+
116
+
117
+ Code for visualization
118
+ ```python
119
+ def concat_pil(list_of_pil):
120
+ w, h = list_of_pil[0].size
121
+ new_im = Image.new('RGB', (w * len(list_of_pil), h))
122
+ for i, im in enumerate(list_of_pil):
123
+ new_im.paste(im, (i * w, 0))
124
+ return new_im
125
+
126
+
127
+ def draw_ldmk(img, ldmk):
128
+ import cv2
129
+ if ldmk is None:
130
+ return img
131
+ colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
132
+ img = img.copy()
133
+ for i in range(5):
134
+ color = colors[i]
135
+ cv2.circle(img, (int(ldmk[i*2] * img.shape[1]),
136
+ int(ldmk[i*2+1] * img.shape[0])), 1, color, 4)
137
+ return img
138
+
139
+ def tensor_to_numpy(tensor):
140
+ # -1 to 1 tensor to 0-255
141
+ arr = tensor.numpy().transpose(1,2,0)
142
+ return (arr * 0.5 + 0.5) * 255
143
+
144
+
145
+ def visualize(tensor, ldmks=None):
146
+ assert tensor.ndim == 4
147
+ images = [tensor_to_numpy(image_tensor) for image_tensor in tensor]
148
+ if ldmks is not None:
149
+ images = [draw_ldmk(images[j], ldmks[j].ravel()) for j in range(len(images))]
150
+ pil_images = [Image.fromarray(im.astype('uint8')) for im in images]
151
+ return concat_pil(pil_images)
152
+
153
+ visualize(input, None).save('orig.png')
154
+ visualize(aligned_x, aligned_ldmks).save('aligned.png')
155
+ visualize(input, orig_ldmks).save('input.png')
156
+ ```
157
+