dreamlessx committed on
Commit
cc423b0
·
verified ·
1 Parent(s): 4447638

Upload landmarkdiff/evaluation.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. landmarkdiff/evaluation.py +93 -27
landmarkdiff/evaluation.py CHANGED
@@ -1,6 +1,8 @@
1
- """Evaluation metrics: FID, LPIPS, NME, ArcFace sim, SSIM.
2
 
3
- Stratified by Fitzpatrick skin type (I-VI) via ITA thresholding.
 
 
4
  """
5
 
6
  from __future__ import annotations
@@ -90,7 +92,19 @@ class EvalMetrics:
90
 
91
 
92
  def classify_fitzpatrick_ita(image: np.ndarray) -> str:
93
- """Fitzpatrick I-VI from ITA angle (Chardon et al. 1991 thresholds)."""
 
 
 
 
 
 
 
 
 
 
 
 
94
  if cv2 is None:
95
  raise ImportError("opencv-python is required for Fitzpatrick classification")
96
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
@@ -127,7 +141,19 @@ def compute_nme(
127
  left_eye_idx: int = 33,
128
  right_eye_idx: int = 263,
129
  ) -> float:
130
- """Compute Normalized Mean Error for landmarks."""
 
 
 
 
 
 
 
 
 
 
 
 
131
  iod = np.linalg.norm(
132
  target_landmarks[left_eye_idx] - target_landmarks[right_eye_idx]
133
  )
@@ -142,7 +168,11 @@ def compute_ssim(
142
  pred: np.ndarray,
143
  target: np.ndarray,
144
  ) -> float:
145
- """SSIM via skimage, falls back to global SSIM if not installed."""
 
 
 
 
146
  try:
147
  from skimage.metrics import structural_similarity
148
  # Convert to grayscale if color, or compute per-channel
@@ -173,6 +203,7 @@ def compute_ssim(
173
 
174
 
175
  _LPIPS_FN = None
 
176
 
177
 
178
  def _get_lpips_fn():
@@ -189,12 +220,15 @@ def compute_lpips(
189
  pred: np.ndarray,
190
  target: np.ndarray,
191
  ) -> float:
192
- """LPIPS perceptual distance (lower = more similar)."""
 
 
 
193
  try:
194
  import lpips
195
  import torch
196
  except ImportError:
197
- return 0.0
198
 
199
  _lpips_fn = _get_lpips_fn()
200
 
@@ -211,7 +245,17 @@ def compute_fid(
211
  real_dir: str,
212
  generated_dir: str,
213
  ) -> float:
214
- """Compute FID between directories of real and generated images."""
 
 
 
 
 
 
 
 
 
 
215
  try:
216
  from torch_fidelity import calculate_metrics
217
  except ImportError:
@@ -219,10 +263,11 @@ def compute_fid(
219
  "torch-fidelity is required for FID. Install with: pip install torch-fidelity"
220
  )
221
 
 
222
  metrics = calculate_metrics(
223
  input1=generated_dir,
224
  input2=real_dir,
225
- cuda=True,
226
  fid=True,
227
  verbose=False,
228
  )
@@ -233,14 +278,21 @@ def compute_identity_similarity(
233
  pred: np.ndarray,
234
  target: np.ndarray,
235
  ) -> float:
236
- """ArcFace cosine sim [0,1]. Falls back to SSIM if no InsightFace."""
 
 
 
 
237
  try:
238
  from insightface.app import FaceAnalysis
239
- app = FaceAnalysis(
240
- name="buffalo_l",
241
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
242
- )
243
- app.prepare(ctx_id=-1, det_size=(320, 320))
 
 
 
244
 
245
  pred_bgr = pred if pred.shape[2] == 3 else cv2.cvtColor(pred, cv2.COLOR_RGB2BGR)
246
  target_bgr = target if target.shape[2] == 3 else cv2.cvtColor(target, cv2.COLOR_RGB2BGR)
@@ -270,7 +322,21 @@ def evaluate_batch(
270
  procedures: list[str] | None = None,
271
  compute_identity: bool = False,
272
  ) -> EvalMetrics:
273
- """Evaluate a batch of predicted vs target images."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  n = len(predictions)
275
  ssim_scores = []
276
  lpips_scores = []
@@ -302,10 +368,10 @@ def evaluate_batch(
302
  proc_groups.setdefault(procedures[i], []).append(i)
303
 
304
  metrics = EvalMetrics(
305
- ssim=float(np.mean(ssim_scores)) if ssim_scores else 0.0,
306
- lpips=float(np.mean(lpips_scores)) if lpips_scores else 0.0,
307
- nme=float(np.mean(nme_scores)) if nme_scores else 0.0,
308
- identity_sim=float(np.mean(identity_scores)) if identity_scores else 0.0,
309
  )
310
 
311
  # Full Fitzpatrick stratification for ALL metrics
@@ -314,35 +380,35 @@ def evaluate_batch(
314
 
315
  group_lpips = [lpips_scores[i] for i in indices]
316
  if group_lpips:
317
- metrics.lpips_by_fitzpatrick[ftype] = float(np.mean(group_lpips))
318
 
319
  group_ssim = [ssim_scores[i] for i in indices]
320
  if group_ssim:
321
- metrics.ssim_by_fitzpatrick[ftype] = float(np.mean(group_ssim))
322
 
323
  if nme_scores:
324
  group_nme = [nme_scores[i] for i in indices if i < len(nme_scores)]
325
  if group_nme:
326
- metrics.nme_by_fitzpatrick[ftype] = float(np.mean(group_nme))
327
 
328
  if identity_scores:
329
  group_id = [identity_scores[i] for i in indices if i < len(identity_scores)]
330
  if group_id:
331
- metrics.identity_sim_by_fitzpatrick[ftype] = float(np.mean(group_id))
332
 
333
  # Per-procedure breakdown
334
  for proc, indices in proc_groups.items():
335
  group_lpips = [lpips_scores[i] for i in indices]
336
  if group_lpips:
337
- metrics.lpips_by_procedure[proc] = float(np.mean(group_lpips))
338
 
339
  group_ssim = [ssim_scores[i] for i in indices]
340
  if group_ssim:
341
- metrics.ssim_by_procedure[proc] = float(np.mean(group_ssim))
342
 
343
  if nme_scores:
344
  group_nme = [nme_scores[i] for i in indices if i < len(nme_scores)]
345
  if group_nme:
346
- metrics.nme_by_procedure[proc] = float(np.mean(group_nme))
347
 
348
  return metrics
 
1
+ """Evaluation metrics suite.
2
 
3
+ All metrics stratified by Fitzpatrick skin type (I-VI) using ITA-based thresholding.
4
+ Primary metrics: FID, LPIPS, NME, ArcFace identity similarity.
5
+ Secondary: SSIM (relaxed target >0.80).
6
  """
7
 
8
  from __future__ import annotations
 
92
 
93
 
94
  def classify_fitzpatrick_ita(image: np.ndarray) -> str:
95
+ """Classify Fitzpatrick skin type using Individual Typology Angle (ITA).
96
+
97
+ ITA = arctan((L - 50) / b) * (180 / pi)
98
+ where L, b are from CIE L*a*b* color space.
99
+
100
+ Thresholds from Chardon et al. (1991):
101
+ - ITA > 55: Type I (very light)
102
+ - 41 < ITA <= 55: Type II (light)
103
+ - 28 < ITA <= 41: Type III (intermediate)
104
+ - 10 < ITA <= 28: Type IV (tan)
105
+ - -30 < ITA <= 10: Type V (brown)
106
+ - ITA <= -30: Type VI (dark)
107
+ """
108
  if cv2 is None:
109
  raise ImportError("opencv-python is required for Fitzpatrick classification")
110
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
 
141
  left_eye_idx: int = 33,
142
  right_eye_idx: int = 263,
143
  ) -> float:
144
+ """Compute Normalized Mean Error for landmarks.
145
+
146
+ Normalized by inter-ocular distance.
147
+
148
+ Args:
149
+ pred_landmarks: (N, 2) predicted landmark positions.
150
+ target_landmarks: (N, 2) ground truth positions.
151
+ left_eye_idx: MediaPipe index for left eye center.
152
+ right_eye_idx: MediaPipe index for right eye center.
153
+
154
+ Returns:
155
+ NME value (lower is better).
156
+ """
157
  iod = np.linalg.norm(
158
  target_landmarks[left_eye_idx] - target_landmarks[right_eye_idx]
159
  )
 
168
  pred: np.ndarray,
169
  target: np.ndarray,
170
  ) -> float:
171
+ """Compute Structural Similarity Index (SSIM).
172
+
173
+ Uses scikit-image's windowed SSIM (Wang et al. 2004) for proper
174
+ per-window computation with 11x11 Gaussian kernel.
175
+ """
176
  try:
177
  from skimage.metrics import structural_similarity
178
  # Convert to grayscale if color, or compute per-channel
 
203
 
204
 
205
  _LPIPS_FN = None
206
+ _ARCFACE_APP = None
207
 
208
 
209
  def _get_lpips_fn():
 
220
  pred: np.ndarray,
221
  target: np.ndarray,
222
  ) -> float:
223
+ """Compute LPIPS perceptual distance between two images.
224
+
225
+ Returns LPIPS score (lower = more similar), or NaN if lpips or torch is not installed.
226
+ """
227
  try:
228
  import lpips
229
  import torch
230
  except ImportError:
231
+ return float("nan")
232
 
233
  _lpips_fn = _get_lpips_fn()
234
 
 
245
  real_dir: str,
246
  generated_dir: str,
247
  ) -> float:
248
+ """Compute FID between directories of real and generated images.
249
+
250
+ Uses torch-fidelity; runs on CUDA when available, otherwise on CPU.
251
+
252
+ Args:
253
+ real_dir: Path to directory of real images.
254
+ generated_dir: Path to directory of generated images.
255
+
256
+ Returns:
257
+ FID score (lower = more similar distributions).
258
+ """
259
  try:
260
  from torch_fidelity import calculate_metrics
261
  except ImportError:
 
263
  "torch-fidelity is required for FID. Install with: pip install torch-fidelity"
264
  )
265
 
266
+ import torch
267
  metrics = calculate_metrics(
268
  input1=generated_dir,
269
  input2=real_dir,
270
+ cuda=torch.cuda.is_available(),
271
  fid=True,
272
  verbose=False,
273
  )
 
278
  pred: np.ndarray,
279
  target: np.ndarray,
280
  ) -> float:
281
+ """Compute ArcFace identity cosine similarity between two face images.
282
+
283
+ Returns cosine similarity [0, 1] where 1 = identical identity.
284
+ Falls back to SSIM-based proxy if InsightFace unavailable.
285
+ """
286
  try:
287
  from insightface.app import FaceAnalysis
288
+ global _ARCFACE_APP
289
+ if _ARCFACE_APP is None:
290
+ _ARCFACE_APP = FaceAnalysis(
291
+ name="buffalo_l",
292
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
293
+ )
294
+ _ARCFACE_APP.prepare(ctx_id=-1, det_size=(320, 320))
295
+ app = _ARCFACE_APP
296
 
297
  pred_bgr = pred if pred.shape[2] == 3 else cv2.cvtColor(pred, cv2.COLOR_RGB2BGR)
298
  target_bgr = target if target.shape[2] == 3 else cv2.cvtColor(target, cv2.COLOR_RGB2BGR)
 
322
  procedures: list[str] | None = None,
323
  compute_identity: bool = False,
324
  ) -> EvalMetrics:
325
+ """Evaluate a batch of predicted vs target images.
326
+
327
+ Computes all metrics and stratifies by Fitzpatrick skin type and procedure.
328
+
329
+ Args:
330
+ predictions: List of predicted BGR images.
331
+ targets: List of target BGR images.
332
+ pred_landmarks: Optional list of (N, 2) predicted landmark arrays.
333
+ target_landmarks: Optional list of (N, 2) target landmark arrays.
334
+ procedures: Optional list of procedure names for per-procedure breakdown.
335
+ compute_identity: Whether to compute ArcFace identity similarity (slow).
336
+
337
+ Returns:
338
+ EvalMetrics with all computed values.
339
+ """
340
  n = len(predictions)
341
  ssim_scores = []
342
  lpips_scores = []
 
368
  proc_groups.setdefault(procedures[i], []).append(i)
369
 
370
  metrics = EvalMetrics(
371
+ ssim=float(np.nanmean(ssim_scores)) if ssim_scores else 0.0,
372
+ lpips=float(np.nanmean(lpips_scores)) if lpips_scores else 0.0,
373
+ nme=float(np.nanmean(nme_scores)) if nme_scores else 0.0,
374
+ identity_sim=float(np.nanmean(identity_scores)) if identity_scores else 0.0,
375
  )
376
 
377
  # Full Fitzpatrick stratification for ALL metrics
 
380
 
381
  group_lpips = [lpips_scores[i] for i in indices]
382
  if group_lpips:
383
+ metrics.lpips_by_fitzpatrick[ftype] = float(np.nanmean(group_lpips))
384
 
385
  group_ssim = [ssim_scores[i] for i in indices]
386
  if group_ssim:
387
+ metrics.ssim_by_fitzpatrick[ftype] = float(np.nanmean(group_ssim))
388
 
389
  if nme_scores:
390
  group_nme = [nme_scores[i] for i in indices if i < len(nme_scores)]
391
  if group_nme:
392
+ metrics.nme_by_fitzpatrick[ftype] = float(np.nanmean(group_nme))
393
 
394
  if identity_scores:
395
  group_id = [identity_scores[i] for i in indices if i < len(identity_scores)]
396
  if group_id:
397
+ metrics.identity_sim_by_fitzpatrick[ftype] = float(np.nanmean(group_id))
398
 
399
  # Per-procedure breakdown
400
  for proc, indices in proc_groups.items():
401
  group_lpips = [lpips_scores[i] for i in indices]
402
  if group_lpips:
403
+ metrics.lpips_by_procedure[proc] = float(np.nanmean(group_lpips))
404
 
405
  group_ssim = [ssim_scores[i] for i in indices]
406
  if group_ssim:
407
+ metrics.ssim_by_procedure[proc] = float(np.nanmean(group_ssim))
408
 
409
  if nme_scores:
410
  group_nme = [nme_scores[i] for i in indices if i < len(nme_scores)]
411
  if group_nme:
412
+ metrics.nme_by_procedure[proc] = float(np.nanmean(group_nme))
413
 
414
  return metrics