Spaces:
Running
Running
Evan Li committed on
Commit ·
05bd487
1
Parent(s): fbb6c1a
record screen, age, chopped formula, final?
Browse files
- analyzers/aesthetic_analyzer.py +53 -21
- analyzers/age_analyzer.py +135 -0
- analyzers/insightface_analyzer.py +8 -75
- app.py +31 -11
analyzers/aesthetic_analyzer.py
CHANGED
|
@@ -53,12 +53,12 @@ from typing import Any
|
|
| 53 |
|
| 54 |
|
| 55 |
# How much weight the learned beauty regressor gets when both signals
|
| 56 |
-
# are available. 0.
|
| 57 |
-
# chopped score — rule factors contribute
|
| 58 |
-
# refinement
|
| 59 |
-
# learned
|
| 60 |
-
#
|
| 61 |
-
LEARNED_WEIGHT = 0.
|
| 62 |
|
| 63 |
# Baseline score. Penalties push up, bonuses pull down.
|
| 64 |
BASELINE = 50.0
|
|
@@ -153,10 +153,12 @@ class AestheticAnalyzer:
|
|
| 153 |
# ── Penalties (push score up = more chopped) ─────────────────
|
| 154 |
|
| 155 |
# Facial asymmetry: 0 = perfectly symmetric, 1 = very asymmetric.
|
| 156 |
-
# MediaPipe
|
|
|
|
|
|
|
| 157 |
asym = d.get("facial_asymmetry_score")
|
| 158 |
if isinstance(asym, (int, float)):
|
| 159 |
-
penalty = float(asym) *
|
| 160 |
score += penalty
|
| 161 |
breakdown["asymmetry_penalty"] = round(penalty, 2)
|
| 162 |
|
|
@@ -170,13 +172,15 @@ class AestheticAnalyzer:
|
|
| 170 |
score += penalty
|
| 171 |
breakdown["wrinkle_penalty"] = penalty
|
| 172 |
|
| 173 |
-
# Skin uniformity = LAB L* std-dev over the eroded interior
|
| 174 |
-
#
|
|
|
|
|
|
|
| 175 |
uniformity = d.get("skin_uniformity")
|
| 176 |
if isinstance(uniformity, (int, float)) and uniformity > 0:
|
| 177 |
-
# Empirically uniformity sits ~8-15 in clean skin and
|
| 178 |
-
#
|
| 179 |
-
penalty = min(
|
| 180 |
score += penalty
|
| 181 |
breakdown["skin_unevenness_penalty"] = round(penalty, 2)
|
| 182 |
|
|
@@ -184,9 +188,11 @@ class AestheticAnalyzer:
|
|
| 184 |
# detector was too noisy (shadows / pores counted as spots).
|
| 185 |
|
| 186 |
# Smile asymmetry: 0 = perfectly symmetric smile, larger = lopsided.
|
|
|
|
|
|
|
| 187 |
smile_asym = d.get("smile_asymmetry")
|
| 188 |
if isinstance(smile_asym, (int, float)):
|
| 189 |
-
penalty = min(
|
| 190 |
score += penalty
|
| 191 |
breakdown["smile_asymmetry_penalty"] = round(penalty, 2)
|
| 192 |
|
|
@@ -205,17 +211,43 @@ class AestheticAnalyzer:
|
|
| 205 |
|
| 206 |
# ── Bonuses (pull score down = less chopped) ─────────────────
|
| 207 |
|
| 208 |
-
# Defined jawline.
|
| 209 |
-
#
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
jaw_type = d.get("jawline_type")
|
| 212 |
jaw_type_bonus_map = {"sharp": -16.0, "strong": -10.0, "soft": 0.0}
|
| 213 |
if jaw_type in jaw_type_bonus_map:
|
| 214 |
-
|
|
|
|
|
|
|
| 215 |
jaw_angle = d.get("jawline_angle")
|
| 216 |
-
if isinstance(jaw_angle, (int, float))
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
if jaw_bonus:
|
| 220 |
score += jaw_bonus
|
| 221 |
breakdown["jaw_definition_bonus"] = round(jaw_bonus, 2)
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
# How much weight the learned beauty regressor gets when both signals
|
| 56 |
+
# are available. 0.85 means the SCUT-FBP5500 ResNet-50 strongly
|
| 57 |
+
# dominates the chopped score — rule factors contribute 15% as a
|
| 58 |
+
# refinement layer rather than a primary driver. The trained model
|
| 59 |
+
# learned from 60-rater-averaged human ground truth, which is a much
|
| 60 |
+
# better signal than any hand-tuned heuristic.
|
| 61 |
+
LEARNED_WEIGHT = 0.85
|
| 62 |
|
| 63 |
# Baseline score. Penalties push up, bonuses pull down.
|
| 64 |
BASELINE = 50.0
|
|
|
|
| 153 |
# ── Penalties (push score up = more chopped) ─────────────────
|
| 154 |
|
| 155 |
# Facial asymmetry: 0 = perfectly symmetric, 1 = very asymmetric.
|
| 156 |
+
# MediaPipe's score is noisier than we'd like — attractive faces
|
| 157 |
+
# still come back with measurable asymmetry from natural
|
| 158 |
+
# micro-expressions and camera angle. De-emphasised from ×30.
|
| 159 |
asym = d.get("facial_asymmetry_score")
|
| 160 |
if isinstance(asym, (int, float)):
|
| 161 |
+
penalty = float(asym) * 18.0
|
| 162 |
score += penalty
|
| 163 |
breakdown["asymmetry_penalty"] = round(penalty, 2)
|
| 164 |
|
|
|
|
| 172 |
score += penalty
|
| 173 |
breakdown["wrinkle_penalty"] = penalty
|
| 174 |
|
| 175 |
+
# Skin uniformity = LAB L* std-dev over the eroded interior face
|
| 176 |
+
# mask. Higher std means uneven tone (shadows, blemishes).
|
| 177 |
+
# De-emphasised: the metric over-penalises attractive faces in
|
| 178 |
+
# warm/directional lighting, which is most photos.
|
| 179 |
uniformity = d.get("skin_uniformity")
|
| 180 |
if isinstance(uniformity, (int, float)) and uniformity > 0:
|
| 181 |
+
# Empirically uniformity sits ~8-15 in clean skin and 20-30
|
| 182 |
+
# in uneven skin. Cap reduced from 14 to 9.
|
| 183 |
+
penalty = min(9.0, max(0.0, (float(uniformity) - 10.0) * 0.7))
|
| 184 |
score += penalty
|
| 185 |
breakdown["skin_unevenness_penalty"] = round(penalty, 2)
|
| 186 |
|
|
|
|
| 188 |
# detector was too noisy (shadows / pores counted as spots).
|
| 189 |
|
| 190 |
# Smile asymmetry: 0 = perfectly symmetric smile, larger = lopsided.
|
| 191 |
+
# De-emphasised — even attractive faces have natural smile
|
| 192 |
+
# asymmetry, and the MediaPipe blendshape signal exaggerates it.
|
| 193 |
smile_asym = d.get("smile_asymmetry")
|
| 194 |
if isinstance(smile_asym, (int, float)):
|
| 195 |
+
penalty = min(6.0, float(smile_asym) * 30.0)
|
| 196 |
score += penalty
|
| 197 |
breakdown["smile_asymmetry_penalty"] = round(penalty, 2)
|
| 198 |
|
|
|
|
| 211 |
|
| 212 |
# ── Bonuses (pull score down = less chopped) ─────────────────
|
| 213 |
|
| 214 |
+
# Defined jawline. EMPHASISED — strong jawline is one of the
|
| 215 |
+
# most consistent visual cues for "conventionally attractive."
|
| 216 |
+
# Two signals combine here:
|
| 217 |
+
#
|
| 218 |
+
# (a) The MediaPipe `jawline_type` bucket gives a coarse
|
| 219 |
+
# qualitative read.
|
| 220 |
+
# (b) The numeric `jawline_angle` (degrees subtended at the
|
| 221 |
+
# chin by the two gonion landmarks) gives a continuous
|
| 222 |
+
# signal where lower = sharper. We map it linearly into
|
| 223 |
+
# a bonus that maxes out at very sharp angles and fades
|
| 224 |
+
# to zero by ~145°.
|
| 225 |
+
#
|
| 226 |
+
# We take whichever signal is more generous so the cue isn't
|
| 227 |
+
# double-counted on a single face. Numeric bonus scales as:
|
| 228 |
+
#
|
| 229 |
+
# angle ≤ 95° → -22 (very sharp)
|
| 230 |
+
# angle 95-145 → linearly -22 → 0
|
| 231 |
+
# angle ≥ 145° → 0 (very soft)
|
| 232 |
+
jaw_bucket_bonus = 0.0
|
| 233 |
jaw_type = d.get("jawline_type")
|
| 234 |
jaw_type_bonus_map = {"sharp": -16.0, "strong": -10.0, "soft": 0.0}
|
| 235 |
if jaw_type in jaw_type_bonus_map:
|
| 236 |
+
jaw_bucket_bonus = jaw_type_bonus_map[jaw_type]
|
| 237 |
+
|
| 238 |
+
jaw_angle_bonus = 0.0
|
| 239 |
jaw_angle = d.get("jawline_angle")
|
| 240 |
+
if isinstance(jaw_angle, (int, float)):
|
| 241 |
+
if jaw_angle <= 95:
|
| 242 |
+
jaw_angle_bonus = -22.0
|
| 243 |
+
elif jaw_angle < 145:
|
| 244 |
+
# Linear ramp from -22 at 95° to 0 at 145°.
|
| 245 |
+
jaw_angle_bonus = -22.0 * (145 - jaw_angle) / 50.0
|
| 246 |
+
# else stays 0
|
| 247 |
+
|
| 248 |
+
# Use whichever bonus is more pronounced (smaller / more
|
| 249 |
+
# negative number = bigger bonus).
|
| 250 |
+
jaw_bonus = min(jaw_bucket_bonus, jaw_angle_bonus)
|
| 251 |
if jaw_bonus:
|
| 252 |
score += jaw_bonus
|
| 253 |
breakdown["jaw_definition_bonus"] = round(jaw_bonus, 2)
|
analyzers/age_analyzer.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AgeAnalyzer — FairFace age classifier with softmax-weighted estimate.
|
| 3 |
+
|
| 4 |
+
Model
|
| 5 |
+
-----
|
| 6 |
+
- HF repo : dima806/fairface_age_image_detection
|
| 7 |
+
- Arch : Vision Transformer (ViT-B/16)
|
| 8 |
+
- Trained : FairFace dataset (race-balanced)
|
| 9 |
+
- Reported : ~59% top-1 accuracy across 9 age buckets
|
| 10 |
+
- License : Apache 2.0
|
| 11 |
+
|
| 12 |
+
Why this and not InsightFace's bundled genderage head
|
| 13 |
+
-----------------------------------------------------
|
| 14 |
+
InsightFace's age regression head systematically over-predicts for
|
| 15 |
+
certain face types — strong jaw, brow ridge, beard shadow, or just
|
| 16 |
+
poor lighting can make it call a 20-year-old "52". Piecewise
|
| 17 |
+
calibration helps with mild overshoot but can't recover when the
|
| 18 |
+
raw prediction is already 50+ years off.
|
| 19 |
+
|
| 20 |
+
FairFace uses softmax classification across 9 age buckets. Even when
|
| 21 |
+
wrong it's wrong by ~5-10 years, not 30+. We take the softmax-weighted
|
| 22 |
+
expected value across all buckets to get a smooth continuous number
|
| 23 |
+
that moves with confidence — rather than always snapping to a fixed
|
| 24 |
+
bucket midpoint.
|
| 25 |
+
|
| 26 |
+
Inputs
|
| 27 |
+
------
|
| 28 |
+
img_rgb : np.ndarray (H, W, 3) uint8. Typically a face crop produced
|
| 29 |
+
by `_crop_to_face` in app.py.
|
| 30 |
+
|
| 31 |
+
Outputs (dict)
|
| 32 |
+
--------------
|
| 33 |
+
age_estimate : softmax-weighted expected age (float, years)
|
| 34 |
+
age_range : argmax bucket as a string (e.g. "20-29")
|
| 35 |
+
age_confidence : argmax softmax score
|
| 36 |
+
age_distribution : full {bucket: prob} dict over all 9 buckets
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
from typing import Any
|
| 40 |
+
|
| 41 |
+
from PIL import Image
|
| 42 |
+
from transformers import pipeline
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
MODEL_ID = "dima806/fairface_age_image_detection"

# Canonical age-bucket labels, ordered youngest to oldest. Every result
# dict reports a probability for each of these keys.
AGE_LABELS = [
    "0-2", "3-9", "10-19", "20-29", "30-39",
    "40-49", "50-59", "60-69", "70+",
]

# Midpoint per bucket; used to compute the softmax-weighted expected
# age. The 70+ bucket midpoint is a guess — there's no upper bound in
# the FairFace label space.
AGE_MIDPOINTS = {
    "0-2": 1.0,
    "3-9": 6.0,
    "10-19": 14.5,
    "20-29": 24.5,
    "30-39": 34.5,
    "40-49": 44.5,
    "50-59": 54.5,
    "60-69": 64.5,
    "70+": 75.0,
}


class AgeAnalyzer:
    """Estimate age from a face image with an image-classification pipeline.

    The classifier is loaded once in ``__init__``. If loading fails the
    instance stays usable and ``analyze`` returns the empty result.
    """

    def __init__(self):
        # Stays None when the model cannot be loaded; `analyze` checks it.
        self.classifier = None
        try:
            self.classifier = pipeline("image-classification", model=MODEL_ID)
        except Exception as exc:
            # Best-effort load: report the failure and degrade to empty
            # results instead of raising out of the constructor.
            print(f"[AgeAnalyzer] Failed to load {MODEL_ID}: {exc}")

    def analyze(self, img_rgb) -> dict[str, Any]:
        """Classify ``img_rgb`` and return the age summary dict.

        Args:
            img_rgb: image array handed to ``Image.fromarray``.

        Returns:
            A dict with ``age_estimate``, ``age_range``, ``age_confidence``
            and ``age_distribution``. The empty result (see
            ``_empty_result``) is returned when the classifier is not
            loaded, when prediction raises, or when no prediction maps to
            a known age bucket.
        """
        if self.classifier is None:
            return self._empty_result()

        try:
            pil = Image.fromarray(img_rgb)
            # Pull all 9 buckets so we can compute the weighted estimate.
            preds = self.classifier(pil, top_k=len(AGE_LABELS))
        except Exception as exc:
            print(f"[AgeAnalyzer] Prediction failed: {exc}")
            return self._empty_result()

        if not preds:
            return self._empty_result()

        return self._summarize_predictions(preds)

    @classmethod
    def _summarize_predictions(cls, preds) -> dict[str, Any]:
        """Turn ``[{"label": ..., "score": ...}, ...]`` into the result dict."""
        # Keep the raw scores for the arithmetic; rounding is applied only
        # to the values that are reported, so it cannot skew the estimate
        # or the argmax.
        scores = {label: 0.0 for label in AGE_LABELS}
        for pred in preds:
            label = cls._normalize_label(pred["label"])
            if label in scores:
                scores[label] = float(pred["score"])

        total_weight = sum(scores.values())
        if total_weight <= 0.0:
            # Nothing mapped onto a known bucket (or every score was zero).
            # Reporting the first bucket with zero confidence would be
            # misleading, so fall back to the "unknown" result.
            return cls._empty_result()

        # Softmax-weighted expected age. Sum over (midpoint × prob).
        # Lets the number slide between buckets when the model is
        # uncertain — e.g. 80% confident 20-29, 20% 30-39 → ~26.5
        # instead of snapping to either bucket's midpoint.
        weighted_age = sum(
            AGE_MIDPOINTS[label] * prob for label, prob in scores.items()
        ) / total_weight

        # Argmax bucket = the model's top guess; report that as
        # `age_range` for legacy UI compatibility.
        top_label, top_score = max(scores.items(), key=lambda kv: kv[1])

        return {
            "age_estimate": round(float(weighted_age), 1),
            "age_range": top_label,
            "age_confidence": round(float(top_score), 3),
            "age_distribution": {
                label: round(prob, 3) for label, prob in scores.items()
            },
        }

    @staticmethod
    def _normalize_label(label: str) -> str:
        """Map model output to canonical AGE_LABELS entry."""
        stripped = label.strip()
        # The open-ended top bucket may be spelled out in words; match it
        # case-insensitively and map it onto the canonical "70+" key.
        if stripped.lower() == "more than 70":
            return "70+"
        return stripped

    @staticmethod
    def _empty_result() -> dict[str, Any]:
        """Return the placeholder result used when no estimate is available."""
        return {
            "age_estimate": 0.0,
            "age_range": "unknown",
            "age_confidence": 0.0,
            "age_distribution": {label: 0.0 for label in AGE_LABELS},
        }
|
analyzers/insightface_analyzer.py
CHANGED
|
@@ -1,30 +1,20 @@
|
|
| 1 |
"""
|
| 2 |
-
InsightFaceAnalyzer — face detection + ArcFace recognition
|
| 3 |
|
| 4 |
Model
|
| 5 |
-----
|
| 6 |
- Package : `insightface` (https://github.com/deepinsight/insightface)
|
| 7 |
- Bundle : buffalo_l (ResNet50@WebFace600K backbone, ONNX)
|
| 8 |
-
- Used here : SCRFD-10GF detector
|
| 9 |
-
landmarks
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
- Size : ~280 MB (ONNX, mixed FP16/FP32)
|
| 14 |
- License : weights research-only; code Apache 2.0
|
| 15 |
- Source : https://github.com/deepinsight/insightface/tree/master/python-package
|
| 16 |
|
| 17 |
-
Age calibration
|
| 18 |
-
---------------
|
| 19 |
-
InsightFace's `genderage` head was trained on a dataset that skews
|
| 20 |
-
adult-heavy, so it systematically overshoots young faces by 6-10 years
|
| 21 |
-
while being roughly accurate for older adults. A flat offset (which is
|
| 22 |
-
what we tried first) helps the old end and hurts the young end. We
|
| 23 |
-
apply a piecewise subtractive correction instead — heavy for predicted
|
| 24 |
-
ages under 30, lighter for 30-50, and none for 50+. Tune via
|
| 25 |
-
`AGE_OFFSET_YOUNG`, `AGE_OFFSET_MID` env vars if your population
|
| 26 |
-
skews differently from ours.
|
| 27 |
-
|
| 28 |
Inputs
|
| 29 |
------
|
| 30 |
img_rgb : np.ndarray (H, W, 3) uint8
|
|
@@ -34,20 +24,14 @@ Outputs (dict)
|
|
| 34 |
face_bbox : [x1, y1, x2, y2] in pixel coordinates
|
| 35 |
face_confidence : SCRFD detection score
|
| 36 |
face_embedding : list[float] of length 512 (ArcFace, L2-normalised)
|
| 37 |
-
age_estimate : calibrated age in years (float)
|
| 38 |
-
age_range : string bucket for legacy UI compatibility
|
| 39 |
-
age_confidence : 1.0 — InsightFace's head is regression-only
|
| 40 |
_insight_landmarks_2d : 106 2D points (internal, stripped from JSON)
|
| 41 |
|
| 42 |
Accuracy
|
| 43 |
--------
|
| 44 |
- Recognition (ArcFace via buffalo_l): 99.83% LFW, 96.21% IJB-B FAR=1e-4.
|
| 45 |
- Detection (SCRFD-10GF): >99% recall on WIDER FACE easy / medium.
|
| 46 |
-
- Age (informal): ~5 yr MAE after piecewise calibration. No published
|
| 47 |
-
benchmark from InsightFace for buffalo_l's age head specifically.
|
| 48 |
"""
|
| 49 |
|
| 50 |
-
import os
|
| 51 |
from typing import Any
|
| 52 |
|
| 53 |
import numpy as np
|
|
@@ -63,38 +47,6 @@ except ImportError:
|
|
| 63 |
|
| 64 |
MODEL_NAME = "buffalo_l"
|
| 65 |
|
| 66 |
-
# Piecewise age calibration. The genderage head over-predicts young
|
| 67 |
-
# adults heavily and older adults barely. Override at runtime if the
|
| 68 |
-
# default offsets don't match your user population.
|
| 69 |
-
AGE_OFFSET_YOUNG = float(os.environ.get("AGE_OFFSET_YOUNG", "6")) # raw < 30
|
| 70 |
-
AGE_OFFSET_MID = float(os.environ.get("AGE_OFFSET_MID", "3")) # 30 ≤ raw < 50
|
| 71 |
-
AGE_OFFSET_OLD = float(os.environ.get("AGE_OFFSET_OLD", "0")) # raw ≥ 50
|
| 72 |
-
|
| 73 |
-
# Same legacy bucket schema as before so UI rows showing `age_range`
|
| 74 |
-
# render whatever the source.
|
| 75 |
-
AGE_BUCKETS = [
|
| 76 |
-
(0, 3, "0-2"), (3, 10, "3-9"), (10, 20, "10-19"),
|
| 77 |
-
(20, 30, "20-29"), (30, 40, "30-39"), (40, 50, "40-49"),
|
| 78 |
-
(50, 60, "50-59"), (60, 70, "60-69"), (70, 200, "70+"),
|
| 79 |
-
]
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def _calibrate_age(raw: float) -> float:
|
| 83 |
-
"""Piecewise calibration on InsightFace's raw age regression.
|
| 84 |
-
|
| 85 |
-
Worked examples (with defaults 6 / 3 / 0):
|
| 86 |
-
raw 28 → 22 (a 20-yr-old often comes back as 28 raw)
|
| 87 |
-
raw 35 → 32
|
| 88 |
-
raw 55 → 55 (no correction, model is already fine here)
|
| 89 |
-
"""
|
| 90 |
-
if raw < 30:
|
| 91 |
-
offset = AGE_OFFSET_YOUNG
|
| 92 |
-
elif raw < 50:
|
| 93 |
-
offset = AGE_OFFSET_MID
|
| 94 |
-
else:
|
| 95 |
-
offset = AGE_OFFSET_OLD
|
| 96 |
-
return max(1.0, raw - offset)
|
| 97 |
-
|
| 98 |
|
| 99 |
class InsightFaceAnalyzer:
|
| 100 |
def __init__(self):
|
|
@@ -102,7 +54,7 @@ class InsightFaceAnalyzer:
|
|
| 102 |
if not HAS_INSIGHTFACE:
|
| 103 |
print(
|
| 104 |
"[InsightFaceAnalyzer] insightface package not installed; "
|
| 105 |
-
"detection
|
| 106 |
)
|
| 107 |
return
|
| 108 |
|
|
@@ -148,18 +100,10 @@ class InsightFaceAnalyzer:
|
|
| 148 |
else None
|
| 149 |
)
|
| 150 |
|
| 151 |
-
# Age — read raw genderage output, then piecewise calibrate.
|
| 152 |
-
raw_age = float(getattr(face, "age", 0.0))
|
| 153 |
-
age = _calibrate_age(raw_age)
|
| 154 |
-
|
| 155 |
return {
|
| 156 |
"face_bbox": bbox,
|
| 157 |
"face_confidence": round(float(face.det_score), 3),
|
| 158 |
"face_embedding": embedding,
|
| 159 |
-
"age_estimate": round(age, 1),
|
| 160 |
-
"age_raw": round(raw_age, 1), # for debugging the calibration
|
| 161 |
-
"age_range": self._bucket_age(age),
|
| 162 |
-
"age_confidence": 1.0,
|
| 163 |
# 106 2D landmarks (forehead, jaw, brows, eyes, nose, lips).
|
| 164 |
# Underscore-prefixed → stripped from JSON, available to
|
| 165 |
# downstream analyzers that want tighter face geometry.
|
|
@@ -170,22 +114,11 @@ class InsightFaceAnalyzer:
|
|
| 170 |
),
|
| 171 |
}
|
| 172 |
|
| 173 |
-
@staticmethod
|
| 174 |
-
def _bucket_age(age: float) -> str:
|
| 175 |
-
for lo, hi, label in AGE_BUCKETS:
|
| 176 |
-
if lo <= age < hi:
|
| 177 |
-
return label
|
| 178 |
-
return "unknown"
|
| 179 |
-
|
| 180 |
@staticmethod
|
| 181 |
def _empty_result() -> dict[str, Any]:
|
| 182 |
return {
|
| 183 |
"face_bbox": None,
|
| 184 |
"face_confidence": 0.0,
|
| 185 |
"face_embedding": None,
|
| 186 |
-
"age_estimate": 0.0,
|
| 187 |
-
"age_raw": 0.0,
|
| 188 |
-
"age_range": "unknown",
|
| 189 |
-
"age_confidence": 0.0,
|
| 190 |
"_insight_landmarks_2d": None,
|
| 191 |
}
|
|
|
|
| 1 |
"""
|
| 2 |
+
InsightFaceAnalyzer — face detection + ArcFace recognition embedding.
|
| 3 |
|
| 4 |
Model
|
| 5 |
-----
|
| 6 |
- Package : `insightface` (https://github.com/deepinsight/insightface)
|
| 7 |
- Bundle : buffalo_l (ResNet50@WebFace600K backbone, ONNX)
|
| 8 |
+
- Used here : SCRFD-10GF detector + ArcFace 512-d recognition + 106
|
| 9 |
+
2D landmarks. The bundle ALSO ships a genderage head,
|
| 10 |
+
but we ignore it: it routinely calls 20-year-olds "52"
|
| 11 |
+
and no calibration trick reliably undoes that drift.
|
| 12 |
+
Age comes from FairFace ViT (AgeAnalyzer), gender from
|
| 13 |
+
FairFace ViT (GenderAnalyzer).
|
| 14 |
- Size : ~280 MB (ONNX, mixed FP16/FP32)
|
| 15 |
- License : weights research-only; code Apache 2.0
|
| 16 |
- Source : https://github.com/deepinsight/insightface/tree/master/python-package
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
Inputs
|
| 19 |
------
|
| 20 |
img_rgb : np.ndarray (H, W, 3) uint8
|
|
|
|
| 24 |
face_bbox : [x1, y1, x2, y2] in pixel coordinates
|
| 25 |
face_confidence : SCRFD detection score
|
| 26 |
face_embedding : list[float] of length 512 (ArcFace, L2-normalised)
|
|
|
|
|
|
|
|
|
|
| 27 |
_insight_landmarks_2d : 106 2D points (internal, stripped from JSON)
|
| 28 |
|
| 29 |
Accuracy
|
| 30 |
--------
|
| 31 |
- Recognition (ArcFace via buffalo_l): 99.83% LFW, 96.21% IJB-B FAR=1e-4.
|
| 32 |
- Detection (SCRFD-10GF): >99% recall on WIDER FACE easy / medium.
|
|
|
|
|
|
|
| 33 |
"""
|
| 34 |
|
|
|
|
| 35 |
from typing import Any
|
| 36 |
|
| 37 |
import numpy as np
|
|
|
|
| 47 |
|
| 48 |
MODEL_NAME = "buffalo_l"
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
class InsightFaceAnalyzer:
|
| 52 |
def __init__(self):
|
|
|
|
| 54 |
if not HAS_INSIGHTFACE:
|
| 55 |
print(
|
| 56 |
"[InsightFaceAnalyzer] insightface package not installed; "
|
| 57 |
+
"face detection and recognition will be unavailable."
|
| 58 |
)
|
| 59 |
return
|
| 60 |
|
|
|
|
| 100 |
else None
|
| 101 |
)
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
return {
|
| 104 |
"face_bbox": bbox,
|
| 105 |
"face_confidence": round(float(face.det_score), 3),
|
| 106 |
"face_embedding": embedding,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
# 106 2D landmarks (forehead, jaw, brows, eyes, nose, lips).
|
| 108 |
# Underscore-prefixed → stripped from JSON, available to
|
| 109 |
# downstream analyzers that want tighter face geometry.
|
|
|
|
| 114 |
),
|
| 115 |
}
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
@staticmethod
|
| 118 |
def _empty_result() -> dict[str, Any]:
|
| 119 |
return {
|
| 120 |
"face_bbox": None,
|
| 121 |
"face_confidence": 0.0,
|
| 122 |
"face_embedding": None,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
"_insight_landmarks_2d": None,
|
| 124 |
}
|
app.py
CHANGED
|
@@ -11,19 +11,23 @@ Pipeline (in execution order)
|
|
| 11 |
-----------------------------
|
| 12 |
1. InsightFaceAnalyzer InsightFace buffalo_l (ONNX). SCRFD
|
| 13 |
detection + ArcFace 512-d embedding +
|
| 14 |
-
106 landmarks
|
| 15 |
-
|
| 16 |
-
so we get a real softmax confidence.
|
| 17 |
|
| 18 |
2. LandmarkAnalyzer MediaPipe Face Landmarker. 478 3D
|
| 19 |
landmarks + 52 ARKit blendshapes →
|
| 20 |
geometric features, smiling, mouth_open.
|
| 21 |
|
| 22 |
-
3a.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
InsightFace gender head so we get a real
|
| 24 |
softmax confidence instead of argmax 1.0.
|
| 25 |
|
| 26 |
-
|
| 27 |
5-class ethnicity widened to a 7-bucket
|
| 28 |
schema for legacy compatibility.
|
| 29 |
|
|
@@ -94,6 +98,7 @@ from analyzers.color_analyzer import ColorAnalyzer
|
|
| 94 |
from analyzers.obstruction_analyzer import ObstructionAnalyzer
|
| 95 |
from analyzers.hair_type_analyzer import HairTypeAnalyzer
|
| 96 |
from analyzers.insightface_analyzer import InsightFaceAnalyzer
|
|
|
|
| 97 |
from analyzers.gender_analyzer import GenderAnalyzer
|
| 98 |
from analyzers.beauty_analyzer import BeautyAnalyzer
|
| 99 |
from analyzers.aesthetic_analyzer import AestheticAnalyzer
|
|
@@ -115,6 +120,7 @@ app.add_middleware(
|
|
| 115 |
# model-load cost; subsequent requests are warm.
|
| 116 |
insightface_analyzer: Optional[InsightFaceAnalyzer] = None
|
| 117 |
landmark_analyzer: Optional[LandmarkAnalyzer] = None
|
|
|
|
| 118 |
gender_analyzer: Optional[GenderAnalyzer] = None
|
| 119 |
ethnicity_analyzer: Optional[EthnicityAnalyzer] = None
|
| 120 |
parsing_analyzer: Optional[ParsingAnalyzer] = None
|
|
@@ -156,7 +162,7 @@ def get_analyzers():
|
|
| 156 |
requests are warm.
|
| 157 |
"""
|
| 158 |
global insightface_analyzer, landmark_analyzer
|
| 159 |
-
global gender_analyzer, ethnicity_analyzer
|
| 160 |
global parsing_analyzer, emotion_analyzer, color_analyzer
|
| 161 |
global obstruction_analyzer, hair_type_analyzer
|
| 162 |
global beauty_analyzer, aesthetic_analyzer
|
|
@@ -169,6 +175,10 @@ def get_analyzers():
|
|
| 169 |
logger.info("Loading MediaPipe Face Landmarker...")
|
| 170 |
landmark_analyzer = LandmarkAnalyzer()
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
if gender_analyzer is None:
|
| 173 |
logger.info("Loading FairFace gender analyzer...")
|
| 174 |
gender_analyzer = GenderAnalyzer()
|
|
@@ -206,6 +216,7 @@ def get_analyzers():
|
|
| 206 |
return (
|
| 207 |
insightface_analyzer,
|
| 208 |
landmark_analyzer,
|
|
|
|
| 209 |
gender_analyzer,
|
| 210 |
ethnicity_analyzer,
|
| 211 |
parsing_analyzer,
|
|
@@ -259,6 +270,7 @@ def _run_pipeline(img_array: np.ndarray) -> dict:
|
|
| 259 |
(
|
| 260 |
insight,
|
| 261 |
landmarks,
|
|
|
|
| 262 |
genders,
|
| 263 |
ethnicities,
|
| 264 |
parsing,
|
|
@@ -272,9 +284,10 @@ def _run_pipeline(img_array: np.ndarray) -> dict:
|
|
| 272 |
|
| 273 |
results: dict = {}
|
| 274 |
|
| 275 |
-
# Step 1: InsightFace — detection
|
| 276 |
-
# 106 landmarks
|
| 277 |
-
#
|
|
|
|
| 278 |
logger.info("Running InsightFace analysis...")
|
| 279 |
insight_results = insight.analyze(img_array)
|
| 280 |
results.update(insight_results)
|
|
@@ -291,12 +304,19 @@ def _run_pipeline(img_array: np.ndarray) -> dict:
|
|
| 291 |
landmark_results = landmarks.analyze(img_array)
|
| 292 |
results.update(landmark_results)
|
| 293 |
|
| 294 |
-
# Step 3a: FairFace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
# score so the UI can show graded uncertainty.
|
| 296 |
logger.info("Running FairFace gender analysis...")
|
| 297 |
results.update(genders.analyze(face_crop))
|
| 298 |
|
| 299 |
-
# Step
|
| 300 |
logger.info("Running ethnicity analysis...")
|
| 301 |
results.update(ethnicities.analyze(face_crop))
|
| 302 |
|
|
|
|
| 11 |
-----------------------------
|
| 12 |
1. InsightFaceAnalyzer InsightFace buffalo_l (ONNX). SCRFD
|
| 13 |
detection + ArcFace 512-d embedding +
|
| 14 |
+
106 landmarks. Age & gender delegated
|
| 15 |
+
to FairFace ViTs (steps 3a / 3b).
|
|
|
|
| 16 |
|
| 17 |
2. LandmarkAnalyzer MediaPipe Face Landmarker. 478 3D
|
| 18 |
landmarks + 52 ARKit blendshapes →
|
| 19 |
geometric features, smiling, mouth_open.
|
| 20 |
|
| 21 |
+
3a. AgeAnalyzer FairFace ViT, softmax-weighted across 9
|
| 22 |
+
age buckets. Replaces the InsightFace
|
| 23 |
+
age regression which routinely missed
|
| 24 |
+
by 30+ years on certain face types.
|
| 25 |
+
|
| 26 |
+
3b. GenderAnalyzer FairFace ViT (~93.4% acc). Replaces the
|
| 27 |
InsightFace gender head so we get a real
|
| 28 |
softmax confidence instead of argmax 1.0.
|
| 29 |
|
| 30 |
+
3c. EthnicityAnalyzer cledoux42/Ethnicity_Test_v003 ViT.
|
| 31 |
5-class ethnicity widened to a 7-bucket
|
| 32 |
schema for legacy compatibility.
|
| 33 |
|
|
|
|
| 98 |
from analyzers.obstruction_analyzer import ObstructionAnalyzer
|
| 99 |
from analyzers.hair_type_analyzer import HairTypeAnalyzer
|
| 100 |
from analyzers.insightface_analyzer import InsightFaceAnalyzer
|
| 101 |
+
from analyzers.age_analyzer import AgeAnalyzer
|
| 102 |
from analyzers.gender_analyzer import GenderAnalyzer
|
| 103 |
from analyzers.beauty_analyzer import BeautyAnalyzer
|
| 104 |
from analyzers.aesthetic_analyzer import AestheticAnalyzer
|
|
|
|
| 120 |
# model-load cost; subsequent requests are warm.
|
| 121 |
insightface_analyzer: Optional[InsightFaceAnalyzer] = None
|
| 122 |
landmark_analyzer: Optional[LandmarkAnalyzer] = None
|
| 123 |
+
age_analyzer: Optional[AgeAnalyzer] = None
|
| 124 |
gender_analyzer: Optional[GenderAnalyzer] = None
|
| 125 |
ethnicity_analyzer: Optional[EthnicityAnalyzer] = None
|
| 126 |
parsing_analyzer: Optional[ParsingAnalyzer] = None
|
|
|
|
| 162 |
requests are warm.
|
| 163 |
"""
|
| 164 |
global insightface_analyzer, landmark_analyzer
|
| 165 |
+
global age_analyzer, gender_analyzer, ethnicity_analyzer
|
| 166 |
global parsing_analyzer, emotion_analyzer, color_analyzer
|
| 167 |
global obstruction_analyzer, hair_type_analyzer
|
| 168 |
global beauty_analyzer, aesthetic_analyzer
|
|
|
|
| 175 |
logger.info("Loading MediaPipe Face Landmarker...")
|
| 176 |
landmark_analyzer = LandmarkAnalyzer()
|
| 177 |
|
| 178 |
+
if age_analyzer is None:
|
| 179 |
+
logger.info("Loading FairFace age analyzer...")
|
| 180 |
+
age_analyzer = AgeAnalyzer()
|
| 181 |
+
|
| 182 |
if gender_analyzer is None:
|
| 183 |
logger.info("Loading FairFace gender analyzer...")
|
| 184 |
gender_analyzer = GenderAnalyzer()
|
|
|
|
| 216 |
return (
|
| 217 |
insightface_analyzer,
|
| 218 |
landmark_analyzer,
|
| 219 |
+
age_analyzer,
|
| 220 |
gender_analyzer,
|
| 221 |
ethnicity_analyzer,
|
| 222 |
parsing_analyzer,
|
|
|
|
| 270 |
(
|
| 271 |
insight,
|
| 272 |
landmarks,
|
| 273 |
+
ages,
|
| 274 |
genders,
|
| 275 |
ethnicities,
|
| 276 |
parsing,
|
|
|
|
| 284 |
|
| 285 |
results: dict = {}
|
| 286 |
|
| 287 |
+
# Step 1: InsightFace — detection + ArcFace 512-d recognition
|
| 288 |
+
# embedding + 106 landmarks. Age and gender both delegated to
|
| 289 |
+
# FairFace ViTs in step 3 because the bundled genderage head was
|
| 290 |
+
# too inaccurate (called 20-yr-olds "52" in real photos).
|
| 291 |
logger.info("Running InsightFace analysis...")
|
| 292 |
insight_results = insight.analyze(img_array)
|
| 293 |
results.update(insight_results)
|
|
|
|
| 304 |
landmark_results = landmarks.analyze(img_array)
|
| 305 |
results.update(landmark_results)
|
| 306 |
|
| 307 |
+
# Step 3a: FairFace age. Softmax-weighted estimate across 9
|
| 308 |
+
# buckets — slides between bucket midpoints when the model is
|
| 309 |
+
# uncertain instead of snapping. Much more reliable than
|
| 310 |
+
# InsightFace's regression head on younger faces.
|
| 311 |
+
logger.info("Running FairFace age analysis...")
|
| 312 |
+
results.update(ages.analyze(face_crop))
|
| 313 |
+
|
| 314 |
+
# Step 3b: FairFace gender. Provides a real softmax confidence
|
| 315 |
# score so the UI can show graded uncertainty.
|
| 316 |
logger.info("Running FairFace gender analysis...")
|
| 317 |
results.update(genders.analyze(face_crop))
|
| 318 |
|
| 319 |
+
# Step 3c: ethnicity classifier — likes a tighter face crop.
|
| 320 |
logger.info("Running ethnicity analysis...")
|
| 321 |
results.update(ethnicities.analyze(face_crop))
|
| 322 |
|