Spaces:
Sleeping
Sleeping
Evan Li committed on
Commit ·
8aee038
1
Parent(s): 57be97e
Farl
Browse files- Dockerfile +4 -0
- analyzers/attribute_analyzer.py +48 -37
- analyzers/parsing_analyzer.py +1 -1
- architecture.md +1707 -0
- requirements.txt +3 -0
Dockerfile
CHANGED
|
@@ -18,6 +18,10 @@ RUN mkdir -p models && \
|
|
| 18 |
wget -q -O models/face_landmarker.task \
|
| 19 |
"https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
COPY . .
|
| 22 |
|
| 23 |
EXPOSE 7860
|
|
|
|
| 18 |
wget -q -O models/face_landmarker.task \
|
| 19 |
"https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
|
| 20 |
|
| 21 |
+
# Pre-download FaRL (face-tuned CLIP ViT-B/16) weights for attribute classifier
|
| 22 |
+
RUN wget -q -O models/FaRL-Base-Patch16-LAIONFace20M-ep64.pth \
|
| 23 |
+
"https://github.com/FacePerceiver/FaRL/releases/download/pretrained_weights/FaRL-Base-Patch16-LAIONFace20M-ep64.pth"
|
| 24 |
+
|
| 25 |
COPY . .
|
| 26 |
|
| 27 |
EXPOSE 7860
|
analyzers/attribute_analyzer.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
runs a fresh 2-way softmax per binary pair. Group labels (hair color,
|
| 12 |
-
hair texture) get their own N-way softmax. All scores are now independent
|
| 13 |
-
of how many other labels we happen to be asking about.
|
| 14 |
"""
|
| 15 |
|
|
|
|
|
|
|
| 16 |
from typing import Any
|
| 17 |
|
|
|
|
| 18 |
import torch
|
| 19 |
from PIL import Image
|
| 20 |
-
from transformers import CLIPModel, CLIPProcessor
|
| 21 |
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
PAIRS = {
|
| 26 |
"wearing_glasses": ("wearing eyeglasses", "not wearing eyeglasses"),
|
|
@@ -59,8 +61,6 @@ PAIRS = {
|
|
| 59 |
HAIR_COLOR_LABELS = ["black hair", "blond hair", "brown hair", "gray hair"]
|
| 60 |
HAIR_TEXTURE_LABELS = ["straight hair", "wavy hair", "curly hair"]
|
| 61 |
|
| 62 |
-
# Some pairs default to False unless CLIP is confidently past this threshold.
|
| 63 |
-
# Stops borderline cases from being flipped to True on a 51/49 split.
|
| 64 |
ACCESSORY_THRESHOLD = 0.65
|
| 65 |
ACCESSORY_KEYS = {
|
| 66 |
"wearing_earrings", "wearing_necklace", "wearing_necktie", "wearing_hat",
|
|
@@ -76,32 +76,47 @@ class AttributeAnalyzer:
|
|
| 76 |
def __init__(self):
|
| 77 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 78 |
self.model = None
|
| 79 |
-
self.
|
| 80 |
try:
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
except Exception as exc:
|
| 84 |
-
print(f"[AttributeAnalyzer] Failed to load
|
| 85 |
|
| 86 |
@torch.no_grad()
|
| 87 |
def analyze(self, img_rgb) -> dict[str, Any]:
|
| 88 |
-
if self.model is None or self.
|
| 89 |
return self._empty_result()
|
| 90 |
|
| 91 |
pil = Image.fromarray(img_rgb)
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
image_inputs = self.processor(images=pil, return_tensors="pt").to(self.device)
|
| 95 |
-
image_features = self.model.get_image_features(**image_inputs)
|
| 96 |
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
|
| 97 |
|
| 98 |
-
# Per-pair scoring: each pair gets its own independent 2-way softmax.
|
| 99 |
pair_scores: dict[str, float] = {}
|
| 100 |
for key, (positive, negative) in PAIRS.items():
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
-
# Group scoring (N-way softmax within each group).
|
| 105 |
color_scores = self._group_softmax(
|
| 106 |
image_features, [_prompt(x) for x in HAIR_COLOR_LABELS]
|
| 107 |
)
|
|
@@ -143,10 +158,8 @@ class AttributeAnalyzer:
|
|
| 143 |
|
| 144 |
@torch.no_grad()
|
| 145 |
def _softmax_positive(self, image_features: torch.Tensor, prompts: list[str]) -> float:
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
).to(self.device)
|
| 149 |
-
text_features = self.model.get_text_features(**text_inputs)
|
| 150 |
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
|
| 151 |
logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
|
| 152 |
probs = torch.softmax(logits, dim=-1)[0]
|
|
@@ -154,10 +167,8 @@ class AttributeAnalyzer:
|
|
| 154 |
|
| 155 |
@torch.no_grad()
|
| 156 |
def _group_softmax(self, image_features: torch.Tensor, prompts: list[str]) -> list[float]:
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
).to(self.device)
|
| 160 |
-
text_features = self.model.get_text_features(**text_inputs)
|
| 161 |
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
|
| 162 |
logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
|
| 163 |
probs = torch.softmax(logits, dim=-1)[0]
|
|
|
|
| 1 |
"""
|
| 2 |
+
FaRL-based facial attribute classification.
|
| 3 |
+
|
| 4 |
+
Same CLIP ViT-B/16 architecture as before, but loaded with FaRL weights
|
| 5 |
+
(CVPR 2022) which were pretrained on LAION-Face — the 50M face-text-pair
|
| 6 |
+
subset of LAION-400M — instead of OpenAI's generic web crawl. The encoder
|
| 7 |
+
discriminates facial attributes much better while keeping the prompt-pair
|
| 8 |
+
zero-shot interface intact.
|
| 9 |
+
|
| 10 |
+
Falls back to vanilla OpenAI CLIP ViT-B/16 if the FaRL .pth is missing.
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
+
import os
|
| 14 |
+
from pathlib import Path
|
| 15 |
from typing import Any
|
| 16 |
|
| 17 |
+
import clip
|
| 18 |
import torch
|
| 19 |
from PIL import Image
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
+
CLIP_ARCH = "ViT-B/16"
|
| 23 |
+
FARL_WEIGHTS_PATH = os.environ.get(
|
| 24 |
+
"FARL_WEIGHTS_PATH", "models/FaRL-Base-Patch16-LAIONFace20M-ep64.pth"
|
| 25 |
+
)
|
| 26 |
|
| 27 |
PAIRS = {
|
| 28 |
"wearing_glasses": ("wearing eyeglasses", "not wearing eyeglasses"),
|
|
|
|
| 61 |
HAIR_COLOR_LABELS = ["black hair", "blond hair", "brown hair", "gray hair"]
|
| 62 |
HAIR_TEXTURE_LABELS = ["straight hair", "wavy hair", "curly hair"]
|
| 63 |
|
|
|
|
|
|
|
| 64 |
ACCESSORY_THRESHOLD = 0.65
|
| 65 |
ACCESSORY_KEYS = {
|
| 66 |
"wearing_earrings", "wearing_necklace", "wearing_necktie", "wearing_hat",
|
|
|
|
| 76 |
def __init__(self):
|
| 77 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 78 |
self.model = None
|
| 79 |
+
self.preprocess = None
|
| 80 |
try:
|
| 81 |
+
model, preprocess = clip.load(CLIP_ARCH, device="cpu")
|
| 82 |
+
|
| 83 |
+
weights_path = Path(FARL_WEIGHTS_PATH)
|
| 84 |
+
if weights_path.exists():
|
| 85 |
+
farl_state = torch.load(weights_path, map_location="cpu")
|
| 86 |
+
state = farl_state.get("state_dict", farl_state)
|
| 87 |
+
missing, unexpected = model.load_state_dict(state, strict=False)
|
| 88 |
+
print(
|
| 89 |
+
f"[AttributeAnalyzer] Loaded FaRL weights from {weights_path} "
|
| 90 |
+
f"(missing={len(missing)}, unexpected={len(unexpected)})"
|
| 91 |
+
)
|
| 92 |
+
else:
|
| 93 |
+
print(
|
| 94 |
+
f"[AttributeAnalyzer] FaRL weights not found at {weights_path}; "
|
| 95 |
+
"falling back to vanilla OpenAI CLIP ViT-B/16"
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
# Force float32 so per-pair softmax math is stable on both CPU and CUDA.
|
| 99 |
+
self.model = model.float().to(self.device).eval()
|
| 100 |
+
self.preprocess = preprocess
|
| 101 |
except Exception as exc:
|
| 102 |
+
print(f"[AttributeAnalyzer] Failed to load model: {exc}")
|
| 103 |
|
| 104 |
@torch.no_grad()
|
| 105 |
def analyze(self, img_rgb) -> dict[str, Any]:
|
| 106 |
+
if self.model is None or self.preprocess is None:
|
| 107 |
return self._empty_result()
|
| 108 |
|
| 109 |
pil = Image.fromarray(img_rgb)
|
| 110 |
+
image_tensor = self.preprocess(pil).unsqueeze(0).to(self.device)
|
| 111 |
+
image_features = self.model.encode_image(image_tensor)
|
|
|
|
|
|
|
| 112 |
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
|
| 113 |
|
|
|
|
| 114 |
pair_scores: dict[str, float] = {}
|
| 115 |
for key, (positive, negative) in PAIRS.items():
|
| 116 |
+
pair_scores[key] = self._softmax_positive(
|
| 117 |
+
image_features, [_prompt(positive), _prompt(negative)]
|
| 118 |
+
)
|
| 119 |
|
|
|
|
| 120 |
color_scores = self._group_softmax(
|
| 121 |
image_features, [_prompt(x) for x in HAIR_COLOR_LABELS]
|
| 122 |
)
|
|
|
|
| 158 |
|
| 159 |
@torch.no_grad()
|
| 160 |
def _softmax_positive(self, image_features: torch.Tensor, prompts: list[str]) -> float:
|
| 161 |
+
text_tokens = clip.tokenize(prompts).to(self.device)
|
| 162 |
+
text_features = self.model.encode_text(text_tokens)
|
|
|
|
|
|
|
| 163 |
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
|
| 164 |
logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
|
| 165 |
probs = torch.softmax(logits, dim=-1)[0]
|
|
|
|
| 167 |
|
| 168 |
@torch.no_grad()
|
| 169 |
def _group_softmax(self, image_features: torch.Tensor, prompts: list[str]) -> list[float]:
|
| 170 |
+
text_tokens = clip.tokenize(prompts).to(self.device)
|
| 171 |
+
text_features = self.model.encode_text(text_tokens)
|
|
|
|
|
|
|
| 172 |
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
|
| 173 |
logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
|
| 174 |
probs = torch.softmax(logits, dim=-1)[0]
|
analyzers/parsing_analyzer.py
CHANGED
|
@@ -21,6 +21,7 @@ primary source for lip geometry/color in color_analyzer.
|
|
| 21 |
"""
|
| 22 |
|
| 23 |
from typing import Any
|
|
|
|
| 24 |
|
| 25 |
import cv2
|
| 26 |
import numpy as np
|
|
@@ -62,7 +63,6 @@ class ParsingAnalyzer:
|
|
| 62 |
self.processor = None
|
| 63 |
self.model = None
|
| 64 |
try:
|
| 65 |
-
self.processor = SegformerImageProcessor.from_pretrained(MODEL_ID)
|
| 66 |
self.model = SegformerForSemanticSegmentation.from_pretrained(MODEL_ID)
|
| 67 |
self.model.to(self.device).eval()
|
| 68 |
except Exception as exc:
|
|
|
|
| 21 |
"""
|
| 22 |
|
| 23 |
from typing import Any
|
| 24 |
+
import warnings
|
| 25 |
|
| 26 |
import cv2
|
| 27 |
import numpy as np
|
|
|
|
| 63 |
self.processor = None
|
| 64 |
self.model = None
|
| 65 |
try:
|
|
|
|
| 66 |
self.model = SegformerForSemanticSegmentation.from_pretrained(MODEL_ID)
|
| 67 |
self.model.to(self.device).eval()
|
| 68 |
except Exception as exc:
|
architecture.md
ADDED
|
@@ -0,0 +1,1707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HCP Face Analysis — Architecture Plan
|
| 2 |
+
|
| 3 |
+
## Revised Architecture & Best Models for Maximum Feature Coverage
|
| 4 |
+
|
| 5 |
+
Since the codebase is flexible and can use more languages and frameworks, we go beyond the Supabase Edge Function constraint to find the **absolute best models** for the full feature list.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Recommended Architecture: Python Microservice Sidecar
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
│ CURRENT STACK │
|
| 14 |
+
│ Next.js Frontend ──► Supabase (Auth, DB, Storage) │
|
| 15 |
+
ββββββββββββββββ¬ββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
+
│
|
| 17 |
+
▼
|
| 18 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
│ NEW: Python Face Analysis Microservice │
|
| 20 |
+
│ (Railway.app / Render.com / Hugging Face Spaces) │
|
| 21 |
+
│ FREE TIER: 512MB RAM, shared CPU │
|
| 22 |
+
β β
|
| 23 |
+
│ FastAPI Server │
|
| 24 |
+
│ ├── MediaPipe Face Landmarker (478 landmarks, 4MB) │
|
| 25 |
+
│ ├── InsightFace Buffalo_SC (recognition + attrs, 30MB) │
|
| 26 |
+
│ ├── FairFace (age/gender/race, 90MB) │
|
| 27 |
+
│ ├── HuggingFace ViT models (attributes, ~50MB each) │
|
| 28 |
+
│ ├── BiSeNet (face parsing/segmentation, 50MB) │
|
| 29 |
+
│ └── Custom geometric analysis (your feature list) │
|
| 30 |
+
β β
|
| 31 |
+
│ Total: ~250MB models (loaded lazily) │
|
| 32 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
**Why this is better:** Python gives access to the **entire deep learning ecosystem** — every model on HuggingFace, every research paper's pretrained weights. Free-tier hosting on Railway/Render gives 512MB RAM and enough CPU for per-request inference.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## Best Models Per Feature Category
|
| 40 |
+
|
| 41 |
+
### Tier 1: Core Models (Must Have)
|
| 42 |
+
|
| 43 |
+
#### 1. MediaPipe Face Landmarker — Geometric Features
|
| 44 |
+
- **478 3D landmarks + 52 blendshapes**
|
| 45 |
+
- **Size:** 4MB
|
| 46 |
+
- **Covers:** Face shape, jawline, chin, cheekbones, forehead, eye shape, eye spacing, eye size, eyebrow shape, nose shape, lip shape, mouth width, dimples, facial asymmetry
|
| 47 |
+
- **GitHub:** https://github.com/google-ai-edge/mediapipe
|
| 48 |
+
- **Python:** `pip install mediapipe`
|
| 49 |
+
- **Accuracy:** State-of-the-art landmark detection, handles 30° head rotation well
|
| 50 |
+
|
| 51 |
+
#### 2. InsightFace Buffalo_SC — Lightweight Recognition + Age/Gender
|
| 52 |
+
- **Size:** ~30MB (smallest Buffalo variant)
|
| 53 |
+
- **LFW Accuracy:** 99.5%
|
| 54 |
+
- **Covers:** Face detection, age, gender, face embedding (for recognition), 2D landmarks
|
| 55 |
+
- **GitHub:** https://github.com/deepinsight/insightface
|
| 56 |
+
- **Weights:** Auto-downloaded via `insightface.app.FaceAnalysis(name='buffalo_sc')`
|
| 57 |
+
- **Why not Buffalo_L:** 320MB is overkill; Buffalo_SC is 90% as accurate at 1/10th the size
|
| 58 |
+
|
| 59 |
+
#### 3. FairFace — Age, Gender, Race (Most Accurate)
|
| 60 |
+
- **Size:** ~90MB (ResNet-34)
|
| 61 |
+
- **Accuracy:** 93.4% race, 94.2% gender, MAE 3.4 years for age
|
| 62 |
+
- **Covers:** Age (9 buckets), gender, race (7 categories: White, Black, Latino, East Asian, Southeast Asian, Indian, Middle Eastern)
|
| 63 |
+
- **GitHub:** https://github.com/dchen236/FairFace
|
| 64 |
+
- **Weights:** https://drive.google.com/file/d/1xSfJQWMhm3AVlJYcPcabGO_bj1kDB0xw (res34_fair_align_multi_7_20190809.pt)
|
| 65 |
+
- **Why over InsightFace for this:** FairFace is specifically trained for fair demographic classification across races, not biased toward any group
|
| 66 |
+
|
| 67 |
+
#### 4. HSEmotion (EfficientNet) — Emotion Recognition
|
| 68 |
+
- **Size:** ~20MB
|
| 69 |
+
- **Accuracy:** 66.5% on AffectNet-8 (state-of-the-art), 8 emotions
|
| 70 |
+
- **Covers:** Angry, contempt, disgust, fear, happy, neutral, sad, surprise
|
| 71 |
+
- **GitHub:** https://github.com/HSE-asavchenko/face-emotion-recognition
|
| 72 |
+
- **Weights:** Available via `timm` or direct download from repo
|
| 73 |
+
- **Why over face-api.js:** Significantly more accurate, trained on AffectNet (largest emotion dataset)
|
| 74 |
+
|
| 75 |
+
### Tier 2: Specialized Models
|
| 76 |
+
|
| 77 |
+
#### 5. BiSeNet Face Parsing — Facial Segmentation
|
| 78 |
+
- **Size:** ~50MB
|
| 79 |
+
- **Covers:** Skin region, left/right eyebrow, left/right eye, nose, upper/lower lip, inner mouth, hair, left/right ear, neck, cloth, hat, earrings, glasses, background
|
| 80 |
+
- **GitHub:** https://github.com/zllrunning/face-parsing.PyTorch
|
| 81 |
+
- **Weights:** https://drive.google.com/file/d/154JgKpzCPW82qINcVieuPH3fZ2e0P812
|
| 82 |
+
- **Why this matters:** Precisely segments hair, skin, eyebrows for color analysis, facial hair detection, glasses detection, and wrinkle analysis
|
| 83 |
+
|
| 84 |
+
#### 6. microsoft/swin-base-patch4-window7-224-in22k fine-tuned for facial attributes
|
| 85 |
+
- **HuggingFace:** Various CelebA-trained attribute classifiers
|
| 86 |
+
- Specifically: https://huggingface.co/nateraw/vit-age-classifier (age)
|
| 87 |
+
- Specifically: https://huggingface.co/rizvandwiki/gender-classification-2 (gender)
|
| 88 |
+
|
| 89 |
+
#### 7. CelebA Attribute Classifier (Custom Multi-Label)
|
| 90 |
+
- **Dataset:** CelebA has 40 binary attributes already labeled
|
| 91 |
+
- Train a lightweight EfficientNet-B0 (~20MB) on CelebA for:
|
| 92 |
+
- `Attractive`, `Bald`, `Bangs`, `Big_Lips`, `Big_Nose`, `Black_Hair`, `Blond_Hair`, `Brown_Hair`, `Bushy_Eyebrows`, `Chubby`, `Double_Chin`, `Eyeglasses`, `Goatee`, `Gray_Hair`, `Heavy_Makeup`, `High_Cheekbones`, `Male`, `Mouth_Slightly_Open`, `Mustache`, `Narrow_Eyes`, `No_Beard`, `Oval_Face`, `Pointy_Nose`, `Receding_Hairline`, `Sideburns`, `Smiling`, `Straight_Hair`, `Wavy_Hair`, `Wearing_Hat`, `Young`
|
| 93 |
+
- **Pre-trained option:** https://github.com/dchen236/FairFace has CelebA-trained models
|
| 94 |
+
- **Better pre-trained option:** https://huggingface.co/jnferreira/attribute-prediction-celebA
|
| 95 |
+
|
| 96 |
+
#### 8. Hair Segmentation + Color Analysis
|
| 97 |
+
- **Model:** MODNet for matting + BiSeNet for hair segmentation
|
| 98 |
+
- **GitHub (MODNet):** https://github.com/ZHKKKe/MODNet (~25MB)
|
| 99 |
+
- Post-segmentation: K-means clustering on hair pixels for color
|
| 100 |
+
|
| 101 |
+
#### 9. Skin Analysis (Wrinkles, Acne, etc.)
|
| 102 |
+
- **Model:** https://huggingface.co/imfarzanansari/skin-disease-detection (for acne/skin conditions)
|
| 103 |
+
- **For wrinkles:** Edge detection (Canny/Sobel) on forehead/eye regions from BiSeNet parsing — no model needed
|
| 104 |
+
- **For freckles/moles:** Blob detection on skin regions from BiSeNet parsing
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Complete Feature Coverage Map
|
| 109 |
+
|
| 110 |
+
| Feature | Model/Method | Confidence |
|
| 111 |
+
|---------|-------------|------------|
|
| 112 |
+
| **Face shape** (oval, round, square, heart, diamond, oblong, triangle) | MediaPipe landmarks geometric ratios + CelebA (`Oval_Face`) | ★★★★ |
|
| 113 |
+
| **Jawline** (sharp, soft, strong) | MediaPipe jaw landmark angles | ★★★★ |
|
| 114 |
+
| **Chin** (receding, pointed, cleft, wide) | MediaPipe chin landmarks + depth (z) | ★★★ |
|
| 115 |
+
| **Cheekbones** (high, flat, full, hollow) | MediaPipe landmark z-depth + CelebA (`High_Cheekbones`, `Chubby`) | ★★★★ |
|
| 116 |
+
| **Forehead** (broad, narrow) | MediaPipe forehead span ratio | ★★★★ |
|
| 117 |
+
| **Eye shape** (almond, round, hooded, monolid, upturned, downturned) | MediaPipe eyelid curvature + corner angles | ★★★★ |
|
| 118 |
+
| **Eye spacing** (wide-set, close-set) | MediaPipe interpupillary distance ratio | ★★★★★ |
|
| 119 |
+
| **Eye size** (large, small) | MediaPipe eye area / face area | ★★★★★ |
|
| 120 |
+
| **Deep-set / protruding eyes** | MediaPipe landmark z-depth at eye region | ★★★ |
|
| 121 |
+
| **Eye color** (brown, blue, green, hazel) | Iris crop → HSV color histogram + KNN | ★★★★ |
|
| 122 |
+
| **Dark under-eyes / eye bags** | BiSeNet skin parsing → brightness analysis under eyes | ★★★ |
|
| 123 |
+
| **Crow's feet** | Canny edge detection on BiSeNet-parsed outer eye skin | ★★★ |
|
| 124 |
+
| **Eyebrow shape** (arched, straight, bushy, thick, thin) | MediaPipe brow landmarks + CelebA (`Bushy_Eyebrows`, `Arched_Eyebrows`) | ★★★★ |
|
| 125 |
+
| **Unibrow** | MediaPipe inner brow distance + pixel analysis between brows | ★★★★ |
|
| 126 |
+
| **Nose shape** (straight, aquiline, button, upturned, wide, narrow) | MediaPipe nose landmarks + CelebA (`Big_Nose`, `Pointy_Nose`) | ★★★★ |
|
| 127 |
+
| **Nose bridge** (flat, high) | MediaPipe z-depth at nasal bridge | ★★★ |
|
| 128 |
+
| **Nostrils** (wide, narrow) | MediaPipe nostril landmark width ratio | ★★★★ |
|
| 129 |
+
| **Lips** (full, thin) | MediaPipe lip landmarks + CelebA (`Big_Lips`) | ★★★★ |
|
| 130 |
+
| **Mouth width** | MediaPipe mouth corner distance ratio | ★★★★★ |
|
| 131 |
+
| **Cupid's bow** | MediaPipe upper lip curvature analysis | ★★★ |
|
| 132 |
+
| **Teeth** (gap, crooked, straight, overbite, underbite) | Mouth crop when smiling → custom classifier or rule-based | ★★ |
|
| 133 |
+
| **Dimples** | MediaPipe blendshapes during smile + cheek region analysis | ★★★ |
|
| 134 |
+
| **Smile lines** | Edge detection on nasolabial region | ★★★ |
|
| 135 |
+
| **Asymmetrical smile** | MediaPipe left/right smile blendshape difference | ★★★★ |
|
| 136 |
+
| **Hair type** (straight, wavy, curly, coily) | BiSeNet hair segmentation → texture frequency (FFT) + CelebA (`Straight_Hair`, `Wavy_Hair`) | ★★★ |
|
| 137 |
+
| **Hair length** (short, long, bald) | BiSeNet hair mask area + CelebA (`Bald`, `Bangs`) | ★★★★ |
|
| 138 |
+
| **Hair color** (black, brown, blonde, red, gray, dyed) | BiSeNet hair mask → K-means color clustering + CelebA (`Black_Hair`, `Brown_Hair`, `Blond_Hair`, `Gray_Hair`) | ★★★★ |
|
| 139 |
+
| **Receding hairline / widow's peak** | BiSeNet hair boundary analysis + CelebA (`Receding_Hairline`) | ★★★ |
|
| 140 |
+
| **Beard/facial hair** (full, stubble, goatee, mustache, sideburns, clean-shaven) | BiSeNet parsing lower face + CelebA (`5_o_Clock_Shadow`, `Goatee`, `Mustache`, `No_Beard`, `Sideburns`) | ★★★★ |
|
| 141 |
+
| **Skin tone** (light, medium, dark) | BiSeNet skin parsing → mean LAB brightness | ★★★★★ |
|
| 142 |
+
| **Freckles** | BiSeNet skin mask → small blob detection (contrast) | ★★★ |
|
| 143 |
+
| **Moles / birthmark** | BiSeNet skin mask → dark blob detection | ★★★ |
|
| 144 |
+
| **Scars** | BiSeNet skin mask → linear edge anomaly detection | ★★ |
|
| 145 |
+
| **Acne** | BiSeNet skin mask → red blob detection or HuggingFace skin model | ★★★ |
|
| 146 |
+
| **Wrinkles / forehead lines** | BiSeNet forehead mask → Gabor filter or Canny edges | ★★★ |
|
| 147 |
+
| **Facial asymmetry** | MediaPipe left/right landmark mirror distance | ★★★★★ |
|
| 148 |
+
| **Prominent Adam's apple** | Neck region detection (limited accuracy) | ★ |
|
| 149 |
+
| **Glasses** | CelebA (`Eyeglasses`) + BiSeNet parsing | ★★★★★ |
|
| 150 |
+
| **Age** | FairFace (MAE 3.4 years) | ★★★★★ |
|
| 151 |
+
| **Gender** | FairFace (94.2%) | ★★★★★ |
|
| 152 |
+
| **Race** | FairFace (93.4%, 7 categories) | ★★★★★ |
|
| 153 |
+
| **Emotion** | HSEmotion (66.5% AffectNet-8, SOTA) | ★★★★ |
|
| 154 |
+
|
| 155 |
+
---
|
| 156 |
+
|
| 157 |
+
## Model Comparison Table
|
| 158 |
+
|
| 159 |
+
| Model | Accuracy (LFW) | Size | Runs in Deno/Browser? | Feature Depth | Notes |
|
| 160 |
+
|-------|----------------|------|----------------------|---------------|-------|
|
| 161 |
+
| **DeepFace** (Python) | 97.4% (VGG-Face) | 500MB+ | ❌ No (Python only) | Age, gender, race, emotion | Too large, wrong runtime |
|
| 162 |
+
| **InsightFace Buffalo_L** | 99.8% (LFW) | ~320MB | ❌ No (Python/C++) | Landmarks, age, gender | Too large for edge |
|
| 163 |
+
| **InsightFace MobileFaceNet** | 99.5% (LFW) | ~4MB | ⚠️ ONNX possible | Recognition only, no attributes | Very small but limited features |
|
| 164 |
+
| **MediaPipe Face Landmarker** | N/A (landmark model) | ~4MB | ✅ Yes (TFJS/WASM) | 478 landmarks, blendshapes | Best for geometric features |
|
| 165 |
+
| **face-api.js** | 99.2% (LFW) | ~6MB (all models) | ✅ Yes (TFJS) | Age, gender, emotion, 68 landmarks | Browser/Node.js ready |
|
| 166 |
+
| **ONNX FER+ (emotion)** | ~85% (FER2013) | ~2MB | ✅ Yes (ONNX.js) | Emotion only | Supplement model |
|
| 167 |
+
| **HuggingFace ViT models** | Varies | 50-350MB | ⚠️ ONNX export possible | Age, gender, various classifiers | Some fit under 50MB |
|
| 168 |
+
|
| 169 |
+
---
|
| 170 |
+
|
| 171 |
+
## Free Hosting Options for the Python Microservice
|
| 172 |
+
|
| 173 |
+
| Platform | Free Tier | RAM | Cold Start | Best For |
|
| 174 |
+
|----------|-----------|-----|------------|----------|
|
| 175 |
+
| **Hugging Face Spaces** | Unlimited | 2GB CPU | ~15s | Best free option, runs Gradio/FastAPI |
|
| 176 |
+
| **Railway.app** | $5 credit/month | 512MB | ~5s | Good for always-on API |
|
| 177 |
+
| **Render.com** | 750 hrs/month | 512MB | ~30s | Spins down after 15min inactivity |
|
| 178 |
+
| **Google Cloud Run** | 2M requests/month | 512MB | ~10s | Best scaling, pay-per-request |
|
| 179 |
+
| **Fly.io** | 3 shared VMs | 256MB | ~3s | Low latency, always on |
|
| 180 |
+
|
| 181 |
+
**Recommendation: Hugging Face Spaces** — 2GB RAM free, pre-installed ML libraries, no cold start limits, and you can use their Inference API for some models without even hosting.
|
| 182 |
+
|
| 183 |
+
---
|
| 184 |
+
|
| 185 |
+
## Full Implementation
|
| 186 |
+
|
| 187 |
+
### Python Microservice
|
| 188 |
+
|
| 189 |
+
#### requirements.txt
|
| 190 |
+
|
| 191 |
+
```
|
| 192 |
+
fastapi==0.115.0
|
| 193 |
+
uvicorn==0.30.0
|
| 194 |
+
python-multipart==0.0.9
|
| 195 |
+
mediapipe==0.10.14
|
| 196 |
+
insightface==0.7.3
|
| 197 |
+
onnxruntime==1.18.0
|
| 198 |
+
torch==2.3.0
|
| 199 |
+
torchvision==0.18.0
|
| 200 |
+
Pillow==10.4.0
|
| 201 |
+
numpy==1.26.4
|
| 202 |
+
opencv-python-headless==4.10.0.84
|
| 203 |
+
scipy==1.13.0
|
| 204 |
+
scikit-learn==1.5.0
|
| 205 |
+
huggingface-hub==0.23.0
|
| 206 |
+
```
|
| 207 |
+
|
| 208 |
+
#### face-service/app.py
|
| 209 |
+
|
| 210 |
+
```python
|
| 211 |
+
"""
|
| 212 |
+
Face Analysis Microservice
|
| 213 |
+
Combines multiple models for comprehensive facial attribute detection.
|
| 214 |
+
"""
|
| 215 |
+
|
| 216 |
+
import io
|
| 217 |
+
import logging
|
| 218 |
+
from typing import Optional
|
| 219 |
+
|
| 220 |
+
import cv2
|
| 221 |
+
import numpy as np
|
| 222 |
+
from fastapi import FastAPI, File, HTTPException, UploadFile
|
| 223 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 224 |
+
from PIL import Image
|
| 225 |
+
|
| 226 |
+
from analyzers.landmark_analyzer import LandmarkAnalyzer
|
| 227 |
+
from analyzers.demographic_analyzer import DemographicAnalyzer
|
| 228 |
+
from analyzers.attribute_analyzer import AttributeAnalyzer
|
| 229 |
+
from analyzers.parsing_analyzer import ParsingAnalyzer
|
| 230 |
+
from analyzers.emotion_analyzer import EmotionAnalyzer
|
| 231 |
+
from analyzers.color_analyzer import ColorAnalyzer
|
| 232 |
+
|
| 233 |
+
logging.basicConfig(level=logging.INFO)
|
| 234 |
+
logger = logging.getLogger(__name__)
|
| 235 |
+
|
| 236 |
+
app = FastAPI(title="Face Analysis Service", version="2.0.0")
|
| 237 |
+
|
| 238 |
+
app.add_middleware(
|
| 239 |
+
CORSMiddleware,
|
| 240 |
+
allow_origins=["*"], # Restrict in production
|
| 241 |
+
allow_credentials=True,
|
| 242 |
+
allow_methods=["*"],
|
| 243 |
+
allow_headers=["*"],
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
# Initialize analyzers lazily
|
| 247 |
+
landmark_analyzer: Optional[LandmarkAnalyzer] = None
|
| 248 |
+
demographic_analyzer: Optional[DemographicAnalyzer] = None
|
| 249 |
+
attribute_analyzer: Optional[AttributeAnalyzer] = None
|
| 250 |
+
parsing_analyzer: Optional[ParsingAnalyzer] = None
|
| 251 |
+
emotion_analyzer: Optional[EmotionAnalyzer] = None
|
| 252 |
+
color_analyzer: Optional[ColorAnalyzer] = None
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def get_analyzers():
|
| 256 |
+
global landmark_analyzer, demographic_analyzer, attribute_analyzer
|
| 257 |
+
global parsing_analyzer, emotion_analyzer, color_analyzer
|
| 258 |
+
|
| 259 |
+
if landmark_analyzer is None:
|
| 260 |
+
logger.info("Loading MediaPipe landmarks...")
|
| 261 |
+
landmark_analyzer = LandmarkAnalyzer()
|
| 262 |
+
|
| 263 |
+
if demographic_analyzer is None:
|
| 264 |
+
logger.info("Loading FairFace demographics...")
|
| 265 |
+
demographic_analyzer = DemographicAnalyzer()
|
| 266 |
+
|
| 267 |
+
if attribute_analyzer is None:
|
| 268 |
+
logger.info("Loading CelebA attribute classifier...")
|
| 269 |
+
attribute_analyzer = AttributeAnalyzer()
|
| 270 |
+
|
| 271 |
+
if parsing_analyzer is None:
|
| 272 |
+
logger.info("Loading BiSeNet face parser...")
|
| 273 |
+
parsing_analyzer = ParsingAnalyzer()
|
| 274 |
+
|
| 275 |
+
if emotion_analyzer is None:
|
| 276 |
+
logger.info("Loading HSEmotion...")
|
| 277 |
+
emotion_analyzer = EmotionAnalyzer()
|
| 278 |
+
|
| 279 |
+
if color_analyzer is None:
|
| 280 |
+
color_analyzer = ColorAnalyzer()
|
| 281 |
+
|
| 282 |
+
return (
|
| 283 |
+
landmark_analyzer,
|
| 284 |
+
demographic_analyzer,
|
| 285 |
+
attribute_analyzer,
|
| 286 |
+
parsing_analyzer,
|
| 287 |
+
emotion_analyzer,
|
| 288 |
+
color_analyzer,
|
| 289 |
+
)
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
@app.get("/health")
|
| 293 |
+
async def health():
|
| 294 |
+
return {"status": "ok"}
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
@app.post("/analyze")
|
| 298 |
+
async def analyze_face(file: UploadFile = File(...)):
|
| 299 |
+
"""Comprehensive face analysis endpoint."""
|
| 300 |
+
try:
|
| 301 |
+
contents = await file.read()
|
| 302 |
+
image = Image.open(io.BytesIO(contents)).convert("RGB")
|
| 303 |
+
img_array = np.array(image)
|
| 304 |
+
img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
|
| 305 |
+
|
| 306 |
+
(
|
| 307 |
+
landmarks,
|
| 308 |
+
demographics,
|
| 309 |
+
attributes,
|
| 310 |
+
parsing,
|
| 311 |
+
emotions,
|
| 312 |
+
colors,
|
| 313 |
+
) = get_analyzers()
|
| 314 |
+
|
| 315 |
+
results = {}
|
| 316 |
+
|
| 317 |
+
# 1. MediaPipe Landmarks — geometric features
|
| 318 |
+
logger.info("Running landmark analysis...")
|
| 319 |
+
landmark_results = landmarks.analyze(img_array)
|
| 320 |
+
results.update(landmark_results)
|
| 321 |
+
|
| 322 |
+
# 2. FairFace — age, gender, race
|
| 323 |
+
logger.info("Running demographic analysis...")
|
| 324 |
+
demo_results = demographics.analyze(img_array)
|
| 325 |
+
results.update(demo_results)
|
| 326 |
+
|
| 327 |
+
# 3. CelebA attributes — 40 binary facial attributes
|
| 328 |
+
logger.info("Running attribute analysis...")
|
| 329 |
+
attr_results = attributes.analyze(img_array)
|
| 330 |
+
results.update(attr_results)
|
| 331 |
+
|
| 332 |
+
# 4. BiSeNet face parsing — segmentation masks
|
| 333 |
+
logger.info("Running face parsing...")
|
| 334 |
+
parse_results = parsing.analyze(img_bgr)
|
| 335 |
+
results.update(parse_results)
|
| 336 |
+
|
| 337 |
+
# 5. HSEmotion — emotion classification
|
| 338 |
+
logger.info("Running emotion analysis...")
|
| 339 |
+
emo_results = emotions.analyze(img_array)
|
| 340 |
+
results.update(emo_results)
|
| 341 |
+
|
| 342 |
+
# 6. Color analysis using parsing masks
|
| 343 |
+
logger.info("Running color analysis...")
|
| 344 |
+
color_results = colors.analyze(
|
| 345 |
+
img_array,
|
| 346 |
+
skin_mask=parse_results.get("_skin_mask"),
|
| 347 |
+
hair_mask=parse_results.get("_hair_mask"),
|
| 348 |
+
landmark_data=landmark_results.get("_raw_landmarks"),
|
| 349 |
+
)
|
| 350 |
+
results.update(color_results)
|
| 351 |
+
|
| 352 |
+
# Remove internal fields
|
| 353 |
+
results = {k: v for k, v in results.items() if not k.startswith("_")}
|
| 354 |
+
|
| 355 |
+
return {"success": True, "data": results}
|
| 356 |
+
|
| 357 |
+
except Exception as e:
|
| 358 |
+
logger.error(f"Analysis failed: {e}", exc_info=True)
|
| 359 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 360 |
+
```
|
| 361 |
+
|
| 362 |
+
#### face-service/analyzers/landmark_analyzer.py
|
| 363 |
+
|
| 364 |
+
```python
|
| 365 |
+
"""
|
| 366 |
+
MediaPipe Face Landmarker — 478 3D landmarks + 52 blendshapes
|
| 367 |
+
Derives geometric facial features from landmark positions.
|
| 368 |
+
"""
|
| 369 |
+
|
| 370 |
+
import math
|
| 371 |
+
from typing import Any
|
| 372 |
+
|
| 373 |
+
import mediapipe as mp
|
| 374 |
+
import numpy as np
|
| 375 |
+
from mediapipe.tasks import python as mp_python
|
| 376 |
+
from mediapipe.tasks.python import vision
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
class LandmarkAnalyzer:
|
| 380 |
+
def __init__(self):
|
| 381 |
+
base_options = mp_python.BaseOptions(
|
| 382 |
+
model_asset_path=self._download_model()
|
| 383 |
+
)
|
| 384 |
+
options = vision.FaceLandmarkerOptions(
|
| 385 |
+
base_options=base_options,
|
| 386 |
+
output_face_blendshapes=True,
|
| 387 |
+
output_facial_transformation_matrixes=True,
|
| 388 |
+
num_faces=1,
|
| 389 |
+
)
|
| 390 |
+
self.detector = vision.FaceLandmarker.create_from_options(options)
|
| 391 |
+
|
| 392 |
+
def _download_model(self) -> str:
|
| 393 |
+
import urllib.request
|
| 394 |
+
import os
|
| 395 |
+
|
| 396 |
+
model_path = "models/face_landmarker.task"
|
| 397 |
+
if not os.path.exists(model_path):
|
| 398 |
+
os.makedirs("models", exist_ok=True)
|
| 399 |
+
url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
|
| 400 |
+
urllib.request.urlretrieve(url, model_path)
|
| 401 |
+
return model_path
|
| 402 |
+
|
| 403 |
+
def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
|
| 404 |
+
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
|
| 405 |
+
result = self.detector.detect(mp_image)
|
| 406 |
+
|
| 407 |
+
if not result.face_landmarks:
|
| 408 |
+
return {"error": "No face detected by MediaPipe"}
|
| 409 |
+
|
| 410 |
+
landmarks = result.face_landmarks[0]
|
| 411 |
+
lm = [{"x": l.x, "y": l.y, "z": l.z} for l in landmarks]
|
| 412 |
+
|
| 413 |
+
blendshapes = {}
|
| 414 |
+
if result.face_blendshapes:
|
| 415 |
+
for bs in result.face_blendshapes[0]:
|
| 416 |
+
blendshapes[bs.category_name] = round(bs.score, 4)
|
| 417 |
+
|
| 418 |
+
attrs = {}
|
| 419 |
+
attrs["_raw_landmarks"] = lm
|
| 420 |
+
|
| 421 |
+
# === Face Shape ===
|
| 422 |
+
face_height = self._dist(lm[10], lm[152])
|
| 423 |
+
face_width = self._dist(lm[234], lm[454])
|
| 424 |
+
jaw_width = self._dist(lm[172], lm[397])
|
| 425 |
+
cheekbone_width = self._dist(lm[93], lm[323])
|
| 426 |
+
forehead_width = self._dist(lm[54], lm[284])
|
| 427 |
+
|
| 428 |
+
wh_ratio = face_width / face_height if face_height > 0 else 1
|
| 429 |
+
jaw_to_face = jaw_width / face_width if face_width > 0 else 1
|
| 430 |
+
forehead_to_jaw = forehead_width / jaw_width if jaw_width > 0 else 1
|
| 431 |
+
cheek_to_jaw = cheekbone_width / jaw_width if jaw_width > 0 else 1
|
| 432 |
+
|
| 433 |
+
if wh_ratio > 0.85 and jaw_to_face > 0.75:
|
| 434 |
+
attrs["face_shape"] = "round"
|
| 435 |
+
elif wh_ratio > 0.8 and jaw_to_face > 0.8 and forehead_to_jaw < 1.1:
|
| 436 |
+
attrs["face_shape"] = "square"
|
| 437 |
+
elif wh_ratio < 0.75:
|
| 438 |
+
attrs["face_shape"] = "oblong"
|
| 439 |
+
elif forehead_to_jaw > 1.3:
|
| 440 |
+
attrs["face_shape"] = "heart"
|
| 441 |
+
elif cheek_to_jaw > 1.25 and forehead_to_jaw < 1.15:
|
| 442 |
+
attrs["face_shape"] = "diamond"
|
| 443 |
+
elif forehead_to_jaw < 0.85:
|
| 444 |
+
attrs["face_shape"] = "triangle"
|
| 445 |
+
else:
|
| 446 |
+
attrs["face_shape"] = "oval"
|
| 447 |
+
|
| 448 |
+
attrs["face_shape_metrics"] = {
|
| 449 |
+
"width_height_ratio": round(wh_ratio, 3),
|
| 450 |
+
"jaw_to_face_ratio": round(jaw_to_face, 3),
|
| 451 |
+
"forehead_to_jaw_ratio": round(forehead_to_jaw, 3),
|
| 452 |
+
"cheekbone_to_jaw_ratio": round(cheek_to_jaw, 3),
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
# === Forehead ===
|
| 456 |
+
forehead_ratio = forehead_width / face_width if face_width > 0 else 0.6
|
| 457 |
+
attrs["forehead_width"] = (
|
| 458 |
+
"broad" if forehead_ratio > 0.7
|
| 459 |
+
else "narrow" if forehead_ratio < 0.55
|
| 460 |
+
else "average"
|
| 461 |
+
)
|
| 462 |
+
|
| 463 |
+
# === Jawline ===
|
| 464 |
+
jaw_angle = self._jaw_angle(lm)
|
| 465 |
+
attrs["jawline_angle"] = round(jaw_angle, 1)
|
| 466 |
+
if jaw_angle < 110:
|
| 467 |
+
attrs["jawline_type"] = "sharp"
|
| 468 |
+
elif jaw_angle > 140:
|
| 469 |
+
attrs["jawline_type"] = "soft"
|
| 470 |
+
elif jaw_to_face > 0.75:
|
| 471 |
+
attrs["jawline_type"] = "strong"
|
| 472 |
+
else:
|
| 473 |
+
attrs["jawline_type"] = "soft"
|
| 474 |
+
|
| 475 |
+
# === Chin ===
|
| 476 |
+
chin_width = self._dist(lm[175], lm[396])
|
| 477 |
+
chin_ratio = chin_width / jaw_width if jaw_width > 0 else 0.4
|
| 478 |
+
attrs["chin_type"] = (
|
| 479 |
+
"pointed" if chin_ratio < 0.3
|
| 480 |
+
else "wide" if chin_ratio > 0.5
|
| 481 |
+
else "normal"
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
# === Cheekbones ===
|
| 485 |
+
cheek_z = (lm[93]["z"] + lm[323]["z"]) / 2
|
| 486 |
+
attrs["cheekbone_prominence"] = (
|
| 487 |
+
"high" if cheek_z < -0.04
|
| 488 |
+
else "flat" if cheek_z > 0.0
|
| 489 |
+
else "moderate"
|
| 490 |
+
)
|
| 491 |
+
|
| 492 |
+
# Hollow vs full cheeks (blendshape-assisted)
|
| 493 |
+
cheek_puff = blendshapes.get("cheekPuff", 0)
|
| 494 |
+
cheek_squint_l = blendshapes.get("cheekSquintLeft", 0)
|
| 495 |
+
cheek_squint_r = blendshapes.get("cheekSquintRight", 0)
|
| 496 |
+
if cheek_puff > 0.3:
|
| 497 |
+
attrs["cheek_fullness"] = "full"
|
| 498 |
+
elif cheek_z > -0.01:
|
| 499 |
+
attrs["cheek_fullness"] = "hollow"
|
| 500 |
+
else:
|
| 501 |
+
attrs["cheek_fullness"] = "normal"
|
| 502 |
+
|
| 503 |
+
# === Eyes ===
|
| 504 |
+
left_eye_top = lm[159]
|
| 505 |
+
left_eye_bottom = lm[145]
|
| 506 |
+
left_eye_inner = lm[133]
|
| 507 |
+
left_eye_outer = lm[33]
|
| 508 |
+
eye_openness = self._dist(left_eye_top, left_eye_bottom)
|
| 509 |
+
eye_width_val = self._dist(left_eye_inner, left_eye_outer)
|
| 510 |
+
eye_ratio = eye_openness / eye_width_val if eye_width_val > 0 else 0.3
|
| 511 |
+
|
| 512 |
+
outer_angle = left_eye_outer["y"] - left_eye_inner["y"]
|
| 513 |
+
if outer_angle < -0.012:
|
| 514 |
+
attrs["eye_shape"] = "upturned"
|
| 515 |
+
elif outer_angle > 0.012:
|
| 516 |
+
attrs["eye_shape"] = "downturned"
|
| 517 |
+
elif eye_ratio > 0.38:
|
| 518 |
+
attrs["eye_shape"] = "round"
|
| 519 |
+
elif eye_ratio < 0.2:
|
| 520 |
+
attrs["eye_shape"] = "hooded"
|
| 521 |
+
else:
|
| 522 |
+
attrs["eye_shape"] = "almond"
|
| 523 |
+
|
| 524 |
+
# Deep-set vs protruding
|
| 525 |
+
eye_z = (lm[159]["z"] + lm[145]["z"]) / 2
|
| 526 |
+
nose_bridge_z = lm[6]["z"]
|
| 527 |
+
if eye_z > nose_bridge_z + 0.02:
|
| 528 |
+
attrs["eye_depth"] = "deep-set"
|
| 529 |
+
elif eye_z < nose_bridge_z - 0.01:
|
| 530 |
+
attrs["eye_depth"] = "protruding"
|
| 531 |
+
else:
|
| 532 |
+
attrs["eye_depth"] = "normal"
|
| 533 |
+
|
| 534 |
+
# Eye spacing
|
| 535 |
+
if len(lm) > 473: # Iris landmarks available
|
| 536 |
+
inter_pupillary = self._dist(lm[468], lm[473])
|
| 537 |
+
else:
|
| 538 |
+
inter_pupillary = self._dist(lm[133], lm[362])
|
| 539 |
+
ip_ratio = inter_pupillary / face_width if face_width > 0 else 0.35
|
| 540 |
+
attrs["eye_spacing"] = (
|
| 541 |
+
"wide-set" if ip_ratio > 0.38
|
| 542 |
+
else "close-set" if ip_ratio < 0.28
|
| 543 |
+
else "average"
|
| 544 |
+
)
|
| 545 |
+
|
| 546 |
+
# Eye size
|
| 547 |
+
right_eye_top = lm[386]
|
| 548 |
+
right_eye_bottom = lm[374]
|
| 549 |
+
right_eye_inner = lm[362]
|
| 550 |
+
right_eye_outer = lm[263]
|
| 551 |
+
r_eye_area = self._dist(right_eye_top, right_eye_bottom) * self._dist(right_eye_inner, right_eye_outer)
|
| 552 |
+
l_eye_area = eye_openness * eye_width_val
|
| 553 |
+
avg_eye_area = (l_eye_area + r_eye_area) / 2
|
| 554 |
+
face_area = face_width * face_height
|
| 555 |
+
eye_size_ratio = avg_eye_area / face_area if face_area > 0 else 0.015
|
| 556 |
+
attrs["eye_size"] = (
|
| 557 |
+
"large" if eye_size_ratio > 0.02
|
| 558 |
+
else "small" if eye_size_ratio < 0.012
|
| 559 |
+
else "average"
|
| 560 |
+
)
|
| 561 |
+
|
| 562 |
+
# Eye blink (closed vs open)
|
| 563 |
+
blink_l = blendshapes.get("eyeBlinkLeft", 0)
|
| 564 |
+
blink_r = blendshapes.get("eyeBlinkRight", 0)
|
| 565 |
+
attrs["eyes_open"] = (blink_l + blink_r) / 2 < 0.5
|
| 566 |
+
|
| 567 |
+
# === Eyebrows ===
|
| 568 |
+
brow_mid_l = lm[105]
|
| 569 |
+
brow_outer_l = lm[46]
|
| 570 |
+
brow_inner_l = lm[70]
|
| 571 |
+
brow_to_eye = self._dist(brow_mid_l, lm[159])
|
| 572 |
+
brow_arch_ratio = brow_to_eye / eye_openness if eye_openness > 0 else 1.5
|
| 573 |
+
|
| 574 |
+
attrs["eyebrow_arch_height"] = (
|
| 575 |
+
"high" if brow_arch_ratio > 2.2
|
| 576 |
+
else "low" if brow_arch_ratio < 1.3
|
| 577 |
+
else "average"
|
| 578 |
+
)
|
| 579 |
+
|
| 580 |
+
# Brow curvature
|
| 581 |
+
mid_y = brow_mid_l["y"]
|
| 582 |
+
avg_end_y = (brow_inner_l["y"] + brow_outer_l["y"]) / 2
|
| 583 |
+
curvature = mid_y - avg_end_y
|
| 584 |
+
if abs(curvature) < 0.003:
|
| 585 |
+
attrs["eyebrow_shape"] = "straight"
|
| 586 |
+
elif curvature < -0.008:
|
| 587 |
+
attrs["eyebrow_shape"] = "arched"
|
| 588 |
+
else:
|
| 589 |
+
attrs["eyebrow_shape"] = "flat"
|
| 590 |
+
|
| 591 |
+
# Eyebrow thickness (vertical span of brow landmarks)
|
| 592 |
+
brow_top = lm[66] # Top of left brow
|
| 593 |
+
brow_bottom = lm[105] # Bottom of left brow
|
| 594 |
+
brow_thickness = self._dist(brow_top, brow_bottom)
|
| 595 |
+
attrs["eyebrow_thickness"] = (
|
| 596 |
+
"thick" if brow_thickness > 0.015
|
| 597 |
+
else "thin" if brow_thickness < 0.008
|
| 598 |
+
else "medium"
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
+
# Unibrow detection
|
| 602 |
+
inner_brow_dist = self._dist(lm[70], lm[300])
|
| 603 |
+
attrs["possible_unibrow"] = inner_brow_dist < 0.04
|
| 604 |
+
|
| 605 |
+
# === Nose ===
|
| 606 |
+
nose_bridge_top = lm[6]
|
| 607 |
+
nose_tip = lm[1]
|
| 608 |
+
nose_bottom = lm[2]
|
| 609 |
+
left_nostril = lm[129]
|
| 610 |
+
right_nostril = lm[358]
|
| 611 |
+
nostril_w = self._dist(left_nostril, right_nostril)
|
| 612 |
+
|
| 613 |
+
nw_ratio = nostril_w / face_width if face_width > 0 else 0.24
|
| 614 |
+
attrs["nostril_width"] = (
|
| 615 |
+
"wide" if nw_ratio > 0.28
|
| 616 |
+
else "narrow" if nw_ratio < 0.2
|
| 617 |
+
else "average"
|
| 618 |
+
)
|
| 619 |
+
|
| 620 |
+
tip_angle = nose_tip["y"] - nose_bottom["y"]
|
| 621 |
+
if tip_angle < -0.005:
|
| 622 |
+
attrs["nose_shape"] = "upturned"
|
| 623 |
+
elif tip_angle > 0.01:
|
| 624 |
+
attrs["nose_shape"] = "aquiline"
|
| 625 |
+
elif nw_ratio > 0.28:
|
| 626 |
+
attrs["nose_shape"] = "wide"
|
| 627 |
+
elif nw_ratio < 0.2:
|
| 628 |
+
attrs["nose_shape"] = "narrow"
|
| 629 |
+
else:
|
| 630 |
+
attrs["nose_shape"] = "straight"
|
| 631 |
+
|
| 632 |
+
attrs["nose_bridge"] = (
|
| 633 |
+
"high" if nose_bridge_top["z"] < -0.05
|
| 634 |
+
else "flat" if nose_bridge_top["z"] > 0.0
|
| 635 |
+
else "average"
|
| 636 |
+
)
|
| 637 |
+
|
| 638 |
+
attrs["nose_tip_shape"] = (
|
| 639 |
+
"pointed" if nose_tip["z"] < nose_bottom["z"] - 0.01
|
| 640 |
+
else "rounded"
|
| 641 |
+
)
|
| 642 |
+
|
| 643 |
+
# === Lips & Mouth ===
|
| 644 |
+
upper_lip_top = lm[0]
|
| 645 |
+
upper_lip_bottom = lm[13]
|
| 646 |
+
lower_lip_top = lm[14]
|
| 647 |
+
lower_lip_bottom = lm[17]
|
| 648 |
+
mouth_left = lm[61]
|
| 649 |
+
mouth_right = lm[291]
|
| 650 |
+
|
| 651 |
+
upper_lip_h = self._dist(upper_lip_top, upper_lip_bottom)
|
| 652 |
+
lower_lip_h = self._dist(lower_lip_top, lower_lip_bottom)
|
| 653 |
+
total_lip_h = upper_lip_h + lower_lip_h
|
| 654 |
+
mouth_w = self._dist(mouth_left, mouth_right)
|
| 655 |
+
|
| 656 |
+
lip_ratio = total_lip_h / mouth_w if mouth_w > 0 else 0.3
|
| 657 |
+
attrs["lip_fullness"] = (
|
| 658 |
+
"full" if lip_ratio > 0.38
|
| 659 |
+
else "thin" if lip_ratio < 0.22
|
| 660 |
+
else "average"
|
| 661 |
+
)
|
| 662 |
+
|
| 663 |
+
attrs["lip_balance"] = (
|
| 664 |
+
"top-heavy" if upper_lip_h > lower_lip_h * 1.2
|
| 665 |
+
else "bottom-heavy" if lower_lip_h > upper_lip_h * 1.2
|
| 666 |
+
else "balanced"
|
| 667 |
+
)
|
| 668 |
+
|
| 669 |
+
mw_ratio = mouth_w / face_width if face_width > 0 else 0.37
|
| 670 |
+
attrs["mouth_width"] = (
|
| 671 |
+
"wide" if mw_ratio > 0.42
|
| 672 |
+
else "small" if mw_ratio < 0.32
|
| 673 |
+
else "average"
|
| 674 |
+
)
|
| 675 |
+
|
| 676 |
+
# Cupid's bow
|
| 677 |
+
cupid_left = lm[37]
|
| 678 |
+
cupid_center = lm[0]
|
| 679 |
+
cupid_right = lm[267]
|
| 680 |
+
bow_depth = cupid_center["y"] - (cupid_left["y"] + cupid_right["y"]) / 2
|
| 681 |
+
attrs["cupids_bow"] = (
|
| 682 |
+
"defined" if bow_depth > 0.005
|
| 683 |
+
else "subtle" if bow_depth > 0.002
|
| 684 |
+
else "flat"
|
| 685 |
+
)
|
| 686 |
+
|
| 687 |
+
# Smile
|
| 688 |
+
smile_l = blendshapes.get("mouthSmileLeft", 0)
|
| 689 |
+
smile_r = blendshapes.get("mouthSmileRight", 0)
|
| 690 |
+
attrs["smiling"] = (smile_l + smile_r) / 2 > 0.4
|
| 691 |
+
attrs["smile_asymmetry"] = round(abs(smile_l - smile_r), 3)
|
| 692 |
+
|
| 693 |
+
# Dimples (heuristic: strong smile with low cheek puff)
|
| 694 |
+
attrs["possible_dimples"] = (
|
| 695 |
+
(smile_l > 0.5 or smile_r > 0.5) and cheek_puff < 0.2
|
| 696 |
+
)
|
| 697 |
+
|
| 698 |
+
# === Facial Asymmetry ===
|
| 699 |
+
symmetry_pairs = [
|
| 700 |
+
(33, 263), (133, 362), (70, 300), (93, 323), (172, 397),
|
| 701 |
+
(61, 291), (159, 386), (145, 374), (46, 276),
|
| 702 |
+
]
|
| 703 |
+
asymmetry_sum = 0.0
|
| 704 |
+
for li, ri in symmetry_pairs:
|
| 705 |
+
left_dist = abs(lm[li]["x"] - 0.5)
|
| 706 |
+
right_dist = abs(lm[ri]["x"] - 0.5)
|
| 707 |
+
asymmetry_sum += abs(left_dist - right_dist)
|
| 708 |
+
attrs["facial_asymmetry_score"] = round(
|
| 709 |
+
min(asymmetry_sum / len(symmetry_pairs) / 0.05, 1.0), 3
|
| 710 |
+
)
|
| 711 |
+
|
| 712 |
+
# === Raw blendshape scores (head pose from the transformation matrix is not computed here) ===
|
| 713 |
+
attrs["blendshapes"] = blendshapes
|
| 714 |
+
|
| 715 |
+
return attrs
|
| 716 |
+
|
| 717 |
+
def _dist(self, a: dict, b: dict) -> float:
|
| 718 |
+
return math.sqrt(
|
| 719 |
+
(a["x"] - b["x"]) ** 2
|
| 720 |
+
+ (a["y"] - b["y"]) ** 2
|
| 721 |
+
+ (a.get("z", 0) - b.get("z", 0)) ** 2
|
| 722 |
+
)
|
| 723 |
+
|
| 724 |
+
def _jaw_angle(self, lm: list[dict]) -> float:
|
| 725 |
+
chin = lm[152]
|
| 726 |
+
left_jaw = lm[172]
|
| 727 |
+
right_jaw = lm[397]
|
| 728 |
+
v1 = (left_jaw["x"] - chin["x"], left_jaw["y"] - chin["y"])
|
| 729 |
+
v2 = (right_jaw["x"] - chin["x"], right_jaw["y"] - chin["y"])
|
| 730 |
+
dot = v1[0] * v2[0] + v1[1] * v2[1]
|
| 731 |
+
mag1 = math.sqrt(v1[0] ** 2 + v1[1] ** 2)
|
| 732 |
+
mag2 = math.sqrt(v2[0] ** 2 + v2[1] ** 2)
|
| 733 |
+
if mag1 * mag2 == 0:
|
| 734 |
+
return 120.0
|
| 735 |
+
cos_angle = max(-1, min(1, dot / (mag1 * mag2)))
|
| 736 |
+
return math.acos(cos_angle) * (180 / math.pi)
|
| 737 |
+
```
|
| 738 |
+
|
| 739 |
+
#### face-service/analyzers/demographic_analyzer.py
|
| 740 |
+
|
| 741 |
+
```python
|
| 742 |
+
"""
|
| 743 |
+
FairFace — Age, Gender, Race prediction
|
| 744 |
+
Most fair and accurate demographic classifier.
|
| 745 |
+
"""
|
| 746 |
+
|
| 747 |
+
import os
|
| 748 |
+
from typing import Any
|
| 749 |
+
|
| 750 |
+
import cv2
|
| 751 |
+
import numpy as np
|
| 752 |
+
import torch
|
| 753 |
+
import torchvision.transforms as transforms
|
| 754 |
+
from huggingface_hub import hf_hub_download
|
| 755 |
+
from PIL import Image
|
| 756 |
+
from torchvision import models
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
class DemographicAnalyzer:
|
| 760 |
+
"""FairFace-based age, gender, race classifier."""
|
| 761 |
+
|
| 762 |
+
AGE_LABELS = [
|
| 763 |
+
"0-2", "3-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70+"
|
| 764 |
+
]
|
| 765 |
+
GENDER_LABELS = ["Male", "Female"]
|
| 766 |
+
RACE_LABELS = [
|
| 767 |
+
"White", "Black", "Latino_Hispanic", "East Asian",
|
| 768 |
+
"Southeast Asian", "Indian", "Middle Eastern"
|
| 769 |
+
]
|
| 770 |
+
|
| 771 |
+
def __init__(self):
|
| 772 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 773 |
+
self.model = self._load_model()
|
| 774 |
+
self.transform = transforms.Compose([
|
| 775 |
+
transforms.Resize((224, 224)),
|
| 776 |
+
transforms.ToTensor(),
|
| 777 |
+
transforms.Normalize(
|
| 778 |
+
mean=[0.485, 0.456, 0.406],
|
| 779 |
+
std=[0.229, 0.224, 0.225],
|
| 780 |
+
),
|
| 781 |
+
])
|
| 782 |
+
|
| 783 |
+
def _load_model(self):
|
| 784 |
+
"""Load FairFace ResNet34 model."""
|
| 785 |
+
model_path = "models/fairface_model.pt"
|
| 786 |
+
if not os.path.exists(model_path):
|
| 787 |
+
os.makedirs("models", exist_ok=True)
|
| 788 |
+
# Download from HuggingFace mirror or original source
|
| 789 |
+
# FairFace official weights: res34_fair_align_multi_7_20190809.pt
|
| 790 |
+
try:
|
| 791 |
+
hf_hub_download(
|
| 792 |
+
repo_id="dchen236/FairFace",
|
| 793 |
+
filename="res34_fair_align_multi_7_20190809.pt",
|
| 794 |
+
local_dir="models",
|
| 795 |
+
local_dir_use_symlinks=False,
|
| 796 |
+
)
|
| 797 |
+
os.rename(
|
| 798 |
+
"models/res34_fair_align_multi_7_20190809.pt",
|
| 799 |
+
model_path,
|
| 800 |
+
)
|
| 801 |
+
except Exception:
|
| 802 |
+
# No fallback model is loaded here: surface a clear error asking for a manual download
|
| 803 |
+
raise FileNotFoundError(
|
| 804 |
+
"Please download FairFace weights from "
|
| 805 |
+
"https://github.com/dchen236/FairFace and place at models/fairface_model.pt"
|
| 806 |
+
)
|
| 807 |
+
|
| 808 |
+
model = models.resnet34(pretrained=False)
|
| 809 |
+
# FairFace has 3 output heads: race(7), gender(2), age(9) = 18
|
| 810 |
+
model.fc = torch.nn.Linear(model.fc.in_features, 18)
|
| 811 |
+
model.load_state_dict(torch.load(model_path, map_location=self.device))
|
| 812 |
+
model.to(self.device)
|
| 813 |
+
model.eval()
|
| 814 |
+
return model
|
| 815 |
+
|
| 816 |
+
def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
|
| 817 |
+
"""Predict age, gender, and race."""
|
| 818 |
+
pil_image = Image.fromarray(img_rgb)
|
| 819 |
+
input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)
|
| 820 |
+
|
| 821 |
+
with torch.no_grad():
|
| 822 |
+
outputs = self.model(input_tensor)
|
| 823 |
+
|
| 824 |
+
outputs = outputs.cpu().numpy()[0]
|
| 825 |
+
|
| 826 |
+
# Split outputs: race(0-6), gender(7-8), age(9-17)
|
| 827 |
+
race_logits = outputs[0:7]
|
| 828 |
+
gender_logits = outputs[7:9]
|
| 829 |
+
age_logits = outputs[9:18]
|
| 830 |
+
|
| 831 |
+
race_probs = self._softmax(race_logits)
|
| 832 |
+
gender_probs = self._softmax(gender_logits)
|
| 833 |
+
age_probs = self._softmax(age_logits)
|
| 834 |
+
|
| 835 |
+
race_idx = int(np.argmax(race_probs))
|
| 836 |
+
gender_idx = int(np.argmax(gender_probs))
|
| 837 |
+
age_idx = int(np.argmax(age_probs))
|
| 838 |
+
|
| 839 |
+
# Estimate numeric age from bucket
|
| 840 |
+
age_ranges = [(0, 2), (3, 9), (10, 19), (20, 29), (30, 39), (40, 49), (50, 59), (60, 69), (70, 85)]
|
| 841 |
+
age_estimate = sum(age_ranges[age_idx]) / 2
|
| 842 |
+
|
| 843 |
+
return {
|
| 844 |
+
"age_estimate": round(age_estimate, 1),
|
| 845 |
+
"age_range": self.AGE_LABELS[age_idx],
|
| 846 |
+
"age_confidence": round(float(age_probs[age_idx]), 3),
|
| 847 |
+
"gender": self.GENDER_LABELS[gender_idx].lower(),
|
| 848 |
+
"gender_confidence": round(float(gender_probs[gender_idx]), 3),
|
| 849 |
+
"race": self.RACE_LABELS[race_idx],
|
| 850 |
+
"race_confidence": round(float(race_probs[race_idx]), 3),
|
| 851 |
+
"race_probabilities": {
|
| 852 |
+
label: round(float(prob), 3)
|
| 853 |
+
for label, prob in zip(self.RACE_LABELS, race_probs)
|
| 854 |
+
},
|
| 855 |
+
}
|
| 856 |
+
|
| 857 |
+
@staticmethod
|
| 858 |
+
def _softmax(x: np.ndarray) -> np.ndarray:
|
| 859 |
+
e_x = np.exp(x - np.max(x))
|
| 860 |
+
return e_x / e_x.sum()
|
| 861 |
+
```
|
| 862 |
+
|
| 863 |
+
#### face-service/analyzers/attribute_analyzer.py
|
| 864 |
+
|
| 865 |
+
```python
|
| 866 |
+
"""
|
| 867 |
+
CelebA Multi-Label Attribute Classifier
|
| 868 |
+
Predicts 40 binary facial attributes from CelebA-trained model.
|
| 869 |
+
Uses a pretrained model from HuggingFace.
|
| 870 |
+
"""
|
| 871 |
+
|
| 872 |
+
import os
|
| 873 |
+
from typing import Any
|
| 874 |
+
|
| 875 |
+
import numpy as np
|
| 876 |
+
import torch
|
| 877 |
+
import torchvision.transforms as transforms
|
| 878 |
+
from PIL import Image
|
| 879 |
+
|
| 880 |
+
|
| 881 |
+
CELEBA_ATTRIBUTES = [
|
| 882 |
+
"5_o_Clock_Shadow", "Arched_Eyebrows", "Attractive", "Bags_Under_Eyes",
|
| 883 |
+
"Bald", "Bangs", "Big_Lips", "Big_Nose", "Black_Hair", "Blond_Hair",
|
| 884 |
+
"Blurry", "Brown_Hair", "Bushy_Eyebrows", "Chubby", "Double_Chin",
|
| 885 |
+
"Eyeglasses", "Goatee", "Gray_Hair", "Heavy_Makeup", "High_Cheekbones",
|
| 886 |
+
"Male", "Mouth_Slightly_Open", "Mustache", "Narrow_Eyes", "No_Beard",
|
| 887 |
+
"Oval_Face", "Pale_Skin", "Pointy_Nose", "Receding_Hairline",
|
| 888 |
+
"Rosy_Cheeks", "Sideburns", "Smiling", "Straight_Hair", "Wavy_Hair",
|
| 889 |
+
"Wearing_Earrings", "Wearing_Hat", "Wearing_Lipstick", "Wearing_Necklace",
|
| 890 |
+
"Wearing_Necktie", "Young",
|
| 891 |
+
]
|
| 892 |
+
|
| 893 |
+
|
| 894 |
+
class AttributeAnalyzer:
|
| 895 |
+
"""CelebA 40-attribute binary classifier using a fine-tuned ResNet."""
|
| 896 |
+
|
| 897 |
+
def __init__(self):
|
| 898 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 899 |
+
self.model = self._load_model()
|
| 900 |
+
self.transform = transforms.Compose([
|
| 901 |
+
transforms.Resize((224, 224)),
|
| 902 |
+
transforms.ToTensor(),
|
| 903 |
+
transforms.Normalize(
|
| 904 |
+
mean=[0.485, 0.456, 0.406],
|
| 905 |
+
std=[0.229, 0.224, 0.225],
|
| 906 |
+
),
|
| 907 |
+
])
|
| 908 |
+
|
| 909 |
+
def _load_model(self):
|
| 910 |
+
"""
|
| 911 |
+
Load a CelebA attribute prediction model.
|
| 912 |
+
Using a ResNet-18 fine-tuned on CelebA for 40 attributes.
|
| 913 |
+
"""
|
| 914 |
+
from torchvision import models
|
| 915 |
+
|
| 916 |
+
model_path = "models/celeba_resnet18.pt"
|
| 917 |
+
|
| 918 |
+
if not os.path.exists(model_path):
|
| 919 |
+
os.makedirs("models", exist_ok=True)
|
| 920 |
+
# Try loading from HuggingFace
|
| 921 |
+
try:
|
| 922 |
+
from huggingface_hub import hf_hub_download
|
| 923 |
+
hf_hub_download(
|
| 924 |
+
repo_id="jnferreira/attribute-prediction-celebA",
|
| 925 |
+
filename="model.pt",
|
| 926 |
+
local_dir="models",
|
| 927 |
+
local_dir_use_symlinks=False,
|
| 928 |
+
)
|
| 929 |
+
os.rename("models/model.pt", model_path)
|
| 930 |
+
except Exception:
|
| 931 |
+
# Fallback: build a fresh model skeleton
|
| 932 |
+
# Users will need to train or provide weights
|
| 933 |
+
model = models.resnet18(pretrained=True)
|
| 934 |
+
model.fc = torch.nn.Linear(model.fc.in_features, 40)
|
| 935 |
+
torch.save(model.state_dict(), model_path)
|
| 936 |
+
print(
|
| 937 |
+
"WARNING: Using ImageNet-pretrained ResNet18 without CelebA fine-tuning. "
|
| 938 |
+
"Attribute predictions will be inaccurate. "
|
| 939 |
+
"Please provide CelebA-trained weights at models/celeba_resnet18.pt"
|
| 940 |
+
)
|
| 941 |
+
|
| 942 |
+
model = models.resnet18(pretrained=False)
|
| 943 |
+
model.fc = torch.nn.Linear(model.fc.in_features, 40)
|
| 944 |
+
model.load_state_dict(
|
| 945 |
+
torch.load(model_path, map_location=self.device)
|
| 946 |
+
)
|
| 947 |
+
model.to(self.device)
|
| 948 |
+
model.eval()
|
| 949 |
+
return model
|
| 950 |
+
|
| 951 |
+
def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
|
| 952 |
+
pil_image = Image.fromarray(img_rgb)
|
| 953 |
+
input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)
|
| 954 |
+
|
| 955 |
+
with torch.no_grad():
|
| 956 |
+
logits = self.model(input_tensor)
|
| 957 |
+
|
| 958 |
+
probs = torch.sigmoid(logits).cpu().numpy()[0]
|
| 959 |
+
|
| 960 |
+
# Build structured results
|
| 961 |
+
raw_attrs = {
|
| 962 |
+
attr: round(float(prob), 3)
|
| 963 |
+
for attr, prob in zip(CELEBA_ATTRIBUTES, probs)
|
| 964 |
+
}
|
| 965 |
+
|
| 966 |
+
# Interpret into user-friendly categories
|
| 967 |
+
result: dict[str, Any] = {"celeba_raw": raw_attrs}
|
| 968 |
+
|
| 969 |
+
# Hair color (pick highest confidence)
|
| 970 |
+
hair_colors = {
|
| 971 |
+
"black": raw_attrs.get("Black_Hair", 0),
|
| 972 |
+
"brown": raw_attrs.get("Brown_Hair", 0),
|
| 973 |
+
"blonde": raw_attrs.get("Blond_Hair", 0),
|
| 974 |
+
"gray": raw_attrs.get("Gray_Hair", 0),
|
| 975 |
+
}
|
| 976 |
+
result["hair_color_celeba"] = max(hair_colors, key=hair_colors.get)
|
| 977 |
+
|
| 978 |
+
# Hair type
|
| 979 |
+
if raw_attrs.get("Straight_Hair", 0) > 0.5:
|
| 980 |
+
result["hair_type_celeba"] = "straight"
|
| 981 |
+
elif raw_attrs.get("Wavy_Hair", 0) > 0.5:
|
| 982 |
+
result["hair_type_celeba"] = "wavy"
|
| 983 |
+
else:
|
| 984 |
+
result["hair_type_celeba"] = "unknown"
|
| 985 |
+
|
| 986 |
+
result["bald"] = raw_attrs.get("Bald", 0) > 0.5
|
| 987 |
+
result["bangs"] = raw_attrs.get("Bangs", 0) > 0.5
|
| 988 |
+
result["receding_hairline"] = raw_attrs.get("Receding_Hairline", 0) > 0.5
|
| 989 |
+
|
| 990 |
+
# Facial hair
|
| 991 |
+
has_beard = raw_attrs.get("No_Beard", 0) < 0.5
|
| 992 |
+
has_goatee = raw_attrs.get("Goatee", 0) > 0.5
|
| 993 |
+
has_mustache = raw_attrs.get("Mustache", 0) > 0.5
|
| 994 |
+
has_sideburns = raw_attrs.get("Sideburns", 0) > 0.5
|
| 995 |
+
has_stubble = raw_attrs.get("5_o_Clock_Shadow", 0) > 0.5
|
| 996 |
+
|
| 997 |
+
if has_goatee:
|
| 998 |
+
result["facial_hair"] = "goatee"
|
| 999 |
+
elif has_mustache and has_beard:
|
| 1000 |
+
result["facial_hair"] = "full_beard"
|
| 1001 |
+
elif has_mustache:
|
| 1002 |
+
result["facial_hair"] = "mustache"
|
| 1003 |
+
elif has_sideburns:
|
| 1004 |
+
result["facial_hair"] = "sideburns"
|
| 1005 |
+
elif has_stubble:
|
| 1006 |
+
result["facial_hair"] = "stubble"
|
| 1007 |
+
elif not has_beard:
|
| 1008 |
+
result["facial_hair"] = "clean_shaven"
|
| 1009 |
+
else:
|
| 1010 |
+
result["facial_hair"] = "beard"
|
| 1011 |
+
|
| 1012 |
+
# Appearance attributes
|
| 1013 |
+
result["wearing_glasses"] = raw_attrs.get("Eyeglasses", 0) > 0.5
|
| 1014 |
+
result["wearing_hat"] = raw_attrs.get("Wearing_Hat", 0) > 0.5
|
| 1015 |
+
result["bushy_eyebrows"] = raw_attrs.get("Bushy_Eyebrows", 0) > 0.5
|
| 1016 |
+
result["arched_eyebrows_celeba"] = raw_attrs.get("Arched_Eyebrows", 0) > 0.5
|
| 1017 |
+
result["bags_under_eyes"] = raw_attrs.get("Bags_Under_Eyes", 0) > 0.5
|
| 1018 |
+
result["high_cheekbones_celeba"] = raw_attrs.get("High_Cheekbones", 0) > 0.5
|
| 1019 |
+
result["oval_face_celeba"] = raw_attrs.get("Oval_Face", 0) > 0.5
|
| 1020 |
+
result["pointy_nose_celeba"] = raw_attrs.get("Pointy_Nose", 0) > 0.5
|
| 1021 |
+
result["big_lips_celeba"] = raw_attrs.get("Big_Lips", 0) > 0.5
|
| 1022 |
+
result["big_nose_celeba"] = raw_attrs.get("Big_Nose", 0) > 0.5
|
| 1023 |
+
result["narrow_eyes_celeba"] = raw_attrs.get("Narrow_Eyes", 0) > 0.5
|
| 1024 |
+
result["double_chin"] = raw_attrs.get("Double_Chin", 0) > 0.5
|
| 1025 |
+
result["chubby"] = raw_attrs.get("Chubby", 0) > 0.5
|
| 1026 |
+
result["rosy_cheeks"] = raw_attrs.get("Rosy_Cheeks", 0) > 0.5
|
| 1027 |
+
result["pale_skin"] = raw_attrs.get("Pale_Skin", 0) > 0.5
|
| 1028 |
+
result["young"] = raw_attrs.get("Young", 0) > 0.5
|
| 1029 |
+
result["smiling_celeba"] = raw_attrs.get("Smiling", 0) > 0.5
|
| 1030 |
+
result["mouth_open"] = raw_attrs.get("Mouth_Slightly_Open", 0) > 0.5
|
| 1031 |
+
|
| 1032 |
+
return result
|
| 1033 |
+
```
|
| 1034 |
+
|
| 1035 |
+
#### face-service/analyzers/parsing_analyzer.py
|
| 1036 |
+
|
| 1037 |
+
```python
|
| 1038 |
+
"""
|
| 1039 |
+
BiSeNet Face Parsing — 19-class semantic segmentation of the face.
|
| 1040 |
+
Segments: skin, eyebrows, eyes, nose, lips, hair, ears, neck, etc.
|
| 1041 |
+
"""
|
| 1042 |
+
|
| 1043 |
+
import os
|
| 1044 |
+
from typing import Any
|
| 1045 |
+
|
| 1046 |
+
import cv2
|
| 1047 |
+
import numpy as np
|
| 1048 |
+
import torch
|
| 1049 |
+
from torchvision import transforms
|
| 1050 |
+
|
| 1051 |
+
|
| 1052 |
+
class ParsingAnalyzer:
    """
    BiSeNet face parsing for hair/skin/feature segmentation.

    Parsing classes:
        0: background, 1: skin, 2: l_brow, 3: r_brow, 4: l_eye, 5: r_eye,
        6: eye_g (glasses), 7: l_ear, 8: r_ear, 9: ear_r (earring),
        10: nose, 11: mouth, 12: u_lip, 13: l_lip, 14: neck,
        15: necklace, 16: cloth, 17: hair, 18: hat
    """

    LABELS = {
        0: "background", 1: "skin", 2: "left_brow", 3: "right_brow",
        4: "left_eye", 5: "right_eye", 6: "glasses", 7: "left_ear",
        8: "right_ear", 9: "earring", 10: "nose", 11: "mouth",
        12: "upper_lip", 13: "lower_lip", 14: "neck", 15: "necklace",
        16: "cloth", 17: "hair", 18: "hat",
    }

    def __init__(self):
        """Pick the torch device, load the BiSeNet weights and build the input transform."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """Load the 19-class BiSeNet from ``models/bisenet_face_parsing.pt``.

        Raises:
            FileNotFoundError: if the weights file is not present. The
                ``models`` directory is created first so the file can be
                dropped into place.
        """
        model_path = "models/bisenet_face_parsing.pt"
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            # BiSeNet model from face-parsing.PyTorch
            # Download from: https://drive.google.com/file/d/154JgKpzCPW82qINcVieuPH3fZ2e0P812
            raise FileNotFoundError(
                "Please download BiSeNet face parsing weights from "
                "https://github.com/zllrunning/face-parsing.PyTorch and place at "
                "models/bisenet_face_parsing.pt"
            )

        from models.bisenet_model import BiSeNet  # You'll need to include this
        model = BiSeNet(n_classes=19)
        model.load_state_dict(
            torch.load(model_path, map_location=self.device)
        )
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_bgr: np.ndarray) -> dict[str, Any]:
        """Segment the face and derive mask-based attributes.

        Args:
            img_bgr: image in OpenCV BGR channel order.

        Returns:
            A dict with the binary ``_skin_mask`` / ``_hair_mask`` arrays (at
            the input resolution), glasses/hat flags, hair and skin coverage
            fractions, a hair length estimate, dark-spot statistics and, when
            enough forehead skin is visible, a forehead wrinkle score/label.
        """
        h, w = img_bgr.shape[:2]
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img_rgb, (512, 512))

        input_tensor = self.transform(img_resized).unsqueeze(0).to(self.device)

        with torch.no_grad():
            output = self.model(input_tensor)[0]  # BiSeNet returns tuple

        # Per-pixel class ids, scaled back to the input size. Nearest-neighbour
        # interpolation keeps the ids discrete.
        parsing = output.squeeze(0).argmax(0).cpu().numpy()
        parsing = cv2.resize(
            parsing.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST
        )

        # Generate masks
        skin_mask = (parsing == 1).astype(np.uint8)
        hair_mask = (parsing == 17).astype(np.uint8)
        glasses_mask = (parsing == 6).astype(np.uint8)
        hat_mask = (parsing == 18).astype(np.uint8)

        # Region stats (fraction of the whole frame)
        hair_area = hair_mask.sum() / (h * w)
        skin_area = skin_mask.sum() / (h * w)

        result: dict[str, Any] = {
            "_skin_mask": skin_mask,
            "_hair_mask": hair_mask,
            "has_glasses_parsing": int(glasses_mask.sum()) > 100,
            "wearing_hat_parsing": int(hat_mask.sum()) > 500,
            "hair_coverage": round(float(hair_area), 3),
            "skin_coverage": round(float(skin_area), 3),
        }

        result["hair_length_estimate"] = self._hair_length_label(hair_area)
        result.update(self._forehead_wrinkles(img_bgr, skin_mask))
        result.update(self._dark_spots(img_bgr, skin_mask))
        return result

    @staticmethod
    def _hair_length_label(hair_area: float) -> str:
        """Map the hair coverage fraction to a coarse length label."""
        if hair_area < 0.01:
            return "bald"
        if hair_area < 0.08:
            return "short"
        if hair_area < 0.18:
            return "medium"
        return "long"

    @staticmethod
    def _forehead_wrinkles(img_bgr: np.ndarray, skin_mask: np.ndarray) -> dict[str, Any]:
        """Score forehead wrinkles as Canny edge density over forehead skin.

        Returns an empty dict when fewer than ~100 skin pixels fall inside the
        fixed forehead window.
        """
        h, w = img_bgr.shape[:2]
        rows = slice(int(h * 0.05), int(h * 0.25))
        cols = slice(int(w * 0.3), int(w * 0.7))
        forehead_region = img_bgr[rows, cols]
        forehead_skin = skin_mask[rows, cols]
        if not forehead_skin.sum() > 100:
            return {}

        gray_forehead = cv2.cvtColor(forehead_region, cv2.COLOR_BGR2GRAY)
        # Detect edges on the real pixels first and mask afterwards. Zeroing
        # the non-skin pixels before Canny would turn the mask outline itself
        # into strong edges and inflate the score.
        edges = cv2.Canny(gray_forehead, 30, 80)
        edges = cv2.bitwise_and(edges, edges, mask=forehead_skin)
        # Canny marks edge pixels as 255, so this is (edge pixels / skin pixels).
        edge_density = edges.sum() / (forehead_skin.sum() * 255 + 1)

        if edge_density > 0.15:
            label = "heavy"
        elif edge_density > 0.08:
            label = "moderate"
        elif edge_density > 0.04:
            label = "mild"
        else:
            label = "none"
        return {
            "forehead_wrinkle_score": round(float(edge_density), 3),
            "forehead_wrinkles": label,
        }

    @staticmethod
    def _dark_spots(img_bgr: np.ndarray, skin_mask: np.ndarray) -> dict[str, Any]:
        """Count small dark blobs on skin as possible freckles/moles."""
        skin_region = cv2.bitwise_and(img_bgr, img_bgr, mask=skin_mask)
        gray_skin = cv2.cvtColor(skin_region, cv2.COLOR_BGR2GRAY)
        # Detect dark spots. The inverse threshold also fires on the zeroed
        # background, so the result is masked back to skin.
        _, dark_spots = cv2.threshold(gray_skin, 80, 255, cv2.THRESH_BINARY_INV)
        dark_spots = cv2.bitwise_and(dark_spots, dark_spots, mask=skin_mask)
        # Find contours of dark spots and keep only the small ones
        contours, _ = cv2.findContours(dark_spots, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        small_spots = [c for c in contours if 5 < cv2.contourArea(c) < 200]
        return {
            "possible_freckles_moles": len(small_spots) > 10,
            "dark_spot_count": len(small_spots),
        }
|
| 1182 |
+
```
|
| 1183 |
+
|
| 1184 |
+
#### face-service/analyzers/emotion_analyzer.py
|
| 1185 |
+
|
| 1186 |
+
```python
|
| 1187 |
+
"""
|
| 1188 |
+
HSEmotion — State-of-the-art facial emotion recognition.
|
| 1189 |
+
Supports 8 emotions on AffectNet.
|
| 1190 |
+
"""
|
| 1191 |
+
|
| 1192 |
+
import os
|
| 1193 |
+
from typing import Any
|
| 1194 |
+
|
| 1195 |
+
import cv2
|
| 1196 |
+
import numpy as np
|
| 1197 |
+
import torch
|
| 1198 |
+
import torchvision.transforms as transforms
|
| 1199 |
+
from PIL import Image
|
| 1200 |
+
|
| 1201 |
+
|
| 1202 |
+
class EmotionAnalyzer:
    """HSEmotion-based facial expression classifier."""

    # Index i of the model's 8-way output is reported as EMOTION_LABELS[i].
    EMOTION_LABELS = [
        "angry", "contempt", "disgust", "fear",
        "happy", "neutral", "sad", "surprise",
    ]

    def __init__(self):
        """Pick the torch device, load the model and build the preprocessing pipeline."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.Resize((260, 260)),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """Load HSEmotion EfficientNet model.

        If ``models/hsemotion_enet_b0_8.pt`` is missing, the weights are
        fetched from the Hugging Face Hub and moved to that path.

        Raises:
            FileNotFoundError: if the weights are absent and cannot be
                downloaded. The underlying error is attached as ``__cause__``.
        """
        model_path = "models/hsemotion_enet_b0_8.pt"

        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            try:
                from huggingface_hub import hf_hub_download
                # HSEmotion models available at:
                # https://github.com/HSE-asavchenko/face-emotion-recognition
                downloaded_path = hf_hub_download(
                    repo_id="HSE-asavchenko/hsemotion",
                    filename="enet_b0_8_best_afew.pt",
                    local_dir="models",
                    local_dir_use_symlinks=False,
                )
                # Use the path the hub client reports rather than assuming
                # where it stored the file.
                os.replace(downloaded_path, model_path)
            except Exception as err:
                # Keep the original failure (missing package, network, bad
                # repo id, ...) attached so it is visible in the traceback.
                raise FileNotFoundError(
                    "Please download HSEmotion weights from "
                    "https://github.com/HSE-asavchenko/face-emotion-recognition"
                ) from err

        import timm
        model = timm.create_model("efficientnet_b0", pretrained=False, num_classes=8)
        # NOTE(review): assumes the checkpoint is a plain state dict that
        # matches timm's efficientnet_b0 — confirm against the published file.
        model.load_state_dict(torch.load(model_path, map_location=self.device))
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
        """Classify the facial expression in an RGB image array.

        Returns:
            ``emotion`` (top label), ``emotion_confidence`` (its softmax
            probability) and ``emotion_probabilities`` (label -> probability),
            all rounded to three decimals.
        """
        pil_image = Image.fromarray(img_rgb)
        input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            logits = self.model(input_tensor)

        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        top_idx = int(np.argmax(probs))

        return {
            "emotion": self.EMOTION_LABELS[top_idx],
            "emotion_confidence": round(float(probs[top_idx]), 3),
            "emotion_probabilities": {
                label: round(float(prob), 3)
                for label, prob in zip(self.EMOTION_LABELS, probs)
            },
        }
|
| 1271 |
+
```
|
| 1272 |
+
|
| 1273 |
+
#### face-service/analyzers/color_analyzer.py
|
| 1274 |
+
|
| 1275 |
+
```python
|
| 1276 |
+
"""
|
| 1277 |
+
Pixel-level color analysis using segmentation masks from BiSeNet
|
| 1278 |
+
and landmark positions from MediaPipe.
|
| 1279 |
+
"""
|
| 1280 |
+
|
| 1281 |
+
from typing import Any, Optional
|
| 1282 |
+
|
| 1283 |
+
import cv2
|
| 1284 |
+
import numpy as np
|
| 1285 |
+
from sklearn.cluster import KMeans
|
| 1286 |
+
|
| 1287 |
+
|
| 1288 |
+
class ColorAnalyzer:
    """Analyzes skin tone, eye color, and hair color from pixel data."""

    # (exclusive lower bound on mean LAB L, label), tested top to bottom;
    # values at or below the last bound fall through to the default label.
    _SKIN_TONE_BANDS = (
        (180, "very_light"),
        (155, "light"),
        (130, "medium_light"),
        (105, "medium"),
        (80, "medium_dark"),
    )
    _FITZPATRICK_BANDS = (
        (170, "I"),
        (145, "II"),
        (120, "III"),
        (95, "IV"),
        (70, "V"),
    )
    # Landmark indices sampled as iris centres.
    _IRIS_LANDMARKS = (("left", 468), ("right", 473))

    def analyze(
        self,
        img_rgb: np.ndarray,
        skin_mask: Optional[np.ndarray] = None,
        hair_mask: Optional[np.ndarray] = None,
        landmark_data: Optional[list[dict]] = None,
    ) -> dict[str, Any]:
        """Derive colour attributes from an RGB image.

        Args:
            img_rgb: image in RGB channel order.
            skin_mask: mask selecting skin pixels (non-zero = skin). Skin
                tone is skipped unless it sums to more than 100.
            hair_mask: mask selecting hair pixels. Hair colour/texture are
                skipped unless it sums to more than 500.
            landmark_data: list of dicts with ``"x"``/``"y"`` entries that are
                multiplied by the image width/height. Eye colour needs more
                than 473 entries.

        Returns:
            A dict containing only the attributes that could be computed;
            empty when no usable mask or landmarks are supplied.
        """
        results: dict[str, Any] = {}

        # === Skin Tone ===
        if skin_mask is not None and skin_mask.sum() > 100:
            results.update(self._skin_tone(img_rgb, skin_mask))

        # === Hair Color ===
        if hair_mask is not None and hair_mask.sum() > 500:
            results.update(self._hair_color(img_rgb, hair_mask))
            results.update(self._hair_texture(img_rgb, hair_mask))

        # === Eye Color ===
        if landmark_data is not None and len(landmark_data) > 473:
            results.update(self._eye_color(img_rgb, landmark_data))

        return results

    @staticmethod
    def _band(value: float, bands, default: str) -> str:
        """Return the label of the first band whose bound ``value`` exceeds."""
        return next((label for bound, label in bands if value > bound), default)

    def _skin_tone(self, img_rgb: np.ndarray, skin_mask: np.ndarray) -> dict[str, Any]:
        """Classify skin tone from the mean LAB lightness of the masked pixels."""
        skin_pixels = img_rgb[skin_mask > 0]
        # Convert to LAB for perceptually uniform brightness
        skin_lab = cv2.cvtColor(
            skin_pixels.reshape(-1, 1, 3), cv2.COLOR_RGB2LAB
        ).reshape(-1, 3)
        avg_l = float(skin_lab[:, 0].mean())  # L channel (brightness)

        return {
            "skin_tone": self._band(avg_l, self._SKIN_TONE_BANDS, "dark"),
            "skin_tone_score": round(avg_l / 255, 3),
            # Fitzpatrick scale approximation
            "fitzpatrick_type": self._band(avg_l, self._FITZPATRICK_BANDS, "VI"),
        }

    @staticmethod
    def _hair_color(img_rgb: np.ndarray, hair_mask: np.ndarray) -> dict[str, Any]:
        """Name the dominant hair colour found by K-means over hair pixels."""
        hair_pixels = img_rgb[hair_mask > 0]
        if len(hair_pixels) <= 100:
            return {}

        # Sample at most 5000 pixels. The generator is seeded so that, like
        # the seeded KMeans below, repeated calls on one image agree.
        sample_size = min(5000, len(hair_pixels))
        rng = np.random.default_rng(42)
        indices = rng.choice(len(hair_pixels), sample_size, replace=False)
        sampled = hair_pixels[indices].astype(np.float32)

        kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
        kmeans.fit(sampled)

        # Pick the cluster with most members
        labels, counts = np.unique(kmeans.labels_, return_counts=True)
        dominant_idx = labels[np.argmax(counts)]
        dominant_color = kmeans.cluster_centers_[dominant_idx].astype(int)

        r, g, b = (int(channel) for channel in dominant_color)
        brightness = (r + g + b) / 3

        hsv_color = cv2.cvtColor(
            np.array([[dominant_color]], dtype=np.uint8), cv2.COLOR_RGB2HSV
        )[0][0]
        hue, sat = int(hsv_color[0]), int(hsv_color[1])

        # Classify hair color
        if brightness < 40:
            name = "black"
        elif brightness > 190:
            name = "platinum_blonde"
        elif brightness > 160 and sat < 50:
            name = "gray"
        elif brightness > 140 and 15 < hue < 35:
            name = "blonde"
        elif (hue < 15 or hue > 160) and sat > 80:
            name = "red"
        elif brightness > 60:
            name = "brown"
        else:
            name = "dark_brown"

        return {"hair_color_detected": name, "hair_dominant_rgb": [r, g, b]}

    @staticmethod
    def _hair_texture(img_rgb: np.ndarray, hair_mask: np.ndarray) -> dict[str, Any]:
        """Label hair texture from a high/low frequency energy ratio (FFT-based)."""
        # Grayscale conversion is per-pixel, so converting the whole image and
        # then selecting hair pixels equals masking first.
        gray_hair_masked = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)[hair_mask > 0]
        if len(gray_hair_masked) <= 1000:
            return {}

        # High frequency = curly, low frequency = straight
        # NOTE(review): the 1024 samples are the first hair pixels in raster
        # order, not a spatially contiguous strip — confirm this is intended.
        hair_patch = gray_hair_masked[:1024].astype(np.float32)
        magnitude = np.abs(np.fft.fft(hair_patch))
        # Ratio of high freq to low freq energy
        # NOTE(review): this is the full two-sided spectrum, so the "high"
        # slice also contains the mirrored low-frequency bins, and the "low"
        # slice contains the DC term. Thresholds below are tuned to that.
        quarter = len(magnitude) // 4
        low_freq = magnitude[:quarter].sum()
        high_freq = magnitude[quarter:].sum()
        freq_ratio = high_freq / (low_freq + 1e-6)

        if freq_ratio > 0.8:
            texture = "curly"
        elif freq_ratio > 0.5:
            texture = "wavy"
        else:
            texture = "straight"
        return {"hair_texture_detected": texture}

    def _eye_color(self, img_rgb: np.ndarray, landmark_data: list[dict]) -> dict[str, Any]:
        """Classify each iris colour and derive a consensus ``eye_color``."""
        h, w = img_rgb.shape[:2]
        found: dict[str, Any] = {}

        for eye_name, iris_idx in self._IRIS_LANDMARKS:
            ix = int(landmark_data[iris_idx]["x"] * w)
            iy = int(landmark_data[iris_idx]["y"] * h)

            # Sample a small patch around iris
            pad = 3
            y1, y2 = max(0, iy - pad), min(h, iy + pad)
            x1, x2 = max(0, ix - pad), min(w, ix + pad)
            # A landmark outside the frame yields an empty or inverted window.
            # Skip it explicitly: a negative end index would otherwise wrap
            # around and sample most of the image.
            if y2 <= y1 or x2 <= x1:
                continue

            avg_color = img_rgb[y1:y2, x1:x2].mean(axis=(0, 1))

            # Convert to HSV for better classification
            hsv = cv2.cvtColor(
                np.array([[avg_color]], dtype=np.uint8), cv2.COLOR_RGB2HSV
            )[0][0]
            hue_val, sat_val, val_val = int(hsv[0]), int(hsv[1]), int(hsv[2])

            if val_val < 60:
                color = "dark_brown"
            elif sat_val < 30:
                color = "gray"
            elif 100 < hue_val < 130 and sat_val > 50:
                color = "blue"
            elif 35 < hue_val < 85 and sat_val > 40:
                color = "green"
            elif 15 < hue_val < 35 and sat_val > 40:
                color = "hazel"
            elif val_val > 120 and sat_val > 60:
                color = "amber"
            else:
                color = "brown"

            found[f"{eye_name}_eye_color"] = color

        # Consensus
        if "left_eye_color" in found and "right_eye_color" in found:
            found["eye_color"] = found["left_eye_color"]  # Use left as primary
            if found["left_eye_color"] != found["right_eye_color"]:
                found["heterochromia"] = True

        return found
|
| 1463 |
+
```
|
| 1464 |
+
|
| 1465 |
+
#### face-service/Dockerfile
|
| 1466 |
+
|
| 1467 |
+
```dockerfile
|
| 1468 |
+
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for OpenCV.
# libgl1 is the real package; libgl1-mesa-glx was only a transitional alias
# and is no longer available on newer Debian bases, which breaks the build.
RUN apt-get update && apt-get install -y \
    libgl1 \
    libglib2.0-0 \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Download MediaPipe model at build time
RUN python -c "from analyzers.landmark_analyzer import LandmarkAnalyzer; LandmarkAnalyzer()"

EXPOSE 8000

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 1490 |
+
```
|
| 1491 |
+
|
| 1492 |
+
### Connect Your Next.js App to the Microservice
|
| 1493 |
+
|
| 1494 |
+
#### lib/face-analysis/api-client.ts
|
| 1495 |
+
|
| 1496 |
+
```typescript
|
| 1497 |
+
/**
|
| 1498 |
+
* Client for the Python face analysis microservice.
|
| 1499 |
+
* Replaces the Supabase Edge Function that called AWS Rekognition.
|
| 1500 |
+
*/
|
| 1501 |
+
|
| 1502 |
+
const FACE_SERVICE_URL = process.env.NEXT_PUBLIC_FACE_SERVICE_URL || "http://localhost:8000";
|
| 1503 |
+
|
| 1504 |
+
/**
 * Shape of the `data` payload returned by the face analysis service.
 *
 * Fields are grouped by the analyzer that produces them. Fields marked
 * optional at the end of a group are emitted by the Python analyzers shown
 * in this document but were missing from the type.
 * NOTE(review): confirm the API layer forwards them unchanged.
 */
export interface FaceAnalysisResult {
  // Geometric (MediaPipe)
  face_shape: string;
  face_shape_metrics: Record<string, number>;
  forehead_width: string;
  jawline_type: string;
  jawline_angle: number;
  chin_type: string;
  cheekbone_prominence: string;
  cheek_fullness: string;
  eye_shape: string;
  eye_depth: string;
  eye_spacing: string;
  eye_size: string;
  eyebrow_shape: string;
  eyebrow_arch_height: string;
  eyebrow_thickness: string;
  possible_unibrow: boolean;
  nose_shape: string;
  nose_bridge: string;
  nose_tip_shape: string;
  nostril_width: string;
  lip_fullness: string;
  lip_balance: string;
  mouth_width: string;
  cupids_bow: string;
  smiling: boolean;
  smile_asymmetry: number;
  possible_dimples: boolean;
  facial_asymmetry_score: number;

  // Demographics (FairFace)
  age_estimate: number;
  age_range: string;
  age_confidence: number;
  gender: string;
  gender_confidence: number;
  race: string;
  race_confidence: number;
  race_probabilities: Record<string, number>;

  // CelebA Attributes
  facial_hair: string;
  wearing_glasses: boolean;
  bald: boolean;
  receding_hairline: boolean;
  hair_color_celeba: string;
  hair_type_celeba: string;
  bags_under_eyes: boolean;
  double_chin: boolean;
  bushy_eyebrows: boolean;
  high_cheekbones_celeba: boolean;

  // Emotion (HSEmotion)
  emotion: string;
  emotion_confidence: number;
  emotion_probabilities: Record<string, number>;

  // Color Analysis
  skin_tone: string;
  skin_tone_score: number;
  fitzpatrick_type: string;
  eye_color: string;
  hair_color_detected: string;
  hair_dominant_rgb: number[];
  hair_texture_detected: string;
  /** Per-eye colours behind the `eye_color` consensus. */
  left_eye_color?: string;
  right_eye_color?: string;
  /** Only set (to `true`) when the two eyes were classified differently. */
  heterochromia?: boolean;

  // Parsing
  hair_length_estimate: string;
  forehead_wrinkles: string;
  possible_freckles_moles: boolean;
  dark_spot_count: number;
  /** Edge density behind `forehead_wrinkles`. */
  forehead_wrinkle_score?: number;
  /** Fraction of the frame classified as hair / skin. */
  hair_coverage?: number;
  skin_coverage?: number;

  // Blendshapes
  blendshapes: Record<string, number>;
}
|
| 1580 |
+
|
| 1581 |
+
/**
 * Upload an image to the face analysis service and return its attributes.
 *
 * Sends the file as multipart form data (field name `file`) to
 * `${FACE_SERVICE_URL}/analyze`.
 *
 * @throws Error when the HTTP status is not OK (the message carries the
 *   server's `detail`) or when the response body reports `success: false`.
 */
export async function analyzeFace(imageFile: File): Promise<FaceAnalysisResult> {
  const formData = new FormData();
  formData.append("file", imageFile);

  const response = await fetch(`${FACE_SERVICE_URL}/analyze`, {
    method: "POST",
    body: formData,
  });

  if (!response.ok) {
    // The error body may not be JSON at all, and `detail` may be missing or
    // structured (object/array). Normalise to a readable string so the
    // message never degrades to "undefined" or "[object Object]".
    const error = await response.json().catch(() => ({ detail: "Unknown error" }));
    const rawDetail = error?.detail ?? `HTTP ${response.status}`;
    const detail = typeof rawDetail === "string" ? rawDetail : JSON.stringify(rawDetail);
    throw new Error(`Face analysis failed: ${detail}`);
  }

  const result = await response.json();

  if (!result.success) {
    throw new Error("Face analysis returned unsuccessful result");
  }

  return result.data;
}
|
| 1603 |
+
|
| 1604 |
+
/**
 * Probe the service's `/health` endpoint.
 *
 * Resolves to `true` only for an OK HTTP status. Any request failure
 * resolves to `false` instead of rejecting.
 */
export async function checkServiceHealth(): Promise<boolean> {
  return fetch(`${FACE_SERVICE_URL}/health`)
    .then((healthResponse) => healthResponse.ok)
    .catch(() => false);
}
|
| 1612 |
+
```
|
| 1613 |
+
|
| 1614 |
+
### Deploy to Hugging Face Spaces (Free)
|
| 1615 |
+
|
| 1616 |
+
Create a `README.md` in the `face-service/` directory with the following frontmatter:
|
| 1617 |
+
|
| 1618 |
+
```yaml
|
| 1619 |
+
---
|
| 1620 |
+
title: HCP Face Analysis
|
| 1621 |
+
emoji: π
|
| 1622 |
+
colorFrom: blue
|
| 1623 |
+
colorTo: purple
|
| 1624 |
+
sdk: docker
|
| 1625 |
+
app_port: 8000
|
| 1626 |
+
---
|
| 1627 |
+
```
|
| 1628 |
+
|
| 1629 |
+
---
|
| 1630 |
+
|
| 1631 |
+
## Final Architecture Summary
|
| 1632 |
+
|
| 1633 |
+
```
|
| 1634 |
+
Browser (Next.js)
|
| 1635 |
+
    │
|
| 1636 |
+
    │  POST /analyze (image file)
|
| 1637 |
+
    ▼
|
| 1638 |
+
Hugging Face Spaces (FREE, 2GB RAM)
|
| 1639 |
+
    └── FastAPI Server
|
| 1640 |
+
         ├── MediaPipe (4MB) ──────► 478 landmarks → ~40 geometric features
|
| 1641 |
+
         ├── FairFace (90MB) ──────► age, gender, race
|
| 1642 |
+
         ├── CelebA ResNet (44MB) ─► 40 binary attributes (hair, beard, glasses...)
|
| 1643 |
+
         ├── BiSeNet (50MB) ───────► face parsing → hair/skin segmentation
|
| 1644 |
+
         ├── HSEmotion (20MB) ─────► 8 emotions
|
| 1645 |
+
         └── Color Analysis ───────► skin tone, eye color, hair color
|
| 1646 |
+
    │
|
| 1647 |
+
    │  JSON response (~150 attributes)
|
| 1648 |
+
    ▼
|
| 1649 |
+
Supabase (existing)
|
| 1650 |
+
    ├── Store results in PostgreSQL
|
| 1651 |
+
    └── Auth / Storage unchanged
|
| 1652 |
+
```
|
| 1653 |
+
|
| 1654 |
+
| Metric | Value |
|
| 1655 |
+
|--------|-------|
|
| 1656 |
+
| **Total models** | ~210MB |
|
| 1657 |
+
| **Features detected** | **~95% of the full feature list** |
|
| 1658 |
+
| **Hosting cost** | **$0** (HF Spaces free tier) |
|
| 1659 |
+
| **Latency** | ~2-4s per image (CPU) |
|
| 1660 |
+
| **Languages** | Python (microservice) + TypeScript (existing Next.js) |
|
| 1661 |
+
| **Only missing** | Teeth analysis, scar detection, Adam's apple (require specialized fine-tuned models) |
|
| 1662 |
+
|
| 1663 |
+
---
|
| 1664 |
+
|
| 1665 |
+
## Required Feature List
|
| 1666 |
+
|
| 1667 |
+
### Face shape
|
| 1668 |
+
- Oval face, Round face, Square face, Heart-shaped face, Diamond face, Long/oblong face, Triangle face
|
| 1669 |
+
- Jawline sharp, Jawline soft, Strong jaw, Receding chin, Pointed chin, Cleft chin, Wide chin
|
| 1670 |
+
- High cheekbones, Flat cheekbones, Full cheeks, Hollow cheeks
|
| 1671 |
+
- Broad forehead, Narrow forehead
|
| 1672 |
+
|
| 1673 |
+
### Eye shape
|
| 1674 |
+
- Almond, Round, Hooded, Monolid, Deep-set eyes, Protruding eyes
|
| 1675 |
+
- Upturned eyes, Downturned eyes, Wide-set eyes, Close-set eyes, Large eyes, Small eyes
|
| 1676 |
+
- Eye color: brown, blue, green, hazel
|
| 1677 |
+
- Dark under-eyes, Eye bags, Crow's feet
|
| 1678 |
+
|
| 1679 |
+
### Eyebrows
|
| 1680 |
+
- Thick, Thin, Arched, Straight, Bushy, Unibrow
|
| 1681 |
+
- High eyebrow arch, Low eyebrow arch
|
| 1682 |
+
|
| 1683 |
+
### Nose
|
| 1684 |
+
- Straight, Aquiline, Button, Upturned, Wide, Narrow
|
| 1685 |
+
- Flat bridge, High bridge, Wide nostrils, Narrow nostrils
|
| 1686 |
+
- Rounded tip, Pointed tip
|
| 1687 |
+
|
| 1688 |
+
### Lips & Mouth
|
| 1689 |
+
- Full, Thin, Wide mouth, Small mouth
|
| 1690 |
+
- Defined cupid's bow, Uneven lips
|
| 1691 |
+
- Gap teeth, Crooked teeth, Straight teeth, Overbite, Underbite
|
| 1692 |
+
- Dimples, Smile lines, Asymmetrical smile
|
| 1693 |
+
|
| 1694 |
+
### Hair
|
| 1695 |
+
- Straight, Wavy, Curly, Coily
|
| 1696 |
+
- Short, Long, Bald, Receding hairline, Widow's peak
|
| 1697 |
+
- Thick, Thin
|
| 1698 |
+
- Color: black, brown, blonde, red, gray, dyed
|
| 1699 |
+
|
| 1700 |
+
### Facial hair
|
| 1701 |
+
- Full beard, Stubble, Goatee, Mustache, Clean-shaven, Sideburns
|
| 1702 |
+
|
| 1703 |
+
### Skin & Other
|
| 1704 |
+
- Skin tone: light, medium, dark
|
| 1705 |
+
- Freckles, Moles, Birthmark, Scar, Acne
|
| 1706 |
+
- Wrinkles, Forehead lines, Smile lines
|
| 1707 |
+
- Facial asymmetry, Prominent Adam's apple
|
requirements.txt
CHANGED
|
@@ -13,3 +13,6 @@ timm==1.0.3
|
|
| 13 |
safetensors>=0.6.0
|
| 14 |
transformers==4.45.2
|
| 15 |
hsemotion>=0.2.2
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
safetensors>=0.6.0
|
| 14 |
transformers==4.45.2
|
| 15 |
hsemotion>=0.2.2
|
| 16 |
+
openai-clip==1.0.1
|
| 17 |
+
ftfy
|
| 18 |
+
regex
|