Evan Li committed on
Commit
8aee038
·
1 Parent(s): 57be97e
Dockerfile CHANGED
@@ -18,6 +18,10 @@ RUN mkdir -p models && \
18
  wget -q -O models/face_landmarker.task \
19
  "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
20
 
 
 
 
 
21
  COPY . .
22
 
23
  EXPOSE 7860
 
18
  wget -q -O models/face_landmarker.task \
19
  "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
20
 
21
+ # Pre-download FaRL (face-tuned CLIP ViT-B/16) weights for attribute classifier
22
+ RUN wget -q -O models/FaRL-Base-Patch16-LAIONFace20M-ep64.pth \
23
+ "https://github.com/FacePerceiver/FaRL/releases/download/pretrained_weights/FaRL-Base-Patch16-LAIONFace20M-ep64.pth"
24
+
25
  COPY . .
26
 
27
  EXPOSE 7860
analyzers/attribute_analyzer.py CHANGED
@@ -1,26 +1,28 @@
1
  """
2
- CLIP zero-shot attribute classification.
3
-
4
- Previous version put all ~70 candidate labels into a single zero-shot pipeline
5
- call, which applied one softmax across every label at once. That meant each
6
- binary pair ("wearing earrings" vs "not wearing earrings") received ~1/70 of
7
- the probability mass and the comparison between positive and negative was
8
  essentially noise — hence the hallucinated accessories.
9
-
10
- This version encodes the image once with CLIPModel.get_image_features, then
11
- runs a fresh 2-way softmax per binary pair. Group labels (hair color,
12
- hair texture) get their own N-way softmax. All scores are now independent
13
- of how many other labels we happen to be asking about.
14
  """
15
 
 
 
16
  from typing import Any
17
 
 
18
  import torch
19
  from PIL import Image
20
- from transformers import CLIPModel, CLIPProcessor
21
 
22
 
23
- CLIP_MODEL_ID = "openai/clip-vit-base-patch32"
 
 
 
24
 
25
  PAIRS = {
26
  "wearing_glasses": ("wearing eyeglasses", "not wearing eyeglasses"),
@@ -59,8 +61,6 @@ PAIRS = {
59
  HAIR_COLOR_LABELS = ["black hair", "blond hair", "brown hair", "gray hair"]
60
  HAIR_TEXTURE_LABELS = ["straight hair", "wavy hair", "curly hair"]
61
 
62
- # Some pairs default to False unless CLIP is confidently past this threshold.
63
- # Stops borderline cases from being flipped to True on a 51/49 split.
64
  ACCESSORY_THRESHOLD = 0.65
65
  ACCESSORY_KEYS = {
66
  "wearing_earrings", "wearing_necklace", "wearing_necktie", "wearing_hat",
@@ -76,32 +76,47 @@ class AttributeAnalyzer:
76
  def __init__(self):
77
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
78
  self.model = None
79
- self.processor = None
80
  try:
81
- self.model = CLIPModel.from_pretrained(CLIP_MODEL_ID).to(self.device).eval()
82
- self.processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  except Exception as exc:
84
- print(f"[AttributeAnalyzer] Failed to load CLIP: {exc}")
85
 
86
  @torch.no_grad()
87
  def analyze(self, img_rgb) -> dict[str, Any]:
88
- if self.model is None or self.processor is None:
89
  return self._empty_result()
90
 
91
  pil = Image.fromarray(img_rgb)
92
-
93
- # Encode image once.
94
- image_inputs = self.processor(images=pil, return_tensors="pt").to(self.device)
95
- image_features = self.model.get_image_features(**image_inputs)
96
  image_features = image_features / image_features.norm(dim=-1, keepdim=True)
97
 
98
- # Per-pair scoring: each pair gets its own independent 2-way softmax.
99
  pair_scores: dict[str, float] = {}
100
  for key, (positive, negative) in PAIRS.items():
101
- prompts = [_prompt(positive), _prompt(negative)]
102
- pair_scores[key] = self._softmax_positive(image_features, prompts)
 
103
 
104
- # Group scoring (N-way softmax within each group).
105
  color_scores = self._group_softmax(
106
  image_features, [_prompt(x) for x in HAIR_COLOR_LABELS]
107
  )
@@ -143,10 +158,8 @@ class AttributeAnalyzer:
143
 
144
  @torch.no_grad()
145
  def _softmax_positive(self, image_features: torch.Tensor, prompts: list[str]) -> float:
146
- text_inputs = self.processor(
147
- text=prompts, return_tensors="pt", padding=True
148
- ).to(self.device)
149
- text_features = self.model.get_text_features(**text_inputs)
150
  text_features = text_features / text_features.norm(dim=-1, keepdim=True)
151
  logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
152
  probs = torch.softmax(logits, dim=-1)[0]
@@ -154,10 +167,8 @@ class AttributeAnalyzer:
154
 
155
  @torch.no_grad()
156
  def _group_softmax(self, image_features: torch.Tensor, prompts: list[str]) -> list[float]:
157
- text_inputs = self.processor(
158
- text=prompts, return_tensors="pt", padding=True
159
- ).to(self.device)
160
- text_features = self.model.get_text_features(**text_inputs)
161
  text_features = text_features / text_features.norm(dim=-1, keepdim=True)
162
  logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
163
  probs = torch.softmax(logits, dim=-1)[0]
 
1
  """
2
+ FaRL-based facial attribute classification.
3
+
4
+ Same CLIP ViT-B/16 architecture as before, but loaded with FaRL weights
5
+ (CVPR 2022) which were pretrained on LAION-Face — the 50M face-text-pair
6
+ subset of LAION-400M β€” instead of OpenAI's generic web crawl. The encoder
7
+ discriminates facial attributes much better while keeping the prompt-pair
8
+ zero-shot interface intact.
9
+
10
+ Falls back to vanilla OpenAI CLIP ViT-B/16 if the FaRL .pth is missing.
 
 
 
11
  """
12
 
13
+ import os
14
+ from pathlib import Path
15
  from typing import Any
16
 
17
+ import clip
18
  import torch
19
  from PIL import Image
 
20
 
21
 
22
+ CLIP_ARCH = "ViT-B/16"
23
+ FARL_WEIGHTS_PATH = os.environ.get(
24
+ "FARL_WEIGHTS_PATH", "models/FaRL-Base-Patch16-LAIONFace20M-ep64.pth"
25
+ )
26
 
27
  PAIRS = {
28
  "wearing_glasses": ("wearing eyeglasses", "not wearing eyeglasses"),
 
61
  HAIR_COLOR_LABELS = ["black hair", "blond hair", "brown hair", "gray hair"]
62
  HAIR_TEXTURE_LABELS = ["straight hair", "wavy hair", "curly hair"]
63
 
 
 
64
  ACCESSORY_THRESHOLD = 0.65
65
  ACCESSORY_KEYS = {
66
  "wearing_earrings", "wearing_necklace", "wearing_necktie", "wearing_hat",
 
76
  def __init__(self):
77
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
78
  self.model = None
79
+ self.preprocess = None
80
  try:
81
+ model, preprocess = clip.load(CLIP_ARCH, device="cpu")
82
+
83
+ weights_path = Path(FARL_WEIGHTS_PATH)
84
+ if weights_path.exists():
85
+ farl_state = torch.load(weights_path, map_location="cpu")
86
+ state = farl_state.get("state_dict", farl_state)
87
+ missing, unexpected = model.load_state_dict(state, strict=False)
88
+ print(
89
+ f"[AttributeAnalyzer] Loaded FaRL weights from {weights_path} "
90
+ f"(missing={len(missing)}, unexpected={len(unexpected)})"
91
+ )
92
+ else:
93
+ print(
94
+ f"[AttributeAnalyzer] FaRL weights not found at {weights_path}; "
95
+ "falling back to vanilla OpenAI CLIP ViT-B/16"
96
+ )
97
+
98
+ # Force float32 so per-pair softmax math is stable on both CPU and CUDA.
99
+ self.model = model.float().to(self.device).eval()
100
+ self.preprocess = preprocess
101
  except Exception as exc:
102
+ print(f"[AttributeAnalyzer] Failed to load model: {exc}")
103
 
104
  @torch.no_grad()
105
  def analyze(self, img_rgb) -> dict[str, Any]:
106
+ if self.model is None or self.preprocess is None:
107
  return self._empty_result()
108
 
109
  pil = Image.fromarray(img_rgb)
110
+ image_tensor = self.preprocess(pil).unsqueeze(0).to(self.device)
111
+ image_features = self.model.encode_image(image_tensor)
 
 
112
  image_features = image_features / image_features.norm(dim=-1, keepdim=True)
113
 
 
114
  pair_scores: dict[str, float] = {}
115
  for key, (positive, negative) in PAIRS.items():
116
+ pair_scores[key] = self._softmax_positive(
117
+ image_features, [_prompt(positive), _prompt(negative)]
118
+ )
119
 
 
120
  color_scores = self._group_softmax(
121
  image_features, [_prompt(x) for x in HAIR_COLOR_LABELS]
122
  )
 
158
 
159
  @torch.no_grad()
160
  def _softmax_positive(self, image_features: torch.Tensor, prompts: list[str]) -> float:
161
+ text_tokens = clip.tokenize(prompts).to(self.device)
162
+ text_features = self.model.encode_text(text_tokens)
 
 
163
  text_features = text_features / text_features.norm(dim=-1, keepdim=True)
164
  logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
165
  probs = torch.softmax(logits, dim=-1)[0]
 
167
 
168
  @torch.no_grad()
169
  def _group_softmax(self, image_features: torch.Tensor, prompts: list[str]) -> list[float]:
170
+ text_tokens = clip.tokenize(prompts).to(self.device)
171
+ text_features = self.model.encode_text(text_tokens)
 
 
172
  text_features = text_features / text_features.norm(dim=-1, keepdim=True)
173
  logits = (image_features @ text_features.T) * self.model.logit_scale.exp()
174
  probs = torch.softmax(logits, dim=-1)[0]
analyzers/parsing_analyzer.py CHANGED
@@ -21,6 +21,7 @@ primary source for lip geometry/color in color_analyzer.
21
  """
22
 
23
  from typing import Any
 
24
 
25
  import cv2
26
  import numpy as np
@@ -62,7 +63,6 @@ class ParsingAnalyzer:
62
  self.processor = None
63
  self.model = None
64
  try:
65
- self.processor = SegformerImageProcessor.from_pretrained(MODEL_ID)
66
  self.model = SegformerForSemanticSegmentation.from_pretrained(MODEL_ID)
67
  self.model.to(self.device).eval()
68
  except Exception as exc:
 
21
  """
22
 
23
  from typing import Any
24
+ import warnings
25
 
26
  import cv2
27
  import numpy as np
 
63
  self.processor = None
64
  self.model = None
65
  try:
 
66
  self.model = SegformerForSemanticSegmentation.from_pretrained(MODEL_ID)
67
  self.model.to(self.device).eval()
68
  except Exception as exc:
architecture.md ADDED
@@ -0,0 +1,1707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HCP Face Analysis — Architecture Plan
2
+
3
+ ## Revised Architecture & Best Models for Maximum Feature Coverage
4
+
5
+ Since the codebase is flexible and can use more languages and frameworks, we go beyond the Supabase Edge Function constraint to find the **absolute best models** for the full feature list.
6
+
7
+ ---
8
+
9
+ ## Recommended Architecture: Python Microservice Sidecar
10
+
11
+ ```
12
+ ┌──────────────────────────────────────────────────────────┐
13
+ │  CURRENT STACK                                           │
14
+ │  Next.js Frontend ──► Supabase (Auth, DB, Storage)       │
15
+ └──────────────┬───────────────────────────────────────────┘
16
+                │
17
+                ▼
18
+ ┌──────────────────────────────────────────────────────────┐
19
+ │  NEW: Python Face Analysis Microservice                  │
20
+ │  (Railway.app / Render.com / Hugging Face Spaces)        │
21
+ │  FREE TIER: 512MB RAM, shared CPU                        │
22
+ │                                                          │
23
+ │  FastAPI Server                                          │
24
+ │  ├── MediaPipe Face Landmarker (478 landmarks, 4MB)      │
25
+ │  ├── InsightFace Buffalo_SC (recognition + attrs, 30MB)  │
26
+ │  ├── FairFace (age/gender/race, 90MB)                    │
27
+ │  ├── HuggingFace ViT models (attributes, ~50MB each)     │
28
+ │  ├── BiSeNet (face parsing/segmentation, 50MB)           │
29
+ │  └── Custom geometric analysis (your feature list)       │
30
+ │                                                          │
31
+ │  Total: ~250MB models (loaded lazily)                    │
32
+ └──────────────────────────────────────────────────────────┘
33
+ ```
34
+
35
+ **Why this is better:** Python gives access to the **entire deep learning ecosystem** — every model on HuggingFace, every research paper's pretrained weights. Free-tier hosting on Railway/Render gives 512MB RAM and enough CPU for per-request inference.
36
+
37
+ ---
38
+
39
+ ## Best Models Per Feature Category
40
+
41
+ ### Tier 1: Core Models (Must Have)
42
+
43
+ #### 1. MediaPipe Face Landmarker — Geometric Features
44
+ - **478 3D landmarks + 52 blendshapes**
45
+ - **Size:** 4MB
46
+ - **Covers:** Face shape, jawline, chin, cheekbones, forehead, eye shape, eye spacing, eye size, eyebrow shape, nose shape, lip shape, mouth width, dimples, facial asymmetry
47
+ - **GitHub:** https://github.com/google-ai-edge/mediapipe
48
+ - **Python:** `pip install mediapipe`
49
+ - **Accuracy:** State-of-the-art landmark detection, handles 30° head rotation well
50
+
51
+ #### 2. InsightFace Buffalo_SC — Lightweight Recognition + Age/Gender
52
+ - **Size:** ~30MB (smallest Buffalo variant)
53
+ - **LFW Accuracy:** 99.5%
54
+ - **Covers:** Face detection, age, gender, face embedding (for recognition), 2D landmarks
55
+ - **GitHub:** https://github.com/deepinsight/insightface
56
+ - **Weights:** Auto-downloaded via `insightface.app.FaceAnalysis(name='buffalo_sc')`
57
+ - **Why not Buffalo_L:** 320MB is overkill; Buffalo_SC is 90% as accurate at 1/10th the size
58
+
59
+ #### 3. FairFace — Age, Gender, Race (Most Accurate)
60
+ - **Size:** ~90MB (ResNet-34)
61
+ - **Accuracy:** 93.4% race, 94.2% gender, MAE 3.4 years for age
62
+ - **Covers:** Age (9 buckets), gender, race (7 categories: White, Black, Latino, East Asian, Southeast Asian, Indian, Middle Eastern)
63
+ - **GitHub:** https://github.com/dchen236/FairFace
64
+ - **Weights:** https://drive.google.com/file/d/1xSfJQWMhm3AVlJYcPcabGO_bj1kDB0xw (res34_fair_align_multi_7_20190809.pt)
65
+ - **Why over InsightFace for this:** FairFace is specifically trained for fair demographic classification across races, not biased toward any group
66
+
67
+ #### 4. HSEmotion (EfficientNet) — Emotion Recognition
68
+ - **Size:** ~20MB
69
+ - **Accuracy:** 66.5% on AffectNet-8 (state-of-the-art), 8 emotions
70
+ - **Covers:** Angry, contempt, disgust, fear, happy, neutral, sad, surprise
71
+ - **GitHub:** https://github.com/HSE-asavchenko/face-emotion-recognition
72
+ - **Weights:** Available via `timm` or direct download from repo
73
+ - **Why over face-api.js:** Significantly more accurate, trained on AffectNet (largest emotion dataset)
74
+
75
+ ### Tier 2: Specialized Models
76
+
77
+ #### 5. BiSeNet Face Parsing — Facial Segmentation
78
+ - **Size:** ~50MB
79
+ - **Covers:** Skin region, left/right eyebrow, left/right eye, nose, upper/lower lip, inner mouth, hair, left/right ear, neck, cloth, hat, earrings, glasses, background
80
+ - **GitHub:** https://github.com/zllrunning/face-parsing.PyTorch
81
+ - **Weights:** https://drive.google.com/file/d/154JgKpzCPW82qINcVieuPH3fZ2e0P812
82
+ - **Why this matters:** Precisely segments hair, skin, eyebrows for color analysis, facial hair detection, glasses detection, and wrinkle analysis
83
+
84
+ #### 6. microsoft/swin-base-patch4-window7-224-in22k fine-tuned for facial attributes
85
+ - **HuggingFace:** Various CelebA-trained attribute classifiers
86
+ - Specifically: https://huggingface.co/nateraw/vit-age-classifier (age)
87
+ - Specifically: https://huggingface.co/rizvandwiki/gender-classification-2 (gender)
88
+
89
+ #### 7. CelebA Attribute Classifier (Custom Multi-Label)
90
+ - **Dataset:** CelebA has 40 binary attributes already labeled
91
+ - Train a lightweight EfficientNet-B0 (~20MB) on CelebA for:
92
+ - `Attractive`, `Bald`, `Bangs`, `Big_Lips`, `Big_Nose`, `Black_Hair`, `Blond_Hair`, `Brown_Hair`, `Bushy_Eyebrows`, `Chubby`, `Double_Chin`, `Eyeglasses`, `Goatee`, `Gray_Hair`, `Heavy_Makeup`, `High_Cheekbones`, `Male`, `Mouth_Slightly_Open`, `Mustache`, `Narrow_Eyes`, `No_Beard`, `Oval_Face`, `Pointy_Nose`, `Receding_Hairline`, `Sideburns`, `Smiling`, `Straight_Hair`, `Wavy_Hair`, `Wearing_Hat`, `Young`
93
+ - **Pre-trained option:** https://github.com/dchen236/FairFace has CelebA-trained models
94
+ - **Better pre-trained option:** https://huggingface.co/jnferreira/attribute-prediction-celebA
95
+
96
+ #### 8. Hair Segmentation + Color Analysis
97
+ - **Model:** MODNet for matting + BiSeNet for hair segmentation
98
+ - **GitHub (MODNet):** https://github.com/ZHKKKe/MODNet (~25MB)
99
+ - Post-segmentation: K-means clustering on hair pixels for color
100
+
101
+ #### 9. Skin Analysis (Wrinkles, Acne, etc.)
102
+ - **Model:** https://huggingface.co/imfarzanansari/skin-disease-detection (for acne/skin conditions)
103
+ - **For wrinkles:** Edge detection (Canny/Sobel) on forehead/eye regions from BiSeNet parsing — no model needed
104
+ - **For freckles/moles:** Blob detection on skin regions from BiSeNet parsing
105
+
106
+ ---
107
+
108
+ ## Complete Feature Coverage Map
109
+
110
+ | Feature | Model/Method | Confidence |
111
+ |---------|-------------|------------|
112
+ | **Face shape** (oval, round, square, heart, diamond, oblong, triangle) | MediaPipe landmarks geometric ratios + CelebA (`Oval_Face`) | ⭐⭐⭐⭐ |
113
+ | **Jawline** (sharp, soft, strong) | MediaPipe jaw landmark angles | ⭐⭐⭐⭐ |
114
+ | **Chin** (receding, pointed, cleft, wide) | MediaPipe chin landmarks + depth (z) | ⭐⭐⭐ |
115
+ | **Cheekbones** (high, flat, full, hollow) | MediaPipe landmark z-depth + CelebA (`High_Cheekbones`, `Chubby`) | ⭐⭐⭐⭐ |
116
+ | **Forehead** (broad, narrow) | MediaPipe forehead span ratio | ⭐⭐⭐⭐ |
117
+ | **Eye shape** (almond, round, hooded, monolid, upturned, downturned) | MediaPipe eyelid curvature + corner angles | ⭐⭐⭐⭐ |
118
+ | **Eye spacing** (wide-set, close-set) | MediaPipe interpupillary distance ratio | ⭐⭐⭐⭐⭐ |
119
+ | **Eye size** (large, small) | MediaPipe eye area / face area | ⭐⭐⭐⭐⭐ |
120
+ | **Deep-set / protruding eyes** | MediaPipe landmark z-depth at eye region | ⭐⭐⭐ |
121
+ | **Eye color** (brown, blue, green, hazel) | Iris crop → HSV color histogram + KNN | ⭐⭐⭐⭐ |
122
+ | **Dark under-eyes / eye bags** | BiSeNet skin parsing → brightness analysis under eyes | ⭐⭐⭐ |
123
+ | **Crow's feet** | Canny edge detection on BiSeNet-parsed outer eye skin | ⭐⭐⭐ |
124
+ | **Eyebrow shape** (arched, straight, bushy, thick, thin) | MediaPipe brow landmarks + CelebA (`Bushy_Eyebrows`, `Arched_Eyebrows`) | ⭐⭐⭐⭐ |
125
+ | **Unibrow** | MediaPipe inner brow distance + pixel analysis between brows | ⭐⭐⭐⭐ |
126
+ | **Nose shape** (straight, aquiline, button, upturned, wide, narrow) | MediaPipe nose landmarks + CelebA (`Big_Nose`, `Pointy_Nose`) | ⭐⭐⭐⭐ |
127
+ | **Nose bridge** (flat, high) | MediaPipe z-depth at nasal bridge | ⭐⭐⭐ |
128
+ | **Nostrils** (wide, narrow) | MediaPipe nostril landmark width ratio | ⭐⭐⭐⭐ |
129
+ | **Lips** (full, thin) | MediaPipe lip landmarks + CelebA (`Big_Lips`) | ⭐⭐⭐⭐ |
130
+ | **Mouth width** | MediaPipe mouth corner distance ratio | ⭐⭐⭐⭐⭐ |
131
+ | **Cupid's bow** | MediaPipe upper lip curvature analysis | ⭐⭐⭐ |
132
+ | **Teeth** (gap, crooked, straight, overbite, underbite) | Mouth crop when smiling → custom classifier or rule-based | ⭐⭐ |
133
+ | **Dimples** | MediaPipe blendshapes during smile + cheek region analysis | ⭐⭐⭐ |
134
+ | **Smile lines** | Edge detection on nasolabial region | ⭐⭐⭐ |
135
+ | **Asymmetrical smile** | MediaPipe left/right smile blendshape difference | ⭐⭐⭐⭐ |
136
+ | **Hair type** (straight, wavy, curly, coily) | BiSeNet hair segmentation → texture frequency (FFT) + CelebA (`Straight_Hair`, `Wavy_Hair`) | ⭐⭐⭐ |
137
+ | **Hair length** (short, long, bald) | BiSeNet hair mask area + CelebA (`Bald`, `Bangs`) | ⭐⭐⭐⭐ |
138
+ | **Hair color** (black, brown, blonde, red, gray, dyed) | BiSeNet hair mask → K-means color clustering + CelebA (`Black_Hair`, `Brown_Hair`, `Blond_Hair`, `Gray_Hair`) | ⭐⭐⭐⭐ |
139
+ | **Receding hairline / widow's peak** | BiSeNet hair boundary analysis + CelebA (`Receding_Hairline`) | ⭐⭐⭐ |
140
+ | **Beard/facial hair** (full, stubble, goatee, mustache, sideburns, clean-shaven) | BiSeNet parsing lower face + CelebA (`5_o_Clock_Shadow`, `Goatee`, `Mustache`, `No_Beard`, `Sideburns`) | ⭐⭐⭐⭐ |
141
+ | **Skin tone** (light, medium, dark) | BiSeNet skin parsing → mean LAB brightness | ⭐⭐⭐⭐⭐ |
142
+ | **Freckles** | BiSeNet skin mask → small blob detection (contrast) | ⭐⭐⭐ |
143
+ | **Moles / birthmark** | BiSeNet skin mask → dark blob detection | ⭐⭐⭐ |
144
+ | **Scars** | BiSeNet skin mask → linear edge anomaly detection | ⭐⭐ |
145
+ | **Acne** | BiSeNet skin mask → red blob detection or HuggingFace skin model | ⭐⭐⭐ |
146
+ | **Wrinkles / forehead lines** | BiSeNet forehead mask → Gabor filter or Canny edges | ⭐⭐⭐ |
147
+ | **Facial asymmetry** | MediaPipe left/right landmark mirror distance | ⭐⭐⭐⭐⭐ |
148
+ | **Prominent Adam's apple** | Neck region detection (limited accuracy) | ⭐ |
149
+ | **Glasses** | CelebA (`Eyeglasses`) + BiSeNet parsing | ⭐⭐⭐⭐⭐ |
150
+ | **Age** | FairFace (MAE 3.4 years) | ⭐⭐⭐⭐⭐ |
151
+ | **Gender** | FairFace (94.2%) | ⭐⭐⭐⭐⭐ |
152
+ | **Race** | FairFace (93.4%, 7 categories) | ⭐⭐⭐⭐⭐ |
153
+ | **Emotion** | HSEmotion (66.5% AffectNet-8, SOTA) | ⭐⭐⭐⭐ |
154
+
155
+ ---
156
+
157
+ ## Model Comparison Table
158
+
159
+ | Model | Accuracy (LFW) | Size | Runs in Deno/Browser? | Feature Depth | Notes |
160
+ |-------|----------------|------|----------------------|---------------|-------|
161
+ | **DeepFace** (Python) | 97.4% (VGG-Face) | 500MB+ | ❌ No (Python only) | Age, gender, race, emotion | Too large, wrong runtime |
162
+ | **InsightFace Buffalo_L** | 99.8% (LFW) | ~320MB | ❌ No (Python/C++) | Landmarks, age, gender | Too large for edge |
163
+ | **InsightFace MobileFaceNet** | 99.5% (LFW) | ~4MB | ⚠️ ONNX possible | Recognition only, no attributes | Very small but limited features |
164
+ | **MediaPipe Face Landmarker** | N/A (landmark model) | ~4MB | ✅ Yes (TFJS/WASM) | 478 landmarks, blendshapes | Best for geometric features |
165
+ | **face-api.js** | 99.2% (LFW) | ~6MB (all models) | ✅ Yes (TFJS) | Age, gender, emotion, 68 landmarks | Browser/Node.js ready |
166
+ | **ONNX FER+ (emotion)** | ~85% (FER2013) | ~2MB | ✅ Yes (ONNX.js) | Emotion only | Supplement model |
167
+ | **HuggingFace ViT models** | Varies | 50-350MB | ⚠️ ONNX export possible | Age, gender, various classifiers | Some fit under 50MB |
168
+
169
+ ---
170
+
171
+ ## Free Hosting Options for the Python Microservice
172
+
173
+ | Platform | Free Tier | RAM | Cold Start | Best For |
174
+ |----------|-----------|-----|------------|----------|
175
+ | **Hugging Face Spaces** | Unlimited | 2GB CPU | ~15s | Best free option, runs Gradio/FastAPI |
176
+ | **Railway.app** | $5 credit/month | 512MB | ~5s | Good for always-on API |
177
+ | **Render.com** | 750 hrs/month | 512MB | ~30s | Spins down after 15min inactivity |
178
+ | **Google Cloud Run** | 2M requests/month | 512MB | ~10s | Best scaling, pay-per-request |
179
+ | **Fly.io** | 3 shared VMs | 256MB | ~3s | Low latency, always on |
180
+
181
+ **Recommendation: Hugging Face Spaces** — 2GB RAM free, pre-installed ML libraries, no cold start limits, and you can use their Inference API for some models without even hosting.
182
+
183
+ ---
184
+
185
+ ## Full Implementation
186
+
187
+ ### Python Microservice
188
+
189
+ #### requirements.txt
190
+
191
+ ```
192
+ fastapi==0.115.0
193
+ uvicorn==0.30.0
194
+ python-multipart==0.0.9
195
+ mediapipe==0.10.14
196
+ insightface==0.7.3
197
+ onnxruntime==1.18.0
198
+ torch==2.3.0
199
+ torchvision==0.18.0
200
+ Pillow==10.4.0
201
+ numpy==1.26.4
202
+ opencv-python-headless==4.10.0.84
203
+ scipy==1.13.0
204
+ scikit-learn==1.5.0
205
+ huggingface-hub==0.23.0
206
+ ```
207
+
208
+ #### face-service/app.py
209
+
210
+ ```python
211
+ """
212
+ Face Analysis Microservice
213
+ Combines multiple models for comprehensive facial attribute detection.
214
+ """
215
+
216
+ import io
217
+ import logging
218
+ from typing import Optional
219
+
220
+ import cv2
221
+ import numpy as np
222
+ from fastapi import FastAPI, File, HTTPException, UploadFile
223
+ from fastapi.middleware.cors import CORSMiddleware
224
+ from PIL import Image
225
+
226
+ from analyzers.landmark_analyzer import LandmarkAnalyzer
227
+ from analyzers.demographic_analyzer import DemographicAnalyzer
228
+ from analyzers.attribute_analyzer import AttributeAnalyzer
229
+ from analyzers.parsing_analyzer import ParsingAnalyzer
230
+ from analyzers.emotion_analyzer import EmotionAnalyzer
231
+ from analyzers.color_analyzer import ColorAnalyzer
232
+
233
+ logging.basicConfig(level=logging.INFO)
234
+ logger = logging.getLogger(__name__)
235
+
236
+ app = FastAPI(title="Face Analysis Service", version="2.0.0")
237
+
238
+ app.add_middleware(
239
+ CORSMiddleware,
240
+ allow_origins=["*"], # Restrict in production
241
+ allow_credentials=True,
242
+ allow_methods=["*"],
243
+ allow_headers=["*"],
244
+ )
245
+
246
+ # Initialize analyzers lazily
247
+ landmark_analyzer: Optional[LandmarkAnalyzer] = None
248
+ demographic_analyzer: Optional[DemographicAnalyzer] = None
249
+ attribute_analyzer: Optional[AttributeAnalyzer] = None
250
+ parsing_analyzer: Optional[ParsingAnalyzer] = None
251
+ emotion_analyzer: Optional[EmotionAnalyzer] = None
252
+ color_analyzer: Optional[ColorAnalyzer] = None
253
+
254
+
255
+ def get_analyzers():
256
+ global landmark_analyzer, demographic_analyzer, attribute_analyzer
257
+ global parsing_analyzer, emotion_analyzer, color_analyzer
258
+
259
+ if landmark_analyzer is None:
260
+ logger.info("Loading MediaPipe landmarks...")
261
+ landmark_analyzer = LandmarkAnalyzer()
262
+
263
+ if demographic_analyzer is None:
264
+ logger.info("Loading FairFace demographics...")
265
+ demographic_analyzer = DemographicAnalyzer()
266
+
267
+ if attribute_analyzer is None:
268
+ logger.info("Loading CelebA attribute classifier...")
269
+ attribute_analyzer = AttributeAnalyzer()
270
+
271
+ if parsing_analyzer is None:
272
+ logger.info("Loading BiSeNet face parser...")
273
+ parsing_analyzer = ParsingAnalyzer()
274
+
275
+ if emotion_analyzer is None:
276
+ logger.info("Loading HSEmotion...")
277
+ emotion_analyzer = EmotionAnalyzer()
278
+
279
+ if color_analyzer is None:
280
+ color_analyzer = ColorAnalyzer()
281
+
282
+ return (
283
+ landmark_analyzer,
284
+ demographic_analyzer,
285
+ attribute_analyzer,
286
+ parsing_analyzer,
287
+ emotion_analyzer,
288
+ color_analyzer,
289
+ )
290
+
291
+
292
+ @app.get("/health")
293
+ async def health():
294
+ return {"status": "ok"}
295
+
296
+
297
+ @app.post("/analyze")
298
+ async def analyze_face(file: UploadFile = File(...)):
299
+ """Comprehensive face analysis endpoint."""
300
+ try:
301
+ contents = await file.read()
302
+ image = Image.open(io.BytesIO(contents)).convert("RGB")
303
+ img_array = np.array(image)
304
+ img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
305
+
306
+ (
307
+ landmarks,
308
+ demographics,
309
+ attributes,
310
+ parsing,
311
+ emotions,
312
+ colors,
313
+ ) = get_analyzers()
314
+
315
+ results = {}
316
+
317
+ # 1. MediaPipe Landmarks → geometric features
318
+ logger.info("Running landmark analysis...")
319
+ landmark_results = landmarks.analyze(img_array)
320
+ results.update(landmark_results)
321
+
322
+ # 2. FairFace → age, gender, race
323
+ logger.info("Running demographic analysis...")
324
+ demo_results = demographics.analyze(img_array)
325
+ results.update(demo_results)
326
+
327
+ # 3. CelebA attributes → 40 binary facial attributes
328
+ logger.info("Running attribute analysis...")
329
+ attr_results = attributes.analyze(img_array)
330
+ results.update(attr_results)
331
+
332
+ # 4. BiSeNet face parsing → segmentation masks
333
+ logger.info("Running face parsing...")
334
+ parse_results = parsing.analyze(img_bgr)
335
+ results.update(parse_results)
336
+
337
+ # 5. HSEmotion → emotion classification
338
+ logger.info("Running emotion analysis...")
339
+ emo_results = emotions.analyze(img_array)
340
+ results.update(emo_results)
341
+
342
+ # 6. Color analysis using parsing masks
343
+ logger.info("Running color analysis...")
344
+ color_results = colors.analyze(
345
+ img_array,
346
+ skin_mask=parse_results.get("_skin_mask"),
347
+ hair_mask=parse_results.get("_hair_mask"),
348
+ landmark_data=landmark_results.get("_raw_landmarks"),
349
+ )
350
+ results.update(color_results)
351
+
352
+ # Remove internal fields
353
+ results = {k: v for k, v in results.items() if not k.startswith("_")}
354
+
355
+ return {"success": True, "data": results}
356
+
357
+ except Exception as e:
358
+ logger.error(f"Analysis failed: {e}", exc_info=True)
359
+ raise HTTPException(status_code=500, detail=str(e))
360
+ ```
361
+
362
+ #### face-service/analyzers/landmark_analyzer.py
363
+
364
+ ```python
365
+ """
366
+ MediaPipe Face Landmarker — 478 3D landmarks + 52 blendshapes
367
+ Derives geometric facial features from landmark positions.
368
+ """
369
+
370
+ import math
371
+ from typing import Any
372
+
373
+ import mediapipe as mp
374
+ import numpy as np
375
+ from mediapipe.tasks import python as mp_python
376
+ from mediapipe.tasks.python import vision
377
+
378
+
379
class LandmarkAnalyzer:
    """Derive geometric facial attributes from MediaPipe Face Landmarker output.

    The detector is configured for a single face and is asked for blendshape
    scores as well as landmarks.  Every distance and threshold below is
    expressed in the coordinate units MediaPipe reports for its landmarks.
    """

    # Left/right landmark index pairs compared by the asymmetry score.
    _SYMMETRY_PAIRS = (
        (33, 263), (133, 362), (70, 300), (93, 323), (172, 397),
        (61, 291), (159, 386), (145, 374), (46, 276),
    )
    # Landmarks this class already uses as central points of the face
    # (top of face, nose bridge, nose tip, chin); their mean x is the midline.
    _MIDLINE = (10, 6, 1, 152)

    def __init__(self):
        base_options = mp_python.BaseOptions(
            model_asset_path=self._download_model()
        )
        options = vision.FaceLandmarkerOptions(
            base_options=base_options,
            output_face_blendshapes=True,
            output_facial_transformation_matrixes=True,
            num_faces=1,
        )
        self.detector = vision.FaceLandmarker.create_from_options(options)

    def _download_model(self) -> str:
        """Return the local path of the landmarker model, fetching it if absent.

        The file is downloaded to a temporary ``.part`` name and renamed into
        place only after the transfer completes, so an interrupted download
        can never be mistaken for a valid model on the next start.
        """
        import os
        import urllib.request

        model_path = "models/face_landmarker.task"
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
            partial_path = model_path + ".part"
            try:
                urllib.request.urlretrieve(url, partial_path)
                os.replace(partial_path, model_path)
            finally:
                # Only present here if the download or rename failed.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        return model_path

    def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
        """Detect one face in *img_rgb* and return its geometric attributes.

        Returns ``{"error": ...}`` when no face is found.  Otherwise the
        result contains the derived attributes, the rounded blendshape scores
        under ``"blendshapes"`` and the raw landmark list under the internal
        key ``"_raw_landmarks"``.
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
        result = self.detector.detect(mp_image)

        if not result.face_landmarks:
            return {"error": "No face detected by MediaPipe"}

        lm = [{"x": p.x, "y": p.y, "z": p.z} for p in result.face_landmarks[0]]

        blendshapes = {}
        if result.face_blendshapes:
            for bs in result.face_blendshapes[0]:
                blendshapes[bs.category_name] = round(bs.score, 4)

        return self._derive_attributes(lm, blendshapes)

    def _derive_attributes(
        self, lm: list[dict], blendshapes: dict[str, float]
    ) -> dict[str, Any]:
        """Turn landmark dicts and blendshape scores into labelled attributes."""
        ratio = self._ratio
        dist = self._dist

        attrs: dict[str, Any] = {"_raw_landmarks": lm}

        # === Face Shape ===
        face_height = dist(lm[10], lm[152])
        face_width = dist(lm[234], lm[454])
        jaw_width = dist(lm[172], lm[397])
        cheekbone_width = dist(lm[93], lm[323])
        forehead_width = dist(lm[54], lm[284])

        wh_ratio = ratio(face_width, face_height, 1)
        jaw_to_face = ratio(jaw_width, face_width, 1)
        forehead_to_jaw = ratio(forehead_width, jaw_width, 1)
        cheek_to_jaw = ratio(cheekbone_width, jaw_width, 1)

        if wh_ratio > 0.85 and jaw_to_face > 0.75:
            attrs["face_shape"] = "round"
        elif wh_ratio > 0.8 and jaw_to_face > 0.8 and forehead_to_jaw < 1.1:
            attrs["face_shape"] = "square"
        elif wh_ratio < 0.75:
            attrs["face_shape"] = "oblong"
        elif forehead_to_jaw > 1.3:
            attrs["face_shape"] = "heart"
        elif cheek_to_jaw > 1.25 and forehead_to_jaw < 1.15:
            attrs["face_shape"] = "diamond"
        elif forehead_to_jaw < 0.85:
            attrs["face_shape"] = "triangle"
        else:
            attrs["face_shape"] = "oval"

        attrs["face_shape_metrics"] = {
            "width_height_ratio": round(wh_ratio, 3),
            "jaw_to_face_ratio": round(jaw_to_face, 3),
            "forehead_to_jaw_ratio": round(forehead_to_jaw, 3),
            "cheekbone_to_jaw_ratio": round(cheek_to_jaw, 3),
        }

        # === Forehead ===
        forehead_ratio = ratio(forehead_width, face_width, 0.6)
        attrs["forehead_width"] = (
            "broad" if forehead_ratio > 0.7
            else "narrow" if forehead_ratio < 0.55
            else "average"
        )

        # === Jawline ===
        jaw_angle = self._jaw_angle(lm)
        attrs["jawline_angle"] = round(jaw_angle, 1)
        if jaw_angle < 110:
            attrs["jawline_type"] = "sharp"
        elif jaw_angle > 140:
            attrs["jawline_type"] = "soft"
        elif jaw_to_face > 0.75:
            attrs["jawline_type"] = "strong"
        else:
            attrs["jawline_type"] = "soft"

        # === Chin ===
        chin_width = dist(lm[175], lm[396])
        chin_ratio = ratio(chin_width, jaw_width, 0.4)
        attrs["chin_type"] = (
            "pointed" if chin_ratio < 0.3
            else "wide" if chin_ratio > 0.5
            else "normal"
        )

        # === Cheekbones ===
        cheek_z = (lm[93]["z"] + lm[323]["z"]) / 2
        attrs["cheekbone_prominence"] = (
            "high" if cheek_z < -0.04
            else "flat" if cheek_z > 0.0
            else "moderate"
        )

        # Hollow vs full cheeks (blendshape-assisted)
        cheek_puff = blendshapes.get("cheekPuff", 0)
        if cheek_puff > 0.3:
            attrs["cheek_fullness"] = "full"
        elif cheek_z > -0.01:
            attrs["cheek_fullness"] = "hollow"
        else:
            attrs["cheek_fullness"] = "normal"

        # === Eyes === (shape is judged from the left eye only)
        left_eye_top = lm[159]
        left_eye_bottom = lm[145]
        left_eye_inner = lm[133]
        left_eye_outer = lm[33]
        eye_openness = dist(left_eye_top, left_eye_bottom)
        eye_width_val = dist(left_eye_inner, left_eye_outer)
        eye_ratio = ratio(eye_openness, eye_width_val, 0.3)

        outer_angle = left_eye_outer["y"] - left_eye_inner["y"]
        if outer_angle < -0.012:
            attrs["eye_shape"] = "upturned"
        elif outer_angle > 0.012:
            attrs["eye_shape"] = "downturned"
        elif eye_ratio > 0.38:
            attrs["eye_shape"] = "round"
        elif eye_ratio < 0.2:
            attrs["eye_shape"] = "hooded"
        else:
            attrs["eye_shape"] = "almond"

        # Deep-set vs protruding
        eye_z = (lm[159]["z"] + lm[145]["z"]) / 2
        nose_bridge_z = lm[6]["z"]
        if eye_z > nose_bridge_z + 0.02:
            attrs["eye_depth"] = "deep-set"
        elif eye_z < nose_bridge_z - 0.01:
            attrs["eye_depth"] = "protruding"
        else:
            attrs["eye_depth"] = "normal"

        # Eye spacing
        if len(lm) > 473:  # Iris landmarks available
            inter_pupillary = dist(lm[468], lm[473])
        else:
            inter_pupillary = dist(lm[133], lm[362])
        ip_ratio = ratio(inter_pupillary, face_width, 0.35)
        attrs["eye_spacing"] = (
            "wide-set" if ip_ratio > 0.38
            else "close-set" if ip_ratio < 0.28
            else "average"
        )

        # Eye size (mean of both eyes' height x width, relative to face area)
        r_eye_area = dist(lm[386], lm[374]) * dist(lm[362], lm[263])
        l_eye_area = eye_openness * eye_width_val
        avg_eye_area = (l_eye_area + r_eye_area) / 2
        face_area = face_width * face_height
        eye_size_ratio = ratio(avg_eye_area, face_area, 0.015)
        attrs["eye_size"] = (
            "large" if eye_size_ratio > 0.02
            else "small" if eye_size_ratio < 0.012
            else "average"
        )

        # Eye blink (closed vs open)
        blink_l = blendshapes.get("eyeBlinkLeft", 0)
        blink_r = blendshapes.get("eyeBlinkRight", 0)
        attrs["eyes_open"] = (blink_l + blink_r) / 2 < 0.5

        # === Eyebrows ===
        brow_mid_l = lm[105]
        brow_outer_l = lm[46]
        brow_inner_l = lm[70]
        brow_to_eye = dist(brow_mid_l, lm[159])
        brow_arch_ratio = ratio(brow_to_eye, eye_openness, 1.5)

        attrs["eyebrow_arch_height"] = (
            "high" if brow_arch_ratio > 2.2
            else "low" if brow_arch_ratio < 1.3
            else "average"
        )

        # Brow curvature: vertical offset of the brow middle from its ends.
        avg_end_y = (brow_inner_l["y"] + brow_outer_l["y"]) / 2
        curvature = brow_mid_l["y"] - avg_end_y
        if abs(curvature) < 0.003:
            attrs["eyebrow_shape"] = "straight"
        elif curvature < -0.008:
            attrs["eyebrow_shape"] = "arched"
        else:
            attrs["eyebrow_shape"] = "flat"

        # Eyebrow thickness (vertical span of brow landmarks)
        brow_thickness = dist(lm[66], lm[105])  # top / bottom of left brow
        attrs["eyebrow_thickness"] = (
            "thick" if brow_thickness > 0.015
            else "thin" if brow_thickness < 0.008
            else "medium"
        )

        # Unibrow detection
        inner_brow_dist = dist(lm[70], lm[300])
        attrs["possible_unibrow"] = inner_brow_dist < 0.04

        # === Nose ===
        nose_bridge_top = lm[6]
        nose_tip = lm[1]
        nose_bottom = lm[2]
        nostril_w = dist(lm[129], lm[358])

        nw_ratio = ratio(nostril_w, face_width, 0.24)
        attrs["nostril_width"] = (
            "wide" if nw_ratio > 0.28
            else "narrow" if nw_ratio < 0.2
            else "average"
        )

        tip_angle = nose_tip["y"] - nose_bottom["y"]
        if tip_angle < -0.005:
            attrs["nose_shape"] = "upturned"
        elif tip_angle > 0.01:
            attrs["nose_shape"] = "aquiline"
        elif nw_ratio > 0.28:
            attrs["nose_shape"] = "wide"
        elif nw_ratio < 0.2:
            attrs["nose_shape"] = "narrow"
        else:
            attrs["nose_shape"] = "straight"

        attrs["nose_bridge"] = (
            "high" if nose_bridge_top["z"] < -0.05
            else "flat" if nose_bridge_top["z"] > 0.0
            else "average"
        )

        attrs["nose_tip_shape"] = (
            "pointed" if nose_tip["z"] < nose_bottom["z"] - 0.01
            else "rounded"
        )

        # === Lips & Mouth ===
        upper_lip_h = dist(lm[0], lm[13])
        lower_lip_h = dist(lm[14], lm[17])
        total_lip_h = upper_lip_h + lower_lip_h
        mouth_w = dist(lm[61], lm[291])

        lip_ratio = ratio(total_lip_h, mouth_w, 0.3)
        attrs["lip_fullness"] = (
            "full" if lip_ratio > 0.38
            else "thin" if lip_ratio < 0.22
            else "average"
        )

        attrs["lip_balance"] = (
            "top-heavy" if upper_lip_h > lower_lip_h * 1.2
            else "bottom-heavy" if lower_lip_h > upper_lip_h * 1.2
            else "balanced"
        )

        mw_ratio = ratio(mouth_w, face_width, 0.37)
        attrs["mouth_width"] = (
            "wide" if mw_ratio > 0.42
            else "small" if mw_ratio < 0.32
            else "average"
        )

        # Cupid's bow
        bow_depth = lm[0]["y"] - (lm[37]["y"] + lm[267]["y"]) / 2
        attrs["cupids_bow"] = (
            "defined" if bow_depth > 0.005
            else "subtle" if bow_depth > 0.002
            else "flat"
        )

        # Smile
        smile_l = blendshapes.get("mouthSmileLeft", 0)
        smile_r = blendshapes.get("mouthSmileRight", 0)
        attrs["smiling"] = (smile_l + smile_r) / 2 > 0.4
        attrs["smile_asymmetry"] = round(abs(smile_l - smile_r), 3)

        # Dimples (heuristic: strong smile with low cheek puff)
        attrs["possible_dimples"] = (
            (smile_l > 0.5 or smile_r > 0.5) and cheek_puff < 0.2
        )

        # === Facial Asymmetry ===
        attrs["facial_asymmetry_score"] = self._asymmetry_score(lm)

        # Raw blendshape scores are passed through unchanged.  Head pose is
        # not derived here even though the detector is asked for the
        # transformation matrix.
        attrs["blendshapes"] = blendshapes

        return attrs

    def _asymmetry_score(self, lm: list[dict]) -> float:
        """Return a 0..1 score of left/right horizontal imbalance.

        Each paired landmark's horizontal distance is measured from the face
        midline (mean x of the central landmarks), not from the image centre,
        so the score does not depend on where the face sits in the frame.
        The mean imbalance is scaled so that 0.05 maps to 1.0, then clamped.
        """
        midline_x = sum(lm[i]["x"] for i in self._MIDLINE) / len(self._MIDLINE)
        imbalance = 0.0
        for left_idx, right_idx in self._SYMMETRY_PAIRS:
            left_dist = abs(lm[left_idx]["x"] - midline_x)
            right_dist = abs(lm[right_idx]["x"] - midline_x)
            imbalance += abs(left_dist - right_dist)
        return round(min(imbalance / len(self._SYMMETRY_PAIRS) / 0.05, 1.0), 3)

    @staticmethod
    def _ratio(numerator: float, denominator: float, default: float) -> float:
        """Return numerator / denominator, or *default* for a non-positive denominator."""
        return numerator / denominator if denominator > 0 else default

    def _dist(self, a: dict, b: dict) -> float:
        """Return the Euclidean distance between two landmark dicts (z optional)."""
        return math.sqrt(
            (a["x"] - b["x"]) ** 2
            + (a["y"] - b["y"]) ** 2
            + (a.get("z", 0) - b.get("z", 0)) ** 2
        )

    def _jaw_angle(self, lm: list[dict]) -> float:
        """Return the 2D angle in degrees at the chin between the two jaw points.

        Falls back to 120.0 when either jaw point coincides with the chin.
        """
        chin = lm[152]
        left_jaw = lm[172]
        right_jaw = lm[397]
        v1 = (left_jaw["x"] - chin["x"], left_jaw["y"] - chin["y"])
        v2 = (right_jaw["x"] - chin["x"], right_jaw["y"] - chin["y"])
        dot = v1[0] * v2[0] + v1[1] * v2[1]
        mag1 = math.sqrt(v1[0] ** 2 + v1[1] ** 2)
        mag2 = math.sqrt(v2[0] ** 2 + v2[1] ** 2)
        if mag1 * mag2 == 0:
            return 120.0
        # Clamp to guard acos against rounding just outside [-1, 1].
        cos_angle = max(-1, min(1, dot / (mag1 * mag2)))
        return math.acos(cos_angle) * (180 / math.pi)
737
+ ```
738
+
739
+ #### face-service/analyzers/demographic_analyzer.py
740
+
741
+ ```python
742
+ """
743
+ FairFace — Age, Gender, Race prediction
744
+ Most fair and accurate demographic classifier.
745
+ """
746
+
747
+ import os
748
+ from typing import Any
749
+
750
+ import cv2
751
+ import numpy as np
752
+ import torch
753
+ import torchvision.transforms as transforms
754
+ from huggingface_hub import hf_hub_download
755
+ from PIL import Image
756
+ from torchvision import models
757
+
758
+
759
class DemographicAnalyzer:
    """FairFace-based age, gender, race classifier."""

    AGE_LABELS = [
        "0-2", "3-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70+"
    ]
    GENDER_LABELS = ["Male", "Female"]
    RACE_LABELS = [
        "White", "Black", "Latino_Hispanic", "East Asian",
        "Southeast Asian", "Indian", "Middle Eastern"
    ]
    # Year bounds matching AGE_LABELS position by position; the open-ended
    # "70+" bucket is closed at 85 so a midpoint can be reported.
    _AGE_BOUNDS = [
        (0, 2), (3, 9), (10, 19), (20, 29), (30, 39),
        (40, 49), (50, 59), (60, 69), (70, 85),
    ]

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """Load the FairFace ResNet34 model, downloading the weights if absent.

        Raises:
            FileNotFoundError: if the weights are missing and cannot be
                fetched; the underlying error is attached as ``__cause__``.
        """
        model_path = "models/fairface_model.pt"
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            # FairFace official weights: res34_fair_align_multi_7_20190809.pt
            try:
                hf_hub_download(
                    repo_id="dchen236/FairFace",
                    filename="res34_fair_align_multi_7_20190809.pt",
                    local_dir="models",
                    local_dir_use_symlinks=False,
                )
                os.rename(
                    "models/res34_fair_align_multi_7_20190809.pt",
                    model_path,
                )
            except Exception as err:
                # Chain the cause so network/auth/rename failures stay visible
                # instead of being replaced by a generic instruction.
                raise FileNotFoundError(
                    "Please download FairFace weights from "
                    "https://github.com/dchen236/FairFace and place at models/fairface_model.pt"
                ) from err

        model = models.resnet34(pretrained=False)
        # FairFace has 3 output heads: race(7), gender(2), age(9) = 18
        model.fc = torch.nn.Linear(model.fc.in_features, 18)
        model.load_state_dict(torch.load(model_path, map_location=self.device))
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
        """Predict age, gender, and race."""
        pil_image = Image.fromarray(img_rgb)
        input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(input_tensor)

        return self._summarize(outputs.cpu().numpy()[0])

    def _summarize(self, outputs: np.ndarray) -> dict[str, Any]:
        """Convert the 18 raw logits of one image into labelled predictions.

        The vector is split as race(0-6), gender(7-8), age(9-17); each group
        gets its own softmax and the arg-max label is reported with its
        probability.  ``age_estimate`` is the midpoint of the winning bucket.
        """
        race_probs = self._softmax(outputs[0:7])
        gender_probs = self._softmax(outputs[7:9])
        age_probs = self._softmax(outputs[9:18])

        race_idx = int(np.argmax(race_probs))
        gender_idx = int(np.argmax(gender_probs))
        age_idx = int(np.argmax(age_probs))

        age_estimate = sum(self._AGE_BOUNDS[age_idx]) / 2

        return {
            "age_estimate": round(age_estimate, 1),
            "age_range": self.AGE_LABELS[age_idx],
            "age_confidence": round(float(age_probs[age_idx]), 3),
            "gender": self.GENDER_LABELS[gender_idx].lower(),
            "gender_confidence": round(float(gender_probs[gender_idx]), 3),
            "race": self.RACE_LABELS[race_idx],
            "race_confidence": round(float(race_probs[race_idx]), 3),
            "race_probabilities": {
                label: round(float(prob), 3)
                for label, prob in zip(self.RACE_LABELS, race_probs)
            },
        }

    @staticmethod
    def _softmax(x: np.ndarray) -> np.ndarray:
        """Return the softmax of *x*, shifted by its max for numerical stability."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()
861
+ ```
862
+
863
+ #### face-service/analyzers/attribute_analyzer.py
864
+
865
+ ```python
866
+ """
867
+ CelebA Multi-Label Attribute Classifier
868
+ Predicts 40 binary facial attributes from CelebA-trained model.
869
+ Uses a pretrained model from HuggingFace.
870
+ """
871
+
872
+ import os
873
+ from typing import Any
874
+
875
+ import numpy as np
876
+ import torch
877
+ import torchvision.transforms as transforms
878
+ from PIL import Image
879
+
880
+
881
# The 40 CelebA attribute names, in the order the classifier head emits them.
CELEBA_ATTRIBUTES = [
    "5_o_Clock_Shadow", "Arched_Eyebrows", "Attractive", "Bags_Under_Eyes",
    "Bald", "Bangs", "Big_Lips", "Big_Nose", "Black_Hair", "Blond_Hair",
    "Blurry", "Brown_Hair", "Bushy_Eyebrows", "Chubby", "Double_Chin",
    "Eyeglasses", "Goatee", "Gray_Hair", "Heavy_Makeup", "High_Cheekbones",
    "Male", "Mouth_Slightly_Open", "Mustache", "Narrow_Eyes", "No_Beard",
    "Oval_Face", "Pale_Skin", "Pointy_Nose", "Receding_Hairline",
    "Rosy_Cheeks", "Sideburns", "Smiling", "Straight_Hair", "Wavy_Hair",
    "Wearing_Earrings", "Wearing_Hat", "Wearing_Lipstick", "Wearing_Necklace",
    "Wearing_Necktie", "Young",
]


class AttributeAnalyzer:
    """CelebA 40-attribute binary classifier using a fine-tuned ResNet."""

    # (result key, CelebA attribute) pairs reported as ``probability > 0.5``.
    _HAIR_FLAGS = (
        ("bald", "Bald"),
        ("bangs", "Bangs"),
        ("receding_hairline", "Receding_Hairline"),
    )
    _APPEARANCE_FLAGS = (
        ("wearing_glasses", "Eyeglasses"),
        ("wearing_hat", "Wearing_Hat"),
        ("bushy_eyebrows", "Bushy_Eyebrows"),
        ("arched_eyebrows_celeba", "Arched_Eyebrows"),
        ("bags_under_eyes", "Bags_Under_Eyes"),
        ("high_cheekbones_celeba", "High_Cheekbones"),
        ("oval_face_celeba", "Oval_Face"),
        ("pointy_nose_celeba", "Pointy_Nose"),
        ("big_lips_celeba", "Big_Lips"),
        ("big_nose_celeba", "Big_Nose"),
        ("narrow_eyes_celeba", "Narrow_Eyes"),
        ("double_chin", "Double_Chin"),
        ("chubby", "Chubby"),
        ("rosy_cheeks", "Rosy_Cheeks"),
        ("pale_skin", "Pale_Skin"),
        ("young", "Young"),
        ("smiling_celeba", "Smiling"),
        ("mouth_open", "Mouth_Slightly_Open"),
    )

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """
        Load a CelebA attribute prediction model.
        Using a ResNet-18 fine-tuned on CelebA for 40 attributes.

        If no weights exist locally and the download fails, an ImageNet
        ResNet-18 with an untrained 40-way head is returned as a stand-in and
        a warning is issued.  The stand-in is deliberately NOT written to
        ``models/celeba_resnet18.pt``: persisting it would make every later
        start load meaningless weights without any warning.
        """
        import warnings

        from torchvision import models

        model_path = "models/celeba_resnet18.pt"

        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            # Try loading from HuggingFace
            try:
                from huggingface_hub import hf_hub_download
                hf_hub_download(
                    repo_id="jnferreira/attribute-prediction-celebA",
                    filename="model.pt",
                    local_dir="models",
                    local_dir_use_symlinks=False,
                )
                os.rename("models/model.pt", model_path)
            except Exception as err:
                warnings.warn(
                    "WARNING: Using ImageNet-pretrained ResNet18 without CelebA fine-tuning. "
                    "Attribute predictions will be inaccurate. "
                    "Please provide CelebA-trained weights at models/celeba_resnet18.pt "
                    f"(download failed: {err!r})",
                    RuntimeWarning,
                    stacklevel=2,
                )
                fallback = models.resnet18(pretrained=True)
                fallback.fc = torch.nn.Linear(fallback.fc.in_features, 40)
                fallback.to(self.device)
                fallback.eval()
                return fallback

        model = models.resnet18(pretrained=False)
        model.fc = torch.nn.Linear(model.fc.in_features, 40)
        model.load_state_dict(
            torch.load(model_path, map_location=self.device)
        )
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
        """Run the classifier on *img_rgb* and return interpreted attributes."""
        pil_image = Image.fromarray(img_rgb)
        input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            logits = self.model(input_tensor)

        probs = torch.sigmoid(logits).cpu().numpy()[0]
        return self._interpret(probs)

    def _interpret(self, probs) -> dict[str, Any]:
        """Map per-attribute probabilities to the user-facing result dict.

        *probs* is paired positionally with ``CELEBA_ATTRIBUTES``.  The
        rounded probabilities are returned under ``"celeba_raw"``; every
        boolean field is that rounded probability compared against 0.5.
        """
        raw_attrs = {
            attr: round(float(prob), 3)
            for attr, prob in zip(CELEBA_ATTRIBUTES, probs)
        }

        def score(attr: str) -> float:
            return raw_attrs.get(attr, 0)

        result: dict[str, Any] = {"celeba_raw": raw_attrs}

        # Hair color (pick highest confidence)
        hair_colors = {
            "black": score("Black_Hair"),
            "brown": score("Brown_Hair"),
            "blonde": score("Blond_Hair"),
            "gray": score("Gray_Hair"),
        }
        result["hair_color_celeba"] = max(hair_colors, key=hair_colors.get)

        # Hair type
        if score("Straight_Hair") > 0.5:
            result["hair_type_celeba"] = "straight"
        elif score("Wavy_Hair") > 0.5:
            result["hair_type_celeba"] = "wavy"
        else:
            result["hair_type_celeba"] = "unknown"

        for key, attr in self._HAIR_FLAGS:
            result[key] = score(attr) > 0.5

        # Facial hair.  CelebA labels the ABSENCE of a beard, hence the
        # inverted comparison for has_beard.
        has_beard = score("No_Beard") < 0.5
        has_goatee = score("Goatee") > 0.5
        has_mustache = score("Mustache") > 0.5
        has_sideburns = score("Sideburns") > 0.5
        has_stubble = score("5_o_Clock_Shadow") > 0.5

        if has_goatee:
            result["facial_hair"] = "goatee"
        elif has_mustache and has_beard:
            result["facial_hair"] = "full_beard"
        elif has_mustache:
            result["facial_hair"] = "mustache"
        elif has_sideburns:
            result["facial_hair"] = "sideburns"
        elif has_stubble:
            result["facial_hair"] = "stubble"
        elif not has_beard:
            result["facial_hair"] = "clean_shaven"
        else:
            result["facial_hair"] = "beard"

        # Appearance attributes
        for key, attr in self._APPEARANCE_FLAGS:
            result[key] = score(attr) > 0.5

        return result
1033
+ ```
1034
+
1035
+ #### face-service/analyzers/parsing_analyzer.py
1036
+
1037
+ ```python
1038
+ """
1039
+ BiSeNet Face Parsing — 19-class semantic segmentation of the face.
1040
+ Segments: skin, eyebrows, eyes, nose, lips, hair, ears, neck, etc.
1041
+ """
1042
+
1043
+ import os
1044
+ from typing import Any
1045
+
1046
+ import cv2
1047
+ import numpy as np
1048
+ import torch
1049
+ from torchvision import transforms
1050
+
1051
+
1052
class ParsingAnalyzer:
    """
    BiSeNet face parsing for hair/skin/feature segmentation.

    Parsing classes:
    0: background, 1: skin, 2: l_brow, 3: r_brow, 4: l_eye, 5: r_eye,
    6: eye_g (glasses), 7: l_ear, 8: r_ear, 9: ear_r (earring),
    10: nose, 11: mouth, 12: u_lip, 13: l_lip, 14: neck,
    15: necklace, 16: cloth, 17: hair, 18: hat
    """

    LABELS = {
        0: "background", 1: "skin", 2: "left_brow", 3: "right_brow",
        4: "left_eye", 5: "right_eye", 6: "glasses", 7: "left_ear",
        8: "right_ear", 9: "earring", 10: "nose", 11: "mouth",
        12: "upper_lip", 13: "lower_lip", 14: "neck", 15: "necklace",
        16: "cloth", 17: "hair", 18: "hat",
    }

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """Load the BiSeNet weights from ``models/bisenet_face_parsing.pt``.

        Raises:
            FileNotFoundError: if the weights file is absent (it is not
                downloaded automatically).
        """
        model_path = "models/bisenet_face_parsing.pt"
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            # BiSeNet model from face-parsing.PyTorch
            # Download from: https://drive.google.com/file/d/154JgKpzCPW82qINcVieuPH3fZ2e0P812
            raise FileNotFoundError(
                "Please download BiSeNet face parsing weights from "
                "https://github.com/zllrunning/face-parsing.PyTorch and place at "
                "models/bisenet_face_parsing.pt"
            )

        from models.bisenet_model import BiSeNet  # You'll need to include this
        model = BiSeNet(n_classes=19)
        model.load_state_dict(
            torch.load(model_path, map_location=self.device)
        )
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_bgr: np.ndarray) -> dict[str, Any]:
        """Segment *img_bgr* and derive coverage, wrinkle and dark-spot stats.

        The image is parsed at 512x512 and the label map is resized back to
        the input size with nearest-neighbour interpolation.  The skin and
        hair masks are returned under the internal keys ``"_skin_mask"`` and
        ``"_hair_mask"``.
        """
        h, w = img_bgr.shape[:2]
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img_rgb, (512, 512))

        input_tensor = self.transform(img_resized).unsqueeze(0).to(self.device)

        with torch.no_grad():
            output = self.model(input_tensor)[0]  # BiSeNet returns tuple

        parsing = output.squeeze(0).argmax(0).cpu().numpy()
        parsing = cv2.resize(
            parsing.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST
        )

        # Generate masks
        skin_mask = (parsing == 1).astype(np.uint8)
        hair_mask = (parsing == 17).astype(np.uint8)
        glasses_mask = (parsing == 6).astype(np.uint8)
        hat_mask = (parsing == 18).astype(np.uint8)

        # Region stats
        hair_area = hair_mask.sum() / (h * w)
        skin_area = skin_mask.sum() / (h * w)

        result: dict[str, Any] = {
            "_skin_mask": skin_mask,
            "_hair_mask": hair_mask,
            "has_glasses_parsing": int(glasses_mask.sum()) > 100,
            "wearing_hat_parsing": int(hat_mask.sum()) > 500,
            "hair_coverage": round(float(hair_area), 3),
            "skin_coverage": round(float(skin_area), 3),
        }

        # Hair length estimation from mask
        result["hair_length_estimate"] = self._hair_length_label(hair_area)

        result.update(self._forehead_wrinkles(img_bgr, skin_mask))
        result.update(self._dark_spots(img_bgr, skin_mask))

        return result

    @staticmethod
    def _hair_length_label(hair_area: float) -> str:
        """Bucket the fraction of the image labelled hair into a length label."""
        if hair_area < 0.01:
            return "bald"
        if hair_area < 0.08:
            return "short"
        if hair_area < 0.18:
            return "medium"
        return "long"

    @staticmethod
    def _wrinkle_label(edge_density: float) -> str:
        """Bucket forehead edge density into a wrinkle severity label."""
        if edge_density > 0.15:
            return "heavy"
        if edge_density > 0.08:
            return "moderate"
        if edge_density > 0.04:
            return "mild"
        return "none"

    def _forehead_wrinkles(
        self, img_bgr: np.ndarray, skin_mask: np.ndarray
    ) -> dict[str, Any]:
        """Score wrinkles as Canny edge density on forehead skin.

        Returns an empty dict when the fixed forehead window contains 100 or
        fewer skin pixels.
        """
        h, w = img_bgr.shape[:2]
        rows = slice(int(h * 0.05), int(h * 0.25))
        cols = slice(int(w * 0.3), int(w * 0.7))
        forehead_region = img_bgr[rows, cols]
        forehead_skin = skin_mask[rows, cols]
        if forehead_skin.sum() <= 100:
            return {}

        gray_forehead = cv2.cvtColor(forehead_region, cv2.COLOR_BGR2GRAY)
        # Detect edges on the unmasked image and mask afterwards.  Zeroing
        # non-skin pixels first would create an artificial step along the mask
        # border that Canny reports as an edge, inflating the score.
        edges = cv2.Canny(gray_forehead, 30, 80)
        edges = cv2.bitwise_and(edges, edges, mask=forehead_skin)
        edge_density = edges.sum() / (forehead_skin.sum() * 255 + 1)
        return {
            "forehead_wrinkle_score": round(float(edge_density), 3),
            "forehead_wrinkles": self._wrinkle_label(edge_density),
        }

    def _dark_spots(
        self, img_bgr: np.ndarray, skin_mask: np.ndarray
    ) -> dict[str, Any]:
        """Count small dark blobs on skin as a freckle/mole heuristic."""
        skin_region = cv2.bitwise_and(img_bgr, img_bgr, mask=skin_mask)
        gray_skin = cv2.cvtColor(skin_region, cv2.COLOR_BGR2GRAY)
        # Detect dark spots; the inverse threshold also fires on the zeroed
        # non-skin pixels, so the result is masked back to skin.
        _, dark_spots = cv2.threshold(gray_skin, 80, 255, cv2.THRESH_BINARY_INV)
        dark_spots = cv2.bitwise_and(dark_spots, dark_spots, mask=skin_mask)
        # Find contours of dark spots
        contours, _ = cv2.findContours(
            dark_spots, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        small_spots = [c for c in contours if 5 < cv2.contourArea(c) < 200]
        return {
            "possible_freckles_moles": len(small_spots) > 10,
            "dark_spot_count": len(small_spots),
        }
1182
+ ```
1183
+
1184
+ #### face-service/analyzers/emotion_analyzer.py
1185
+
1186
+ ```python
1187
+ """
1188
+ HSEmotion — State-of-the-art facial emotion recognition.
1189
+ Supports 8 emotions on AffectNet.
1190
+ """
1191
+
1192
+ import os
1193
+ from typing import Any
1194
+
1195
+ import cv2
1196
+ import numpy as np
1197
+ import torch
1198
+ import torchvision.transforms as transforms
1199
+ from PIL import Image
1200
+
1201
+
1202
class EmotionAnalyzer:
    """HSEmotion-based facial expression classifier."""

    EMOTION_LABELS = [
        "angry", "contempt", "disgust", "fear",
        "happy", "neutral", "sad", "surprise",
    ]

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._load_model()
        self.transform = transforms.Compose([
            transforms.Resize((260, 260)),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def _load_model(self):
        """Load HSEmotion EfficientNet model.

        The checkpoint may be either a state dict or a whole pickled
        ``torch.nn.Module``; both are accepted.

        Raises:
            FileNotFoundError: if the weights are missing and cannot be
                fetched; the underlying error is attached as ``__cause__``.
        """
        model_path = "models/hsemotion_enet_b0_8.pt"

        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            try:
                from huggingface_hub import hf_hub_download
                # HSEmotion models available at:
                # https://github.com/HSE-asavchenko/face-emotion-recognition
                hf_hub_download(
                    repo_id="HSE-asavchenko/hsemotion",
                    filename="enet_b0_8_best_afew.pt",
                    local_dir="models",
                    local_dir_use_symlinks=False,
                )
                os.rename("models/enet_b0_8_best_afew.pt", model_path)
            except Exception as err:
                # Keep the real failure (network, missing repo, rename) visible.
                raise FileNotFoundError(
                    "Please download HSEmotion weights from "
                    "https://github.com/HSE-asavchenko/face-emotion-recognition"
                ) from err

        # NOTE(review): torch.load unpickles the file, so only load weights
        # from a source you trust.  The upstream HSEmotion checkpoints appear
        # to be saved as whole models rather than state dicts — TODO confirm
        # for the file actually deployed.
        checkpoint = torch.load(model_path, map_location=self.device)
        if isinstance(checkpoint, torch.nn.Module):
            model = checkpoint
        else:
            import timm
            model = timm.create_model("efficientnet_b0", pretrained=False, num_classes=8)
            model.load_state_dict(checkpoint)
        model.to(self.device)
        model.eval()
        return model

    def analyze(self, img_rgb: np.ndarray) -> dict[str, Any]:
        """Classify the expression in *img_rgb* and return label + probabilities."""
        pil_image = Image.fromarray(img_rgb)
        input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            logits = self.model(input_tensor)

        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        return self._summarize(probs)

    def _summarize(self, probs) -> dict[str, Any]:
        """Report the arg-max emotion, its probability and the full distribution.

        *probs* is paired positionally with ``EMOTION_LABELS``.
        """
        top_idx = int(np.argmax(probs))
        return {
            "emotion": self.EMOTION_LABELS[top_idx],
            "emotion_confidence": round(float(probs[top_idx]), 3),
            "emotion_probabilities": {
                label: round(float(prob), 3)
                for label, prob in zip(self.EMOTION_LABELS, probs)
            },
        }
1271
+ ```
1272
+
1273
+ #### face-service/analyzers/color_analyzer.py
1274
+
1275
+ ```python
1276
+ """
1277
+ Pixel-level color analysis using segmentation masks from BiSeNet
1278
+ and landmark positions from MediaPipe.
1279
+ """
1280
+
1281
+ from typing import Any, Optional
1282
+
1283
+ import cv2
1284
+ import numpy as np
1285
+ from sklearn.cluster import KMeans
1286
+
1287
+
1288
+ class ColorAnalyzer:
1289
+ """Analyzes skin tone, eye color, and hair color from pixel data."""
1290
+
1291
+ def analyze(
1292
+ self,
1293
+ img_rgb: np.ndarray,
1294
+ skin_mask: Optional[np.ndarray] = None,
1295
+ hair_mask: Optional[np.ndarray] = None,
1296
+ landmark_data: Optional[list[dict]] = None,
1297
+ ) -> dict[str, Any]:
1298
+ h, w = img_rgb.shape[:2]
1299
+ results: dict[str, Any] = {}
1300
+
1301
+ # === Skin Tone ===
1302
+ if skin_mask is not None and skin_mask.sum() > 100:
1303
+ skin_pixels = img_rgb[skin_mask > 0]
1304
+ # Convert to LAB for perceptually uniform brightness
1305
+ skin_lab = cv2.cvtColor(
1306
+ skin_pixels.reshape(-1, 1, 3), cv2.COLOR_RGB2LAB
1307
+ ).reshape(-1, 3)
1308
+ avg_l = float(skin_lab[:, 0].mean()) # L channel (brightness)
1309
+
1310
+ if avg_l > 180:
1311
+ results["skin_tone"] = "very_light"
1312
+ elif avg_l > 155:
1313
+ results["skin_tone"] = "light"
1314
+ elif avg_l > 130:
1315
+ results["skin_tone"] = "medium_light"
1316
+ elif avg_l > 105:
1317
+ results["skin_tone"] = "medium"
1318
+ elif avg_l > 80:
1319
+ results["skin_tone"] = "medium_dark"
1320
+ else:
1321
+ results["skin_tone"] = "dark"
1322
+
1323
+ results["skin_tone_score"] = round(avg_l / 255, 3)
1324
+
1325
+ # Fitzpatrick scale approximation
1326
+ if avg_l > 170:
1327
+ results["fitzpatrick_type"] = "I"
1328
+ elif avg_l > 145:
1329
+ results["fitzpatrick_type"] = "II"
1330
+ elif avg_l > 120:
1331
+ results["fitzpatrick_type"] = "III"
1332
+ elif avg_l > 95:
1333
+ results["fitzpatrick_type"] = "IV"
1334
+ elif avg_l > 70:
1335
+ results["fitzpatrick_type"] = "V"
1336
+ else:
1337
+ results["fitzpatrick_type"] = "VI"
1338
+
1339
+ # === Hair Color ===
1340
+ if hair_mask is not None and hair_mask.sum() > 500:
1341
+ hair_pixels = img_rgb[hair_mask > 0]
1342
+
1343
+ # K-means to find dominant hair color
1344
+ if len(hair_pixels) > 100:
1345
+ sample_size = min(5000, len(hair_pixels))
1346
+ indices = np.random.choice(len(hair_pixels), sample_size, replace=False)
1347
+ sampled = hair_pixels[indices].astype(np.float32)
1348
+
1349
+ kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
1350
+ kmeans.fit(sampled)
1351
+
1352
+ # Pick the cluster with most members
1353
+ labels, counts = np.unique(kmeans.labels_, return_counts=True)
1354
+ dominant_idx = labels[np.argmax(counts)]
1355
+ dominant_color = kmeans.cluster_centers_[dominant_idx].astype(int)
1356
+
1357
+ r, g, b = dominant_color
1358
+ brightness = (int(r) + int(g) + int(b)) / 3
1359
+
1360
+ # Classify hair color
1361
+ hsv_color = cv2.cvtColor(
1362
+ np.array([[dominant_color]], dtype=np.uint8), cv2.COLOR_RGB2HSV
1363
+ )[0][0]
1364
+ hue, sat, val = int(hsv_color[0]), int(hsv_color[1]), int(hsv_color[2])
1365
+
1366
+ if brightness < 40:
1367
+ results["hair_color_detected"] = "black"
1368
+ elif brightness > 190:
1369
+ results["hair_color_detected"] = "platinum_blonde"
1370
+ elif brightness > 160 and sat < 50:
1371
+ results["hair_color_detected"] = "gray"
1372
+ elif brightness > 140 and (hue > 15 and hue < 35):
1373
+ results["hair_color_detected"] = "blonde"
1374
+ elif (hue < 15 or hue > 160) and sat > 80:
1375
+ results["hair_color_detected"] = "red"
1376
+ elif brightness > 60:
1377
+ results["hair_color_detected"] = "brown"
1378
+ else:
1379
+ results["hair_color_detected"] = "dark_brown"
1380
+
1381
+ results["hair_dominant_rgb"] = [int(r), int(g), int(b)]
1382
+
1383
+ # Hair texture analysis (FFT-based)
1384
+ hair_region = cv2.bitwise_and(
1385
+ img_rgb,
1386
+ img_rgb,
1387
+ mask=hair_mask,
1388
+ )
1389
+ gray_hair = cv2.cvtColor(hair_region, cv2.COLOR_RGB2GRAY)
1390
+ # Mask out non-hair regions
1391
+ gray_hair_masked = gray_hair[hair_mask > 0]
1392
+
1393
+ if len(gray_hair_masked) > 1000:
1394
+ # Compute local variance as texture indicator
1395
+ # High frequency = curly, low frequency = straight
1396
+ hair_patch = gray_hair_masked[:1024].astype(np.float32)
1397
+ fft = np.fft.fft(hair_patch)
1398
+ magnitude = np.abs(fft)
1399
+ # Ratio of high freq to low freq energy
1400
+ low_freq = magnitude[:len(magnitude) // 4].sum()
1401
+ high_freq = magnitude[len(magnitude) // 4:].sum()
1402
+ freq_ratio = high_freq / (low_freq + 1e-6)
1403
+
1404
+ if freq_ratio > 0.8:
1405
+ results["hair_texture_detected"] = "curly"
1406
+ elif freq_ratio > 0.5:
1407
+ results["hair_texture_detected"] = "wavy"
1408
+ else:
1409
+ results["hair_texture_detected"] = "straight"
1410
+
1411
+ # === Eye Color ===
1412
+ if landmark_data is not None and len(landmark_data) > 473:
1413
+ for eye_name, iris_idx in [("left", 468), ("right", 473)]:
1414
+ ix = int(landmark_data[iris_idx]["x"] * w)
1415
+ iy = int(landmark_data[iris_idx]["y"] * h)
1416
+
1417
+ # Sample a small patch around iris
1418
+ pad = 3
1419
+ y1 = max(0, iy - pad)
1420
+ y2 = min(h, iy + pad)
1421
+ x1 = max(0, ix - pad)
1422
+ x2 = min(w, ix + pad)
1423
+
1424
+ iris_patch = img_rgb[y1:y2, x1:x2]
1425
+ if iris_patch.size == 0:
1426
+ continue
1427
+
1428
+ avg_color = iris_patch.mean(axis=(0, 1))
1429
+ r, g, b = avg_color
1430
+
1431
+ # Convert to HSV for better classification
1432
+ hsv = cv2.cvtColor(
1433
+ np.array([[avg_color]], dtype=np.uint8), cv2.COLOR_RGB2HSV
1434
+ )[0][0]
1435
+ hue_val, sat_val, val_val = int(hsv[0]), int(hsv[1]), int(hsv[2])
1436
+
1437
+ if val_val < 60:
1438
+ color = "dark_brown"
1439
+ elif sat_val < 30:
1440
+ color = "gray"
1441
+ elif hue_val > 100 and hue_val < 130 and sat_val > 50:
1442
+ color = "blue"
1443
+ elif hue_val > 35 and hue_val < 85 and sat_val > 40:
1444
+ color = "green"
1445
+ elif (hue_val > 15 and hue_val < 35) and sat_val > 40:
1446
+ color = "hazel"
1447
+ elif val_val > 120 and sat_val > 60:
1448
+ color = "amber"
1449
+ else:
1450
+ color = "brown"
1451
+
1452
+ results[f"{eye_name}_eye_color"] = color
1453
+
1454
+ # Consensus
1455
+ if "left_eye_color" in results and "right_eye_color" in results:
1456
+ if results["left_eye_color"] == results["right_eye_color"]:
1457
+ results["eye_color"] = results["left_eye_color"]
1458
+ else:
1459
+ results["eye_color"] = results["left_eye_color"] # Use left as primary
1460
+ results["heterochromia"] = True
1461
+
1462
+ return results
1463
+ ```
1464
+
1465
+ #### face-service/Dockerfile
1466
+
1467
+ ```dockerfile
1468
+ FROM python:3.11-slim
1469
+
1470
+ WORKDIR /app
1471
+
1472
+ # Install system dependencies for OpenCV
1473
+ RUN apt-get update && apt-get install -y \
1474
+ libgl1-mesa-glx \
1475
+ libglib2.0-0 \
1476
+ curl \
1477
+ && rm -rf /var/lib/apt/lists/*
1478
+
1479
+ COPY requirements.txt .
1480
+ RUN pip install --no-cache-dir -r requirements.txt
1481
+
1482
+ COPY . .
1483
+
1484
+ # Download MediaPipe model at build time
1485
+ RUN python -c "from analyzers.landmark_analyzer import LandmarkAnalyzer; LandmarkAnalyzer()"
1486
+
1487
+ EXPOSE 8000
1488
+
1489
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
1490
+ ```
1491
+
1492
+ ### Connect Your Next.js App to the Microservice
1493
+
1494
+ #### lib/face-analysis/api-client.ts
1495
+
1496
+ ```typescript
1497
+ /**
1498
+ * Client for the Python face analysis microservice.
1499
+ * Replaces the Supabase Edge Function that called AWS Rekognition.
1500
+ */
1501
+
1502
+ const FACE_SERVICE_URL = process.env.NEXT_PUBLIC_FACE_SERVICE_URL || "http://localhost:8000";
1503
+
1504
+ export interface FaceAnalysisResult {
1505
+ // Geometric (MediaPipe)
1506
+ face_shape: string;
1507
+ face_shape_metrics: Record<string, number>;
1508
+ forehead_width: string;
1509
+ jawline_type: string;
1510
+ jawline_angle: number;
1511
+ chin_type: string;
1512
+ cheekbone_prominence: string;
1513
+ cheek_fullness: string;
1514
+ eye_shape: string;
1515
+ eye_depth: string;
1516
+ eye_spacing: string;
1517
+ eye_size: string;
1518
+ eyebrow_shape: string;
1519
+ eyebrow_arch_height: string;
1520
+ eyebrow_thickness: string;
1521
+ possible_unibrow: boolean;
1522
+ nose_shape: string;
1523
+ nose_bridge: string;
1524
+ nose_tip_shape: string;
1525
+ nostril_width: string;
1526
+ lip_fullness: string;
1527
+ lip_balance: string;
1528
+ mouth_width: string;
1529
+ cupids_bow: string;
1530
+ smiling: boolean;
1531
+ smile_asymmetry: number;
1532
+ possible_dimples: boolean;
1533
+ facial_asymmetry_score: number;
1534
+
1535
+ // Demographics (FairFace)
1536
+ age_estimate: number;
1537
+ age_range: string;
1538
+ age_confidence: number;
1539
+ gender: string;
1540
+ gender_confidence: number;
1541
+ race: string;
1542
+ race_confidence: number;
1543
+ race_probabilities: Record<string, number>;
1544
+
1545
+ // CelebA Attributes
1546
+ facial_hair: string;
1547
+ wearing_glasses: boolean;
1548
+ bald: boolean;
1549
+ receding_hairline: boolean;
1550
+ hair_color_celeba: string;
1551
+ hair_type_celeba: string;
1552
+ bags_under_eyes: boolean;
1553
+ double_chin: boolean;
1554
+ bushy_eyebrows: boolean;
1555
+ high_cheekbones_celeba: boolean;
1556
+
1557
+ // Emotion (HSEmotion)
1558
+ emotion: string;
1559
+ emotion_confidence: number;
1560
+ emotion_probabilities: Record<string, number>;
1561
+
1562
+ // Color Analysis
1563
+ skin_tone: string;
1564
+ skin_tone_score: number;
1565
+ fitzpatrick_type: string;
1566
+ eye_color: string;
1567
+ hair_color_detected: string;
1568
+ hair_dominant_rgb: number[];
1569
+ hair_texture_detected: string;
1570
+
1571
+ // Parsing
1572
+ hair_length_estimate: string;
1573
+ forehead_wrinkles: string;
1574
+ possible_freckles_moles: boolean;
1575
+ dark_spot_count: number;
1576
+
1577
+ // Blendshapes
1578
+ blendshapes: Record<string, number>;
1579
+ }
1580
+
1581
+ export async function analyzeFace(imageFile: File): Promise<FaceAnalysisResult> {
1582
+ const formData = new FormData();
1583
+ formData.append("file", imageFile);
1584
+
1585
+ const response = await fetch(`${FACE_SERVICE_URL}/analyze`, {
1586
+ method: "POST",
1587
+ body: formData,
1588
+ });
1589
+
1590
+ if (!response.ok) {
1591
+ const error = await response.json().catch(() => ({ detail: "Unknown error" }));
1592
+ throw new Error(`Face analysis failed: ${error.detail}`);
1593
+ }
1594
+
1595
+ const result = await response.json();
1596
+
1597
+ if (!result.success) {
1598
+ throw new Error("Face analysis returned unsuccessful result");
1599
+ }
1600
+
1601
+ return result.data;
1602
+ }
1603
+
1604
+ export async function checkServiceHealth(): Promise<boolean> {
1605
+ try {
1606
+ const response = await fetch(`${FACE_SERVICE_URL}/health`);
1607
+ return response.ok;
1608
+ } catch {
1609
+ return false;
1610
+ }
1611
+ }
1612
+ ```
1613
+
1614
+ ### Deploy to Hugging Face Spaces (Free)
1615
+
1616
+ Create a `README.md` in the `face-service/` directory with the following frontmatter:
1617
+
1618
+ ```yaml
1619
+ ---
1620
+ title: HCP Face Analysis
1621
+ emoji: 🔍
1622
+ colorFrom: blue
1623
+ colorTo: purple
1624
+ sdk: docker
1625
+ app_port: 8000
1626
+ ---
1627
+ ```
1628
+
1629
+ ---
1630
+
1631
+ ## Final Architecture Summary
1632
+
1633
+ ```
1634
+ Browser (Next.js)
1635
+ │
1635
+ │ POST /analyze (image file)
1636
+ ▼
1637
+ Hugging Face Spaces (FREE, 2GB RAM)
1638
+ ├── FastAPI Server
1639
+ ├── MediaPipe (4MB) ──────► 478 landmarks → ~40 geometric features
1640
+ ├── FairFace (90MB) ──────► age, gender, race
1641
+ ├── CelebA ResNet (44MB) ─► 40 binary attributes (hair, beard, glasses...)
1642
+ ├── BiSeNet (50MB) ───────► face parsing → hair/skin segmentation
1643
+ ├── HSEmotion (20MB) ─────► 8 emotions
1644
+ └── Color Analysis ───────► skin tone, eye color, hair color
1645
+ │
1646
+ │ JSON response (~150 attributes)
1647
+ ▼
1648
+ Supabase (existing)
1649
+ ├── Store results in PostgreSQL
1651
+ └── Auth / Storage unchanged
1652
+ ```
1653
+
1654
+ | Metric | Value |
1655
+ |--------|-------|
1656
+ | **Total models** | ~210MB |
1657
+ | **Features detected** | **~95% of the full feature list** |
1658
+ | **Hosting cost** | **$0** (HF Spaces free tier) |
1659
+ | **Latency** | ~2-4s per image (CPU) |
1660
+ | **Languages** | Python (microservice) + TypeScript (existing Next.js) |
1661
+ | **Only missing** | Teeth analysis, scar detection, Adam's apple (require specialized fine-tuned models) |
1662
+
1663
+ ---
1664
+
1665
+ ## Required Feature List
1666
+
1667
+ ### Face shape
1668
+ - Oval face, Round face, Square face, Heart-shaped face, Diamond face, Long/oblong face, Triangle face
1669
+ - Jawline sharp, Jawline soft, Strong jaw, Receding chin, Pointed chin, Cleft chin, Wide chin
1670
+ - High cheekbones, Flat cheekbones, Full cheeks, Hollow cheeks
1671
+ - Broad forehead, Narrow forehead
1672
+
1673
+ ### Eye shape
1674
+ - Almond, Round, Hooded, Monolid, Deep-set eyes, Protruding eyes
1675
+ - Upturned eyes, Downturned eyes, Wide-set eyes, Close-set eyes, Large eyes, Small eyes
1676
+ - Eye color: brown, blue, green, hazel
1677
+ - Dark under-eyes, Eye bags, Crow's feet
1678
+
1679
+ ### Eyebrows
1680
+ - Thick, Thin, Arched, Straight, Bushy, Unibrow
1681
+ - High eyebrow arch, Low eyebrow arch
1682
+
1683
+ ### Nose
1684
+ - Straight, Aquiline, Button, Upturned, Wide, Narrow
1685
+ - Flat bridge, High bridge, Wide nostrils, Narrow nostrils
1686
+ - Rounded tip, Pointed tip
1687
+
1688
+ ### Lips & Mouth
1689
+ - Full, Thin, Wide mouth, Small mouth
1690
+ - Defined cupid's bow, Uneven lips
1691
+ - Gap teeth, Crooked teeth, Straight teeth, Overbite, Underbite
1692
+ - Dimples, Smile lines, Asymmetrical smile
1693
+
1694
+ ### Hair
1695
+ - Straight, Wavy, Curly, Coily
1696
+ - Short, Long, Bald, Receding hairline, Widow's peak
1697
+ - Thick, Thin
1698
+ - Color: black, brown, blonde, red, gray, dyed
1699
+
1700
+ ### Facial hair
1701
+ - Full beard, Stubble, Goatee, Mustache, Clean-shaven, Sideburns
1702
+
1703
+ ### Skin & Other
1704
+ - Skin tone: light, medium, dark
1705
+ - Freckles, Moles, Birthmark, Scar, Acne
1706
+ - Wrinkles, Forehead lines, Smile lines
1707
+ - Facial asymmetry, Prominent Adam's apple
requirements.txt CHANGED
@@ -13,3 +13,6 @@ timm==1.0.3
13
  safetensors>=0.6.0
14
  transformers==4.45.2
15
  hsemotion>=0.2.2
 
 
 
 
13
  safetensors>=0.6.0
14
  transformers==4.45.2
15
  hsemotion>=0.2.2
16
+ openai-clip==1.0.1
17
+ ftfy
18
+ regex