Spaces:
Running
Running
| """ | |
| HCP Face Analysis Microservice | |
| ============================== | |
| FastAPI service that runs nine specialized analyzers and a final aggregator | |
| over a single photo and merges their outputs into one facial-attribute | |
| dictionary, including a face-recognition embedding for cross-photo grouping | |
| and a numeric "chopped score" aesthetic rating. | |
| Pipeline (in execution order) | |
| ----------------------------- | |
| 1. InsightFaceAnalyzer InsightFace buffalo_l (ONNX). SCRFD | |
| detection + ArcFace 512-d embedding + | |
| age regression + gender + 106 landmarks. | |
| Replaces the previous three FairFace ViTs | |
| and adds face matching as a new capability. | |
| 2. LandmarkAnalyzer MediaPipe Face Landmarker. 478 3D | |
| landmarks + 52 ARKit blendshapes → | |
| geometric features, smiling, mouth_open. | |
| 3. EthnicityAnalyzer cledoux42/Ethnicity_Test_v003 ViT. | |
| 5-class ethnicity widened to a 7-bucket | |
| schema for legacy compatibility. | |
| 4. ParsingAnalyzer SegFormer-B5 human parsing. Now receives | |
| a face-cropped image (smaller, cleaner). | |
| Emits face/hair masks + hair length + | |
| hat detection + OpenCV-derived skin stats. | |
| 5. EmotionAnalyzer HSEmotion EfficientNet-B0. 8-class | |
| emotion + valence/arousal/mood. | |
| 6. ColorAnalyzer Pure OpenCV LAB/HSV statistics. Uses | |
| SegFormer masks + MediaPipe lip/iris | |
| landmarks. No ML model. | |
| 7. ObstructionAnalyzer dima806 ViT-B/16. Glasses, sunglasses, | |
| mask. ~99% precision on each. | |
| 8. HairTypeAnalyzer dima806 ViT-B/16. Curly/dreadlocks/kinky/ | |
| straight/wavy. ~93% accuracy. | |
| 9. BeautyAnalyzer Optional. ResNet-50 trained on | |
| SCUT-FBP5500 (see training/beauty/). | |
| Outputs a 1.0–5.0 beauty score plus a | |
| 0–100 normalised version. Falls back to | |
| None when no weights are loaded — the | |
| AestheticAnalyzer then uses rule-based | |
| scoring only. | |
| 10. AestheticAnalyzer Pure-Python aggregator. Reads the merged | |
| dict from analyzers 1–9 and produces the | |
| final `chopped_score` (0–100, higher = | |
| more chopped) and a per-factor breakdown. | |
| Endpoints | |
| --------- | |
| GET / service banner | |
| GET /health liveness check | |
| POST /analyze multipart file upload | |
| POST /analyze-base64 JSON {"image": "<base64>"} | |
| All analyzers are lazily instantiated on first request to keep | |
| cold-start latency manageable on the Hugging Face Spaces free tier. | |
| """ | |
| import os | |
# Hugging Face Hub download tuning. These must be set before the analyzer
# modules are imported further down.
#   HF_HUB_ENABLE_HF_TRANSFER: hf_transfer makes initial model downloads
#       from the HF Hub much faster.
#   HF_HUB_DOWNLOAD_TIMEOUT:   the default (10 s) is too short for the
#       larger ViT checkpoints on a cold start.
os.environ.update(
    {
        "HF_HUB_ENABLE_HF_TRANSFER": "1",
        "HF_HUB_DOWNLOAD_TIMEOUT": "60",
    }
)
| import io | |
| import logging | |
| from typing import Optional | |
| import numpy as np | |
| from fastapi import FastAPI, File, HTTPException, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from PIL import Image | |
| from analyzers.landmark_analyzer import LandmarkAnalyzer | |
| from analyzers.ethnicity_analyzer import EthnicityAnalyzer | |
| from analyzers.parsing_analyzer import ParsingAnalyzer | |
| from analyzers.emotion_analyzer import EmotionAnalyzer | |
| from analyzers.color_analyzer import ColorAnalyzer | |
| from analyzers.obstruction_analyzer import ObstructionAnalyzer | |
| from analyzers.hair_type_analyzer import HairTypeAnalyzer | |
| from analyzers.insightface_analyzer import InsightFaceAnalyzer | |
| from analyzers.beauty_analyzer import BeautyAnalyzer | |
| from analyzers.aesthetic_analyzer import AestheticAnalyzer | |
# Configure the root logger at INFO so the per-step pipeline messages
# emitted through `logger.info` are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="HCP Face Analysis Service", version="3.0.0")

# CORS: any origin may call the service. Credentials are deliberately
# disabled: a wildcard origin must not be combined with credentialed
# requests (the CORS spec forbids it, and FastAPI's documentation requires
# explicit origins when `allow_credentials=True`). No endpoint visible in
# this module reads cookies or auth headers, so nothing here needs them.
# If credentials are ever required, list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Restrict to your domain in production.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Module-level singletons, one per analyzer, all unset at import time.
# `get_analyzers()` fills each empty slot the first time it runs, so the
# first request pays the full model-load cost and later requests reuse
# the already-constructed instances.

# Analyzers that log a "Loading ..." line when constructed.
insightface_analyzer: Optional[InsightFaceAnalyzer] = None
landmark_analyzer: Optional[LandmarkAnalyzer] = None
ethnicity_analyzer: Optional[EthnicityAnalyzer] = None
parsing_analyzer: Optional[ParsingAnalyzer] = None
emotion_analyzer: Optional[EmotionAnalyzer] = None
obstruction_analyzer: Optional[ObstructionAnalyzer] = None
hair_type_analyzer: Optional[HairTypeAnalyzer] = None
beauty_analyzer: Optional[BeautyAnalyzer] = None

# Analyzers constructed silently (described in the module docstring as
# pure OpenCV / pure Python, with no ML model).
color_analyzer: Optional[ColorAnalyzer] = None
aesthetic_analyzer: Optional[AestheticAnalyzer] = None
| def _to_json_safe(value): | |
| """Recursively coerce numpy scalars/arrays into JSON-serialisable types. | |
| Several analyzers return numpy floats/booleans (e.g. from `np.std` | |
| or boolean mask logic). FastAPI's default JSON encoder doesn't | |
| handle those, so we normalise everything here before returning. | |
| """ | |
| if isinstance(value, (np.ndarray,)): | |
| return value.tolist() | |
| if isinstance(value, (np.integer, np.floating)): | |
| return value.item() | |
| if isinstance(value, (np.bool_,)): | |
| return bool(value) | |
| if isinstance(value, np.generic): | |
| return value.item() | |
| if isinstance(value, dict): | |
| return {str(k): _to_json_safe(v) for k, v in value.items()} | |
| if isinstance(value, (list, tuple, set)): | |
| return [_to_json_safe(v) for v in value] | |
| return value | |
def get_analyzers():
    """Return the ten analyzer singletons, building any that are still unset.

    Each module-level slot is checked in turn; a slot that is still
    `None` is filled with a fresh instance (after logging a "Loading ..."
    line where one is defined). Slots that already hold an instance are
    left alone, so construction happens at most once per process and
    later calls are cheap.

    Returns:
        A 10-tuple in this fixed order: insightface, landmark, ethnicity,
        parsing, emotion, color, obstruction, hair type, beauty, aesthetic.
    """
    # (slot name, constructor, log line emitted before construction).
    # The order here is both the construction order and the order of the
    # returned tuple. A `None` log line means the analyzer is built silently.
    slot_table = (
        ("insightface_analyzer", InsightFaceAnalyzer,
         "Loading InsightFace buffalo_l bundle..."),
        ("landmark_analyzer", LandmarkAnalyzer,
         "Loading MediaPipe Face Landmarker..."),
        ("ethnicity_analyzer", EthnicityAnalyzer,
         "Loading Ethnicity classifier..."),
        ("parsing_analyzer", ParsingAnalyzer,
         "Loading SegFormer face parser..."),
        ("emotion_analyzer", EmotionAnalyzer,
         "Loading HSEmotion model..."),
        ("color_analyzer", ColorAnalyzer, None),
        ("obstruction_analyzer", ObstructionAnalyzer,
         "Loading face obstruction classifier..."),
        ("hair_type_analyzer", HairTypeAnalyzer,
         "Loading hair type classifier..."),
        ("beauty_analyzer", BeautyAnalyzer,
         "Loading beauty regressor (or no-op if untrained)..."),
        ("aesthetic_analyzer", AestheticAnalyzer, None),
    )

    module_vars = globals()
    for slot_name, factory, loading_message in slot_table:
        if module_vars[slot_name] is not None:
            continue
        if loading_message is not None:
            logger.info(loading_message)
        module_vars[slot_name] = factory()

    return tuple(module_vars[slot_name] for slot_name, _, _ in slot_table)
| def _crop_to_face(img_rgb: np.ndarray, bbox, padding: float = 0.4) -> np.ndarray: | |
| """Crop the image to a face-centred rectangle with extra context. | |
| SegFormer and the ViT classifiers tend to do better with the face | |
| occupying a large fraction of the input. We pad the InsightFace | |
| bbox by `padding` (fraction of bbox size) so context like ears, | |
| hair, and the top of the shoulders is preserved. | |
| Returns the full image unchanged if bbox is None, malformed, or | |
| the resulting crop would be degenerate. | |
| """ | |
| if bbox is None or len(bbox) != 4: | |
| return img_rgb | |
| h, w = img_rgb.shape[:2] | |
| try: | |
| x1, y1, x2, y2 = bbox | |
| bw = max(1.0, x2 - x1) | |
| bh = max(1.0, y2 - y1) | |
| pad_x = bw * padding | |
| pad_y = bh * padding | |
| cx1 = max(0, int(x1 - pad_x)) | |
| cy1 = max(0, int(y1 - pad_y)) | |
| cx2 = min(w, int(x2 + pad_x)) | |
| cy2 = min(h, int(y2 + pad_y)) | |
| if cx2 - cx1 < 32 or cy2 - cy1 < 32: | |
| return img_rgb | |
| return img_rgb[cy1:cy2, cx1:cx2] | |
| except Exception: | |
| return img_rgb | |
def _run_pipeline(img_array: np.ndarray) -> dict:
    """Run every analyzer on `img_array` and return the merged public result.

    Shared by /analyze and /analyze-base64 so the step ordering lives in
    exactly one place. Each analyzer's output dict is merged into a single
    dict with `dict.update` in step order, so on a key collision the later
    step wins.

    Args:
        img_array: The decoded image as an array (callers in this module
            pass an RGB array built from a PIL image).

    Returns:
        The merged dict with every key that starts with "_" removed.
    """
    (
        insight_model,
        landmark_model,
        ethnicity_model,
        parsing_model,
        emotion_model,
        color_model,
        obstruction_model,
        hair_type_model,
        beauty_model,
        aesthetic_model,
    ) = get_analyzers()

    merged: dict = {}

    # Step 1: InsightFace on the full image.
    logger.info("Running InsightFace analysis...")
    insight_out = insight_model.analyze(img_array)
    merged.update(insight_out)

    # Build the face crop once from the reported "face_bbox" and share it
    # with the crop-based steps below. `_crop_to_face` hands back the full
    # image when there is no usable box.
    face_crop = _crop_to_face(img_array, insight_out.get("face_bbox"))

    # Step 2: landmarks, computed on the full image (not the crop).
    logger.info("Running landmark analysis...")
    landmark_out = landmark_model.analyze(img_array)
    merged.update(landmark_out)

    # Step 3: ethnicity classifier on the face crop.
    logger.info("Running ethnicity analysis...")
    ethnicity_out = ethnicity_model.analyze(face_crop)
    merged.update(ethnicity_out)

    # Step 4: parsing on the face crop. Its output is kept because the
    # colour step reads the "_skin_mask" / "_hair_mask" entries from it.
    logger.info("Running face parsing...")
    parsing_out = parsing_model.analyze(face_crop)
    merged.update(parsing_out)

    # Step 5: emotion model on the face crop.
    logger.info("Running emotion analysis...")
    emotion_out = emotion_model.analyze(face_crop)
    merged.update(emotion_out)

    # Step 6: colour analysis on the face crop, using the step-4 masks
    # (produced from that same crop) and the step-2 raw landmarks.
    # NOTE(review): the landmarks come from the FULL image while the
    # pixels and masks come from `face_crop`. If `_raw_landmarks` are
    # normalised to the full frame they will not line up with the crop
    # whenever a crop was actually taken. Confirm how ColorAnalyzer maps
    # them.
    logger.info("Running color analysis...")
    skin_mask = parsing_out.get("_skin_mask")
    hair_mask = parsing_out.get("_hair_mask")
    raw_landmarks = landmark_out.get("_raw_landmarks")
    color_out = color_model.analyze(
        face_crop,
        skin_mask=skin_mask,
        hair_mask=hair_mask,
        landmarks=raw_landmarks,
    )
    merged.update(color_out)

    # Steps 7-9: the remaining crop-based analyzers, run in this order
    # (obstruction, hair type, beauty regressor).
    crop_steps = (
        ("Running obstruction analysis...", obstruction_model),
        ("Running hair-type analysis...", hair_type_model),
        ("Running beauty regressor...", beauty_model),
    )
    for progress_message, model in crop_steps:
        logger.info(progress_message)
        merged.update(model.analyze(face_crop))

    # Step 10: the aggregator takes the merged dict (no image) and runs
    # last so it can see every other analyzer's output, including the
    # underscore-prefixed scratch entries.
    logger.info("Running aesthetic aggregator...")
    aesthetic_out = aesthetic_model.analyze(merged)
    merged.update(aesthetic_out)

    # Strip internal/scratch entries (leading underscore) so masks and
    # raw landmark data stay out of the response.
    public: dict = {}
    for key, value in merged.items():
        if key.startswith("_"):
            continue
        public[key] = value
    return public
@app.get("/")
async def root():
    """Service banner: confirms the server is reachable and which version.

    Registered as `GET /`, as listed in the module docstring's endpoint
    table. Returns a static dict with the service name, version, status
    and the paths of the other endpoints; no analyzer is touched.
    """
    return {
        "name": "HCP Face Analysis Service",
        "version": "3.0.0",
        "status": "running",
        "endpoints": {
            "health": "/health",
            "analyze": "/analyze",
            "analyze-base64": "/analyze-base64",
        },
    }
@app.get("/health")
async def health():
    """Liveness probe. Used by the Express server and HF Spaces uptime checks.

    Registered as `GET /health`, as listed in the module docstring's
    endpoint table. Returns a constant payload and never loads or calls
    an analyzer, so it answers even before the first model load.
    """
    return {"status": "ok"}
@app.post("/analyze")
async def analyze_face(file: UploadFile = File(...)):
    """Multipart endpoint for direct uploads (`POST /analyze`).

    Reads the uploaded file, decodes it to an RGB array, runs the full
    pipeline via `_run_pipeline`, and returns the merged attribute dict.
    See `analyze_face_base64` for the JSON-body variant.

    Returns:
        `{"success": True, "data": <JSON-safe result dict>}`.

    Raises:
        HTTPException(400): the upload could not be decoded as an image.
        HTTPException(500): any other failure while reading the upload or
            running the pipeline; `detail` carries the exception text.
    """
    try:
        contents = await file.read()

        # A bad upload is a client error, so report it as 400 rather than
        # letting it fall through to the generic 500 handler below.
        try:
            with Image.open(io.BytesIO(contents)) as uploaded:
                image = uploaded.convert("RGB")
        except (OSError, ValueError, Image.DecompressionBombError) as exc:
            logger.warning("Rejected undecodable upload: %s", exc)
            raise HTTPException(
                status_code=400, detail="Invalid image data"
            ) from exc

        img_array = np.array(image)
        results = _run_pipeline(img_array)
        return {"success": True, "data": _to_json_safe(results)}
    except HTTPException:
        # Pass deliberate HTTP errors through untouched.
        raise
    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/analyze-base64")
async def analyze_face_base64(body: dict):
    """JSON-body endpoint accepting `{"image": "<base64>"}`.

    Registered as `POST /analyze-base64`. This is what the Node/Express
    server forwards client requests to so multipart payloads don't have
    to be pushed through the proxy. After decoding, the pipeline call is
    identical to `/analyze`.

    Returns:
        `{"success": True, "data": <JSON-safe result dict>}`.

    Raises:
        HTTPException(400): `image` is missing/empty, is not a string, is
            not valid base64, or does not decode to an image.
        HTTPException(500): any other failure while running the pipeline;
            `detail` carries the exception text.
    """
    import base64

    try:
        image_b64 = body.get("image", "")
        if not image_b64:
            raise HTTPException(status_code=400, detail="No image data provided")
        if not isinstance(image_b64, str):
            raise HTTPException(
                status_code=400, detail="Image must be a base64 string"
            )

        # Strip a possible "data:image/...;base64," prefix.
        if "," in image_b64:
            image_b64 = image_b64.split(",", 1)[1]

        # Malformed base64 (binascii.Error is a ValueError) and bytes that
        # are not a decodable image are client errors, so report 400.
        try:
            image_bytes = base64.b64decode(image_b64)
            with Image.open(io.BytesIO(image_bytes)) as decoded:
                image = decoded.convert("RGB")
        except (OSError, ValueError, Image.DecompressionBombError) as exc:
            logger.warning("Rejected undecodable base64 image: %s", exc)
            raise HTTPException(
                status_code=400, detail="Invalid image data"
            ) from exc

        img_array = np.array(image)
        results = _run_pipeline(img_array)
        return {"success": True, "data": _to_json_safe(results)}
    except HTTPException:
        # Pass deliberate HTTP errors through untouched.
        raise
    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))