Spaces:
Build error
Build error
github-actions[bot]
committed on
Commit
·
a2b95a9
0
Parent(s):
Deploy from 128291a769737147011181c09a08b5186e167d8e
Browse files- README.md +77 -0
- app.py +99 -0
- face_age_inference/__init__.py +52 -0
- face_age_inference/config.py +155 -0
- face_age_inference/engine.py +429 -0
- face_age_inference/image.py +186 -0
- face_age_inference/types.py +74 -0
- requirements.txt +401 -0
README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: TangoBot
|
| 3 |
+
emoji: 📸
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: gradio
|
| 7 |
+
python_version: "3.12"
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# TangoBot
|
| 13 |
+
|
| 14 |
+
Gradio web UI for face detection and age estimation, designed for HuggingFace Spaces deployment. This demo showcases TangoBot's photo analysis capabilities for [Tango Charities](https://www.tangocharities.org/).
|
| 15 |
+
|
| 16 |
+
## About Tango Charities
|
| 17 |
+
|
| 18 |
+
Tango Charities is a volunteer-led nonprofit fighting hunger through Feed The City events. Since 2015, they've mobilized 110,000+ volunteers to provide 8+ million meals across 73 cities. TangoBot helps the team count attendees in event photos and identify youth participation.
|
| 19 |
+
|
| 20 |
+
## Features
|
| 21 |
+
|
| 22 |
+
- Interactive web interface for image upload
|
| 23 |
+
- Real-time face detection and age estimation
|
| 24 |
+
- Webcam and clipboard support
|
| 25 |
+
- Embeds inference directly (no external HTTP calls)
|
| 26 |
+
|
| 27 |
+
## Running Locally
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
cd apps/huggingface-space
|
| 31 |
+
uv run gradio app.py
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
Opens at http://localhost:7860
|
| 35 |
+
|
| 36 |
+
## Usage
|
| 37 |
+
|
| 38 |
+
1. Upload an image (or use webcam/clipboard)
|
| 39 |
+
2. View the annotated image with face bounding boxes
|
| 40 |
+
3. See detection results:
|
| 41 |
+
- Number of faces detected
|
| 42 |
+
- Count of people estimated under 18
|
| 43 |
+
|
| 44 |
+
## Color Coding
|
| 45 |
+
|
| 46 |
+
Bounding boxes are color-coded:
|
| 47 |
+
- **Green**: Under 18 (minors)
|
| 48 |
+
- **Blue**: 18 and older (adults)
|
| 49 |
+
|
| 50 |
+
## Deploying to HuggingFace Spaces
|
| 51 |
+
|
| 52 |
+
1. Create a new Space on HuggingFace (Gradio SDK)
|
| 53 |
+
2. Copy the package source and app files:
|
| 54 |
+
```
|
| 55 |
+
packages/face-age-inference/src/face_age_inference/ -> face_age_inference/
|
| 56 |
+
apps/huggingface-space/app.py -> app.py
|
| 57 |
+
apps/huggingface-space/requirements.txt -> requirements.txt
|
| 58 |
+
```
|
| 59 |
+
3. Push to the Space repository
|
| 60 |
+
|
| 61 |
+
Models are automatically downloaded from HuggingFace Hub on first run (~450MB).
|
| 62 |
+
|
| 63 |
+
## Requirements
|
| 64 |
+
|
| 65 |
+
See `requirements.txt` for dependencies. Key packages:
|
| 66 |
+
- `gradio>=5.0.0` - Web UI framework
|
| 67 |
+
- `torch>=2.9.1` - PyTorch
|
| 68 |
+
- `transformers>=4.51.0` - HuggingFace Transformers
|
| 69 |
+
- `ultralytics>=8.3.230` - YOLO implementation
|
| 70 |
+
|
| 71 |
+
## Configuration
|
| 72 |
+
|
| 73 |
+
The app uses default settings from `face-age-inference`. To customize, modify environment variables:
|
| 74 |
+
|
| 75 |
+
```bash
|
| 76 |
+
FACE_AGE_DEVICE=cuda:0 uv run gradio app.py
|
| 77 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio app for face detection and age estimation.
|
| 2 |
+
|
| 3 |
+
This app embeds the face-age-inference engine directly for deployment
|
| 4 |
+
on HuggingFace Spaces (which cannot make external HTTP calls).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import cv2
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import numpy as np
|
| 10 |
+
import spaces
|
| 11 |
+
from face_age_inference import (
|
| 12 |
+
FaceAgeInferenceEngine,
|
| 13 |
+
InferenceError,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# Initialize the inference engine once at startup
|
| 17 |
+
engine: FaceAgeInferenceEngine | None = None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_engine() -> FaceAgeInferenceEngine:
    """Return the process-wide inference engine, constructing it lazily.

    The engine is created on first call and cached in the module-level
    ``engine`` global so model weights are only loaded once per process.
    """
    global engine
    if engine is not None:
        return engine
    engine = FaceAgeInferenceEngine()
    return engine
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@spaces.GPU
def predict(image: np.ndarray | None) -> tuple[np.ndarray | None, str]:
    """Run face detection and age estimation on an image.

    Args:
        image: Input image as RGB numpy array from Gradio.

    Returns:
        Tuple of (annotated image, results text).
    """

    def _summarize(ages: list[float]) -> str:
        # Build the human-readable summary shown in the Results textbox.
        total = len(ages)
        if total == 0:
            return "No faces detected."
        # NaN ages are "unknown" and deliberately excluded from the minor count.
        minors = sum(1 for age in ages if not np.isnan(age) and age < 18)
        face_word = "face" if total == 1 else "faces"
        minor_word = "person" if minors == 1 else "people"
        return (
            f"Detected {total} {face_word}.\n"
            f"Estimated {minors} {minor_word} under 18."
        )

    if image is None:
        return None, "Please upload an image."

    try:
        # Gradio hands us RGB; the engine works on OpenCV-style BGR.
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run inference (renamed local avoids shadowing the module global).
        inference_engine = get_engine()
        result = inference_engine.predict(image_bgr)

        # Flip the annotated output back to RGB for display.
        annotated_rgb = cv2.cvtColor(result.annotated_image, cv2.COLOR_BGR2RGB)

        return annotated_rgb, _summarize(result.ages)

    except InferenceError as e:
        return None, f"Error: {e}"
    except Exception as e:
        # Top-level UI boundary: surface the message instead of crashing the app.
        return None, f"Unexpected error: {e}"
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Create Gradio interface
# gr.Interface wires predict() to one image input and (image, text) outputs.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(
        label="Upload Image",
        type="numpy",  # predict() receives an RGB numpy array
        sources=["upload", "webcam", "clipboard"],
    ),
    outputs=[
        gr.Image(label="Annotated Image", type="numpy"),
        gr.Textbox(label="Results", lines=2),
    ],
    title="Face Detection & Age Estimation",
    description=(
        "Upload an image to detect faces and estimate ages. "
        "Faces are highlighted with bounding boxes: "
        "**green** for minors (under 18), **blue** for adults."
    ),
    examples=[],
    cache_examples=False,
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside the Spaces container.
    demo.launch(server_name="0.0.0.0")
|
face_age_inference/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Face detection and age estimation inference package.
|
| 2 |
+
|
| 3 |
+
This package provides a reusable inference engine for detecting faces
|
| 4 |
+
and estimating ages using YOLO Face-Person Detector and MiVOLO v2 models.
|
| 5 |
+
Models are automatically downloaded from HuggingFace Hub on first use.
|
| 6 |
+
|
| 7 |
+
Example usage:
|
| 8 |
+
from face_age_inference import FaceAgeInferenceEngine, decode_image
|
| 9 |
+
|
| 10 |
+
engine = FaceAgeInferenceEngine()
|
| 11 |
+
image_bgr = decode_image(image_bytes)
|
| 12 |
+
result = engine.predict(image_bgr)
|
| 13 |
+
print(f"Detected {len(result.ages)} faces with ages: {result.ages}")
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from .config import Settings, settings
|
| 17 |
+
from .engine import FaceAgeInferenceEngine, get_inference_engine
|
| 18 |
+
from .image import (
|
| 19 |
+
choose_bbox_color,
|
| 20 |
+
compute_scaled_line_width,
|
| 21 |
+
decode_image,
|
| 22 |
+
draw_face_annotations,
|
| 23 |
+
encode_image_to_base64,
|
| 24 |
+
normalize_file_extension,
|
| 25 |
+
)
|
| 26 |
+
from .types import (
|
| 27 |
+
DEFAULT_MEDIA_TYPE,
|
| 28 |
+
SUPPORTED_IMAGE_MEDIA_TYPES,
|
| 29 |
+
InferenceError,
|
| 30 |
+
InferenceOutput,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# Public API re-exported at package level; grouped by source module.
__all__ = [
    # Config
    "Settings",
    "settings",
    # Engine
    "FaceAgeInferenceEngine",
    "get_inference_engine",
    # Image utilities
    "decode_image",
    "encode_image_to_base64",
    "compute_scaled_line_width",
    "normalize_file_extension",
    "choose_bbox_color",
    "draw_face_annotations",
    # Types
    "InferenceError",
    "InferenceOutput",
    "SUPPORTED_IMAGE_MEDIA_TYPES",
    "DEFAULT_MEDIA_TYPE",
]
|
face_age_inference/config.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Application configuration using Pydantic Settings.
|
| 2 |
+
|
| 3 |
+
Configuration is loaded from environment variables with the FACE_AGE_ prefix.
|
| 4 |
+
Falls back to sensible defaults for local development.
|
| 5 |
+
|
| 6 |
+
Environment variables:
|
| 7 |
+
FACE_AGE_DEVICE: PyTorch device (default: cpu)
|
| 8 |
+
FACE_AGE_DETECTOR_MODEL_ID: HuggingFace model ID for face/person detector
|
| 9 |
+
FACE_AGE_MIVOLO_MODEL_ID: HuggingFace model ID for MiVOLO v2 age estimator
|
| 10 |
+
FACE_AGE_CONFIDENCE_THRESHOLD: Detection confidence threshold
|
| 11 |
+
FACE_AGE_IOU_THRESHOLD: Detection IoU threshold
|
| 12 |
+
FACE_AGE_ANNOTATION_FORMAT: Output image format
|
| 13 |
+
FACE_AGE_MIVOLO_BATCH_SIZE: Max batch size for MiVOLO forward passes
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
+
from pydantic import Field, field_validator
|
| 18 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 19 |
+
|
| 20 |
+
# Type aliases
|
| 21 |
+
type DeviceSpec = str
|
| 22 |
+
type Probability = float
|
| 23 |
+
|
| 24 |
+
# Default HuggingFace model IDs
|
| 25 |
+
_DEFAULT_DETECTOR_MODEL_ID: str = "iitolstykh/YOLO-Face-Person-Detector"
|
| 26 |
+
_DEFAULT_MIVOLO_MODEL_ID: str = "iitolstykh/mivolo_v2"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _detect_best_device() -> str:
    """Pick the most capable inference device that torch reports as usable.

    Returns:
        'cuda' when an NVIDIA GPU is available, 'mps' on Apple Silicon,
        otherwise 'cpu'.
    """
    # Probe in order of preference; fall through to CPU.
    for name, is_available in (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    ):
        if is_available():
            return name
    return "cpu"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Threshold bounds
|
| 43 |
+
MIN_PROBABILITY: Probability = 0.0
|
| 44 |
+
MAX_PROBABILITY: Probability = 1.0
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class Settings(BaseSettings):
    """Runtime configuration for face-age-inference.

    Values are read from FACE_AGE_* environment variables (see model_config);
    each field's default is used when the variable is unset.

    Attributes:
        device: PyTorch device specification (e.g., 'cpu', 'cuda:0', 'mps').
        detector_model_id: HuggingFace model ID for YOLO face/person detector.
        mivolo_model_id: HuggingFace model ID for MiVOLO v2 age estimator.
        confidence_threshold: Minimum confidence for face detection (0-1).
        iou_threshold: IoU threshold for non-maximum suppression (0-1).
        annotation_format: Image format for annotated outputs ('.jpg' or '.png').
        mivolo_batch_size: Max batch size for MiVOLO forward passes.
    """

    model_config = SettingsConfigDict(
        env_prefix="FACE_AGE_",
    )

    # Auto-detects cuda/mps/cpu when FACE_AGE_DEVICE is not set.
    device: DeviceSpec = Field(
        default_factory=_detect_best_device,
        description="PyTorch device identifier for inference (e.g., 'cpu', 'cuda:0', 'mps')",
    )

    detector_model_id: str = Field(
        default=_DEFAULT_DETECTOR_MODEL_ID,
        description="HuggingFace model ID for YOLO face/person detector",
    )

    mivolo_model_id: str = Field(
        default=_DEFAULT_MIVOLO_MODEL_ID,
        description="HuggingFace model ID for MiVOLO v2 age estimator",
    )

    confidence_threshold: Probability = Field(
        default=0.15,
        ge=MIN_PROBABILITY,
        le=MAX_PROBABILITY,
        description="Minimum confidence score for face detection (0.0 to 1.0)",
    )

    iou_threshold: Probability = Field(
        default=0.4,
        ge=MIN_PROBABILITY,
        le=MAX_PROBABILITY,
        description="IoU threshold for non-maximum suppression (0.0 to 1.0)",
    )

    annotation_format: str = Field(
        default=".jpg",
        description="Image format for annotated outputs ('.jpg' or '.png')",
    )

    mivolo_batch_size: int = Field(
        default=8,
        ge=1,
        description="Max batch size for MiVOLO forward passes (reduce if you hit OOM)",
    )

    @field_validator("annotation_format")
    @classmethod
    def validate_annotation_format(cls, value: str) -> str:
        """Ensure annotation format is supported.

        Args:
            value: The format string to validate.

        Returns:
            Validated format string ('.jpg' or '.png'; '.jpeg' is folded into '.jpg').

        Raises:
            ValueError: If format is not supported.
        """
        normalized = value.lower()
        if normalized not in {".jpg", ".jpeg", ".png"}:
            raise ValueError(
                f"Unsupported annotation format: {value}. "
                "Must be one of: .jpg, .jpeg, .png"
            )
        # Normalize .jpeg to .jpg for consistency
        return ".jpg" if normalized == ".jpeg" else normalized

    @field_validator("device")
    @classmethod
    def validate_device(cls, value: str) -> str:
        """Validate device specification format.

        Only a prefix check: 'cuda:0' passes, but so would 'cpux' —
        torch.device() downstream is the authoritative validator.

        Args:
            value: Device specification string.

        Returns:
            Validated device string.

        Raises:
            ValueError: If device format is invalid.
        """
        valid_prefixes = ("cpu", "cuda", "mps")
        if not any(value.startswith(prefix) for prefix in valid_prefixes):
            raise ValueError(
                f"Invalid device specification: {value}. "
                f"Must start with one of: {', '.join(valid_prefixes)}"
            )
        return value
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# Global settings instance
# Constructed once at import time; reads FACE_AGE_* environment variables.
settings = Settings()

__all__ = [
    "Settings",
    "settings",
]
|
face_age_inference/engine.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Face detection and age estimation inference engine.
|
| 2 |
+
|
| 3 |
+
This module implements the FaceAgeInferenceEngine class that coordinates
|
| 4 |
+
face detection and age estimation using YOLO Face-Person Detector and MiVOLO v2.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import time
|
| 8 |
+
from contextlib import contextmanager
|
| 9 |
+
from functools import lru_cache
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import torch
|
| 13 |
+
from opentelemetry import metrics, trace
|
| 14 |
+
from transformers import (
|
| 15 |
+
AutoConfig,
|
| 16 |
+
AutoImageProcessor,
|
| 17 |
+
AutoModel,
|
| 18 |
+
AutoModelForImageClassification,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
from .config import Settings, settings
|
| 22 |
+
from .image import compute_scaled_line_width, draw_face_annotations
|
| 23 |
+
from .types import BoundingBox, InferenceError, InferenceOutput
|
| 24 |
+
|
| 25 |
+
# Type alias for detection results
type FaceDetections = list[BoundingBox]
type PersonDetections = list[BoundingBox]

# Get tracer for this module
tracer = trace.get_tracer(__name__)

# Get meter and create metrics instruments
# Uses no-op provider when running standalone, real provider when ml-api sets one
meter = metrics.get_meter(__name__)
_inference_duration = meter.create_histogram(
    "inference.duration_ms",
    unit="ms",
    description="Total inference time in milliseconds",
)
_yolo_duration = meter.create_histogram(
    "inference.yolo_duration_ms",
    unit="ms",
    description="YOLO face detection time in milliseconds",
)
_mivolo_duration = meter.create_histogram(
    "inference.mivolo_duration_ms",
    unit="ms",
    description="MiVOLO age estimation time in milliseconds",
)
_faces_detected = meter.create_counter(
    "inference.faces_detected",
    description="Total number of faces detected",
)
# NOTE(review): _inference_errors is not referenced in the portion of this
# module shown here — presumably incremented in predict's error path; verify.
_inference_errors = meter.create_counter(
    "inference.errors",
    description="Number of inference errors",
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@contextmanager
def _telemetry_span(name: str, histogram=None):
    """Start a tracing span and optionally record its duration to a histogram.

    Args:
        name: Span name (e.g. 'inference.predict').
        histogram: Optional OpenTelemetry histogram instrument; when given,
            the elapsed wall-clock time in milliseconds is recorded.

    Yields:
        The active span, so callers can attach attributes.
    """
    start = time.perf_counter()
    try:
        with tracer.start_as_current_span(name) as span:
            yield span
    finally:
        # Record in a finally block so durations are captured even when the
        # wrapped body raises; previously, failed operations were silently
        # dropped from the histogram.
        if histogram is not None:
            elapsed_ms = (time.perf_counter() - start) * 1000
            histogram.record(elapsed_ms)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _compute_iou(box1: BoundingBox, box2: BoundingBox) -> float:
    """Compute intersection over union between two bounding boxes.

    Args:
        box1: First bounding box (x1, y1, x2, y2).
        box2: Second bounding box (x1, y1, x2, y2).

    Returns:
        IoU value between 0 and 1 (0.0 when the boxes do not overlap).
    """
    # Intersection rectangle edges.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    overlap_w = right - left
    overlap_h = bottom - top
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    overlap = overlap_w * overlap_h
    union = (
        (box1[2] - box1[0]) * (box1[3] - box1[1])
        + (box2[2] - box2[0]) * (box2[3] - box2[1])
        - overlap
    )
    # Guard against degenerate zero-area boxes.
    return overlap / union if union > 0 else 0.0
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _face_inside_person(face: BoundingBox, person: BoundingBox) -> bool:
    """Check whether a face box should be attributed to a person box.

    The test is deliberately loose: only the face's center point has to lie
    within the person rectangle, not the whole face.

    Args:
        face: Face bounding box (x1, y1, x2, y2).
        person: Person bounding box (x1, y1, x2, y2).

    Returns:
        True if the face center is inside the person box.
    """
    center_x = (face[0] + face[2]) / 2
    center_y = (face[1] + face[3]) / 2
    inside_horizontally = person[0] <= center_x <= person[2]
    inside_vertically = person[1] <= center_y <= person[3]
    return inside_horizontally and inside_vertically
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
class FaceAgeInferenceEngine:
|
| 113 |
+
"""Inference engine coordinating detection and age estimation.
|
| 114 |
+
|
| 115 |
+
Uses YOLO Face-Person Detector for detection and MiVOLO v2 for age estimation.
|
| 116 |
+
Models are automatically downloaded from HuggingFace Hub on first use.
|
| 117 |
+
"""
|
| 118 |
+
|
| 119 |
+
    def __init__(self, service_settings: Settings | None = None) -> None:
        """Initialize inference models.

        Effectful: downloads models from HuggingFace Hub if not cached.

        Args:
            service_settings: Configuration object (uses global if None).

        Raises:
            InferenceError: If models cannot be loaded.
        """
        self.settings = service_settings or settings

        # Determine torch dtype and device
        # fp16 on CUDA halves memory/bandwidth; CPU and MPS stay fp32.
        self.device = torch.device(self.settings.device)
        self.dtype = torch.float16 if "cuda" in self.settings.device else torch.float32

        try:
            # Load YOLO Face-Person Detector from HuggingFace Hub
            # NOTE(review): trust_remote_code executes repo-provided code;
            # the default model IDs are assumed trusted — confirm before
            # pointing these settings at third-party repos.
            self.detector = AutoModel.from_pretrained(
                self.settings.detector_model_id,
                trust_remote_code=True,
                dtype=self.dtype,
            ).to(self.device)

            # Load MiVOLO v2 config, model, and image processor
            self.mivolo_config = AutoConfig.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
            )
            self.mivolo = AutoModelForImageClassification.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
                dtype=self.dtype,
            ).to(self.device)
            self.image_processor = AutoImageProcessor.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
            )
        except Exception as exc:
            # Wrap any hub/IO/load failure in the package's error type so
            # callers only need to catch InferenceError.
            raise InferenceError(
                f"Failed to load models from HuggingFace Hub: {exc}"
            ) from exc
|
| 162 |
+
|
| 163 |
+
def _extract_detections(self, results) -> tuple[FaceDetections, PersonDetections]:
|
| 164 |
+
"""Extract face and person bounding boxes from YOLO results.
|
| 165 |
+
|
| 166 |
+
Args:
|
| 167 |
+
results: YOLO detection results.
|
| 168 |
+
|
| 169 |
+
Returns:
|
| 170 |
+
Tuple of (face_boxes, person_boxes) where each box is (x1, y1, x2, y2).
|
| 171 |
+
"""
|
| 172 |
+
faces: FaceDetections = []
|
| 173 |
+
persons: PersonDetections = []
|
| 174 |
+
|
| 175 |
+
for box in results.boxes:
|
| 176 |
+
cls_id = int(box.cls.item())
|
| 177 |
+
cls_name = results.names[cls_id].lower()
|
| 178 |
+
coords = box.xyxy[0].cpu().numpy()
|
| 179 |
+
bbox: BoundingBox = (
|
| 180 |
+
int(coords[0]),
|
| 181 |
+
int(coords[1]),
|
| 182 |
+
int(coords[2]),
|
| 183 |
+
int(coords[3]),
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
if cls_name == "face":
|
| 187 |
+
faces.append(bbox)
|
| 188 |
+
elif cls_name == "person":
|
| 189 |
+
persons.append(bbox)
|
| 190 |
+
|
| 191 |
+
return faces, persons
|
| 192 |
+
|
| 193 |
+
def _match_faces_to_persons(
|
| 194 |
+
self,
|
| 195 |
+
faces: FaceDetections,
|
| 196 |
+
persons: PersonDetections,
|
| 197 |
+
) -> list[tuple[BoundingBox, BoundingBox | None]]:
|
| 198 |
+
"""Match each face to its corresponding person bounding box.
|
| 199 |
+
|
| 200 |
+
Args:
|
| 201 |
+
faces: List of face bounding boxes.
|
| 202 |
+
persons: List of person bounding boxes.
|
| 203 |
+
|
| 204 |
+
Returns:
|
| 205 |
+
List of (face, person) pairs. Person may be None if no match found.
|
| 206 |
+
"""
|
| 207 |
+
matched: list[tuple[BoundingBox, BoundingBox | None]] = []
|
| 208 |
+
|
| 209 |
+
for face in faces:
|
| 210 |
+
best_person: BoundingBox | None = None
|
| 211 |
+
best_overlap = 0.0
|
| 212 |
+
|
| 213 |
+
for person in persons:
|
| 214 |
+
if _face_inside_person(face, person):
|
| 215 |
+
overlap = _compute_iou(face, person)
|
| 216 |
+
if overlap > best_overlap or best_person is None:
|
| 217 |
+
best_person = person
|
| 218 |
+
best_overlap = overlap
|
| 219 |
+
|
| 220 |
+
matched.append((face, best_person))
|
| 221 |
+
|
| 222 |
+
return matched
|
| 223 |
+
|
| 224 |
+
def _crop_regions(
|
| 225 |
+
self,
|
| 226 |
+
image_bgr: np.ndarray,
|
| 227 |
+
matched_pairs: list[tuple[BoundingBox, BoundingBox | None]],
|
| 228 |
+
) -> tuple[list[np.ndarray], list[np.ndarray | None]]:
|
| 229 |
+
"""Crop face and body regions from image.
|
| 230 |
+
|
| 231 |
+
Args:
|
| 232 |
+
image_bgr: Input image in BGR format.
|
| 233 |
+
matched_pairs: List of (face, person) bounding box pairs.
|
| 234 |
+
|
| 235 |
+
Returns:
|
| 236 |
+
Tuple of (face_crops, body_crops). Body crop may be None if no person matched.
|
| 237 |
+
"""
|
| 238 |
+
face_crops: list[np.ndarray] = []
|
| 239 |
+
body_crops: list[np.ndarray | None] = []
|
| 240 |
+
|
| 241 |
+
h, w = image_bgr.shape[:2]
|
| 242 |
+
|
| 243 |
+
for face, person in matched_pairs:
|
| 244 |
+
# Crop face (clamp to image bounds)
|
| 245 |
+
x1, y1, x2, y2 = face
|
| 246 |
+
x1, y1 = max(0, x1), max(0, y1)
|
| 247 |
+
x2, y2 = min(w, x2), min(h, y2)
|
| 248 |
+
face_crop = image_bgr[y1:y2, x1:x2]
|
| 249 |
+
face_crops.append(face_crop)
|
| 250 |
+
|
| 251 |
+
# Crop body if available
|
| 252 |
+
if person is not None:
|
| 253 |
+
px1, py1, px2, py2 = person
|
| 254 |
+
px1, py1 = max(0, px1), max(0, py1)
|
| 255 |
+
px2, py2 = min(w, px2), min(h, py2)
|
| 256 |
+
body_crop = image_bgr[py1:py2, px1:px2]
|
| 257 |
+
body_crops.append(body_crop)
|
| 258 |
+
else:
|
| 259 |
+
body_crops.append(None)
|
| 260 |
+
|
| 261 |
+
return face_crops, body_crops
|
| 262 |
+
|
| 263 |
+
    def _run_mivolo(
        self,
        face_crops: list[np.ndarray],
        body_crops: list[np.ndarray | None],
    ) -> list[float]:
        """Run MiVOLO v2 age estimation on cropped regions.

        Uses chunked batching to avoid OOM on group photos with many faces.

        Args:
            face_crops: List of face crop images (BGR).
            body_crops: List of body crop images (BGR), may contain None.
                Must be the same length as face_crops (paired by index).

        Returns:
            List of estimated ages, one per face crop, in input order.
        """
        if not face_crops:
            return []

        # Defensive clamp; Settings already enforces ge=1 via pydantic.
        batch_size = max(1, int(self.settings.mivolo_batch_size))

        def _run_batch(
            batch_faces: list[np.ndarray],
            batch_bodies: list[np.ndarray | None],
        ) -> list[float]:
            # Preprocess faces into a single pixel-value tensor on the target device.
            faces_input = self.image_processor(images=batch_faces)["pixel_values"]
            faces_input = faces_input.to(dtype=self.dtype, device=self.device)

            # Collect only the crops that actually have a body; remember
            # their positions so they can be scattered back into the batch.
            valid_body_indices: list[int] = []
            valid_body_images: list[np.ndarray] = []
            for i, body_crop in enumerate(batch_bodies):
                if body_crop is not None:
                    valid_body_indices.append(i)
                    valid_body_images.append(body_crop)

            # Faces without a matched person get an all-zero body tensor —
            # presumably MiVOLO treats zeros as "no body input"; verify
            # against the model card.
            body_input = torch.zeros_like(faces_input)

            if valid_body_images:
                valid_body_input = self.image_processor(images=valid_body_images)[
                    "pixel_values"
                ]
                valid_body_input = valid_body_input.to(dtype=self.dtype, device=self.device)

                # Scatter processed bodies back to their original batch slots.
                for tensor_idx, batch_idx in enumerate(valid_body_indices):
                    body_input[batch_idx] = valid_body_input[tensor_idx]

            with torch.no_grad():
                output = self.mivolo(faces_input=faces_input, body_input=body_input)

            return output.age_output.cpu().flatten().tolist()

        # Process in chunks so huge group photos don't exhaust device memory.
        ages: list[float] = []
        for start in range(0, len(face_crops), batch_size):
            ages.extend(
                _run_batch(
                    face_crops[start : start + batch_size],
                    body_crops[start : start + batch_size],
                )
            )

        return ages
|
| 324 |
+
|
| 325 |
+
def _run_yolo_detection(
|
| 326 |
+
self,
|
| 327 |
+
image_bgr: np.ndarray,
|
| 328 |
+
) -> tuple[FaceDetections, PersonDetections]:
|
| 329 |
+
"""Run YOLO face/person detection with telemetry."""
|
| 330 |
+
with _telemetry_span("inference.yolo_detection", _yolo_duration) as det_span:
|
| 331 |
+
results = self.detector(
|
| 332 |
+
image_bgr,
|
| 333 |
+
conf=self.settings.confidence_threshold,
|
| 334 |
+
iou=self.settings.iou_threshold,
|
| 335 |
+
)[0]
|
| 336 |
+
|
| 337 |
+
faces, persons = self._extract_detections(results)
|
| 338 |
+
det_span.set_attribute("faces_detected", len(faces))
|
| 339 |
+
det_span.set_attribute("persons_detected", len(persons))
|
| 340 |
+
_faces_detected.add(len(faces))
|
| 341 |
+
|
| 342 |
+
return faces, persons
|
| 343 |
+
|
| 344 |
+
def _run_mivolo_with_metrics(
|
| 345 |
+
self,
|
| 346 |
+
face_crops: list[np.ndarray],
|
| 347 |
+
body_crops: list[np.ndarray | None],
|
| 348 |
+
) -> list[float]:
|
| 349 |
+
"""Run MiVOLO v2 age estimation with telemetry."""
|
| 350 |
+
with _telemetry_span("inference.mivolo_age", _mivolo_duration) as age_span:
|
| 351 |
+
ages = self._run_mivolo(face_crops, body_crops)
|
| 352 |
+
age_span.set_attribute("ages_estimated", len(ages))
|
| 353 |
+
|
| 354 |
+
return ages
|
| 355 |
+
|
| 356 |
+
def predict(self, image_bgr: np.ndarray) -> InferenceOutput:
|
| 357 |
+
"""Run face detection and age estimation on an image.
|
| 358 |
+
|
| 359 |
+
Effectful: calls ML models, renders annotations.
|
| 360 |
+
|
| 361 |
+
Args:
|
| 362 |
+
image_bgr: Input image in BGR format.
|
| 363 |
+
|
| 364 |
+
Returns:
|
| 365 |
+
Inference results with annotated image.
|
| 366 |
+
|
| 367 |
+
Raises:
|
| 368 |
+
InferenceError: If inference or annotation fails.
|
| 369 |
+
"""
|
| 370 |
+
if image_bgr.size == 0:
|
| 371 |
+
raise InferenceError("Decoded image is empty.")
|
| 372 |
+
|
| 373 |
+
with _telemetry_span("inference.predict", _inference_duration) as span:
|
| 374 |
+
span.set_attribute("image.height", image_bgr.shape[0])
|
| 375 |
+
span.set_attribute("image.width", image_bgr.shape[1])
|
| 376 |
+
|
| 377 |
+
try:
|
| 378 |
+
# 1. Run face+person detection
|
| 379 |
+
faces, persons = self._run_yolo_detection(image_bgr)
|
| 380 |
+
|
| 381 |
+
# 2. Match faces to persons
|
| 382 |
+
matched_pairs = self._match_faces_to_persons(faces, persons)
|
| 383 |
+
|
| 384 |
+
# 3. Crop face and body regions
|
| 385 |
+
face_crops, body_crops = self._crop_regions(image_bgr, matched_pairs)
|
| 386 |
+
|
| 387 |
+
# 4. Run MiVOLO v2 on crops
|
| 388 |
+
ages = self._run_mivolo_with_metrics(face_crops, body_crops)
|
| 389 |
+
|
| 390 |
+
# 5. Compute annotation parameters
|
| 391 |
+
line_width = compute_scaled_line_width(image_bgr.shape)
|
| 392 |
+
|
| 393 |
+
# 6. Draw annotations (face boxes only with age labels)
|
| 394 |
+
annotated = draw_face_annotations(image_bgr, faces, ages, line_width)
|
| 395 |
+
|
| 396 |
+
span.set_attribute("total_faces", len(faces))
|
| 397 |
+
|
| 398 |
+
except InferenceError:
|
| 399 |
+
_inference_errors.add(1, {"error_type": "inference_error"})
|
| 400 |
+
raise
|
| 401 |
+
except Exception as exc:
|
| 402 |
+
_inference_errors.add(1, {"error_type": "unknown_error"})
|
| 403 |
+
span.record_exception(exc)
|
| 404 |
+
raise InferenceError(
|
| 405 |
+
"Unable to run inference on the provided image."
|
| 406 |
+
) from exc
|
| 407 |
+
|
| 408 |
+
return InferenceOutput(
|
| 409 |
+
ages=tuple(ages),
|
| 410 |
+
annotated_image=annotated,
|
| 411 |
+
)
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
@lru_cache(maxsize=1)
def get_inference_engine() -> FaceAgeInferenceEngine:
    """Get or create singleton inference engine.

    Cached via lru_cache(maxsize=1) so the heavy ML models (YOLO detector,
    MiVOLO v2) are loaded at most once per process.

    Returns:
        Initialized inference engine.
    """
    return FaceAgeInferenceEngine()
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
__all__ = [
|
| 427 |
+
"FaceAgeInferenceEngine",
|
| 428 |
+
"get_inference_engine",
|
| 429 |
+
]
|
face_age_inference/image.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Image processing utilities for face-age-inference.
|
| 2 |
+
|
| 3 |
+
This module handles image decoding, encoding, and annotation operations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import base64
|
| 7 |
+
import math
|
| 8 |
+
from io import BytesIO
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
from PIL import Image
|
| 13 |
+
from pillow_heif import register_heif_opener
|
| 14 |
+
|
| 15 |
+
from .types import (
|
| 16 |
+
ADULT_COLOR,
|
| 17 |
+
AGE_THRESHOLD,
|
| 18 |
+
BASE_LINE_WIDTH,
|
| 19 |
+
DEFAULT_MEDIA_TYPE,
|
| 20 |
+
MEDIA_TYPE_BY_EXTENSION,
|
| 21 |
+
MIN_LINE_WIDTH,
|
| 22 |
+
MINOR_COLOR,
|
| 23 |
+
REFERENCE_DIAGONAL,
|
| 24 |
+
BoundingBox,
|
| 25 |
+
Color,
|
| 26 |
+
ImageShape,
|
| 27 |
+
InferenceError,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
register_heif_opener()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ============================================================================
|
| 34 |
+
# PURE FUNCTIONS: No side effects
|
| 35 |
+
# ============================================================================
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def compute_scaled_line_width(
    img_shape: ImageShape,
    *,
    reference_diagonal: float = REFERENCE_DIAGONAL,
    base_line_width: int = BASE_LINE_WIDTH,
    min_width: int = MIN_LINE_WIDTH,
) -> int:
    """Scale an annotation line width to the image's diagonal size.

    Pure function: deterministic output based only on inputs.

    Args:
        img_shape: Image dimensions as (height, width, ...).
        reference_diagonal: Diagonal length that maps to base_line_width.
        base_line_width: Line width drawn at the reference diagonal.
        min_width: Lower bound on the returned width.

    Returns:
        Integer line width, never smaller than min_width.
    """
    height, width = img_shape[0], img_shape[1]
    scale_factor = math.hypot(width, height) / reference_diagonal
    scaled = int(base_line_width * scale_factor)
    return max(min_width, scaled)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def normalize_file_extension(extension: str) -> str:
    """Return the extension with a leading dot added if it lacks one.

    Pure function: no side effects, deterministic.
    """
    if extension.startswith("."):
        return extension
    return f".{extension}"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def choose_bbox_color(age: float | None, *, threshold: float = AGE_THRESHOLD) -> Color:
    """Pick the bounding-box color for a detection based on estimated age.

    Pure function: deterministic color selection.

    Args:
        age: Estimated age, or None when no estimate is available.
        threshold: Ages strictly below this value are treated as minors.

    Returns:
        Color tuple (BGR format).
    """
    is_minor = age is not None and age < threshold
    return MINOR_COLOR if is_minor else ADULT_COLOR
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# ============================================================================
|
| 90 |
+
# IMAGE PROCESSING: I/O operations
|
| 91 |
+
# ============================================================================
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def decode_image(data: bytes) -> np.ndarray:
    """Decode raw image bytes into a BGR numpy array.

    Effectful: performs I/O (reads bytes, opens image). HEIC/HEIF are
    supported via the module-level pillow_heif registration.

    Args:
        data: Raw image bytes.

    Returns:
        BGR numpy array.

    Raises:
        InferenceError: If the bytes are empty or not a readable image.
    """
    if not data:
        raise InferenceError("Uploaded file is empty.")

    try:
        with Image.open(BytesIO(data)) as pil_image:
            rgb_image = pil_image.convert("RGB")
    except Exception as exc:
        raise InferenceError("Unable to read the provided image.") from exc

    rgb_array = np.array(rgb_image)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def encode_image_to_base64(image: np.ndarray, file_extension: str) -> tuple[str, str]:
    """Encode a BGR image as a base64 string plus its media type.

    Effectful: performs encoding operation.

    Args:
        image: BGR numpy array.
        file_extension: Desired output format (with or without leading dot).

    Returns:
        Tuple of (media_type, base64_string).

    Raises:
        InferenceError: If OpenCV fails to encode the image.
    """
    normalized = normalize_file_extension(file_extension).lower()
    media_type = MEDIA_TYPE_BY_EXTENSION.get(normalized, DEFAULT_MEDIA_TYPE)
    # JPEG-family media types re-encode as .jpg; everything else as .png.
    target_ext = ".jpg" if media_type == "image/jpeg" else ".png"

    ok, buffer = cv2.imencode(target_ext, image)
    if not ok:
        raise InferenceError("Unable to encode annotated image.")

    return media_type, base64.b64encode(buffer).decode("utf-8")
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def draw_face_annotations(
    image: np.ndarray,
    faces: list[BoundingBox],
    ages: list[float],
    line_width: int,
) -> np.ndarray:
    """Draw an age-colored bounding box around each detected face.

    Effectful: modifies image array (returns copy to maintain immutability
    at API level). Box color comes from choose_bbox_color, so minors and
    adults are distinguished visually.

    Args:
        image: Source BGR image.
        faces: Face bounding boxes as (x1, y1, x2, y2).
        ages: Estimated ages, one per face.
        line_width: Rectangle border thickness.

    Returns:
        Annotated copy of the input image.

    Raises:
        InferenceError: If annotation fails (including a faces/ages
            length mismatch, via zip(strict=True)).
    """
    try:
        canvas = image.copy()
        for (x1, y1, x2, y2), age in zip(faces, ages, strict=True):
            cv2.rectangle(
                canvas,
                (x1, y1),
                (x2, y2),
                choose_bbox_color(age),
                line_width,
            )
        return canvas
    except Exception as exc:
        raise InferenceError("Unable to draw annotated detections.") from exc
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
__all__ = [
|
| 180 |
+
"compute_scaled_line_width",
|
| 181 |
+
"normalize_file_extension",
|
| 182 |
+
"choose_bbox_color",
|
| 183 |
+
"decode_image",
|
| 184 |
+
"encode_image_to_base64",
|
| 185 |
+
"draw_face_annotations",
|
| 186 |
+
]
|
face_age_inference/types.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Type definitions for face detection and age estimation.
|
| 2 |
+
|
| 3 |
+
This module defines the core data types and constants used throughout
|
| 4 |
+
the face-age-inference package.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import Final
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
# Type aliases using modern Python 3.12 style
|
| 13 |
+
type ImageShape = tuple[int, ...]
|
| 14 |
+
type BoundingBox = tuple[int, int, int, int]
|
| 15 |
+
type Color = tuple[int, int, int]
|
| 16 |
+
|
| 17 |
+
# Constants for supported media types
|
| 18 |
+
SUPPORTED_IMAGE_MEDIA_TYPES: Final[dict[str, str]] = {
|
| 19 |
+
"image/jpeg": ".jpg",
|
| 20 |
+
"image/png": ".png",
|
| 21 |
+
"image/jpg": ".jpg",
|
| 22 |
+
"image/heic": ".jpg",
|
| 23 |
+
"image/heif": ".jpg",
|
| 24 |
+
"image/heic-sequence": ".jpg",
|
| 25 |
+
}
|
| 26 |
+
MEDIA_TYPE_BY_EXTENSION: Final[dict[str, str]] = {
|
| 27 |
+
".jpg": "image/jpeg",
|
| 28 |
+
".jpeg": "image/jpeg",
|
| 29 |
+
".png": "image/png",
|
| 30 |
+
}
|
| 31 |
+
DEFAULT_MEDIA_TYPE: Final[str] = "image/jpeg"
|
| 32 |
+
|
| 33 |
+
# Annotation constants
|
| 34 |
+
REFERENCE_DIAGONAL: Final[float] = 4600.0
|
| 35 |
+
BASE_LINE_WIDTH: Final[int] = 5
|
| 36 |
+
MIN_LINE_WIDTH: Final[int] = 1
|
| 37 |
+
MINOR_COLOR: Final[Color] = (0, 255, 0) # Green for minors
|
| 38 |
+
ADULT_COLOR: Final[Color] = (255, 0, 0) # Blue for adults
|
| 39 |
+
AGE_THRESHOLD: Final[float] = 18.0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class InferenceError(RuntimeError):
    """Raised when an unrecoverable inference issue occurs.

    Shared error type for decode, detection, age-estimation, and
    annotation failures throughout the package.
    """
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@dataclass(slots=True, frozen=True)
|
| 47 |
+
class InferenceOutput:
|
| 48 |
+
"""Immutable output from face detection and age estimation."""
|
| 49 |
+
|
| 50 |
+
ages: tuple[float, ...]
|
| 51 |
+
annotated_image: np.ndarray
|
| 52 |
+
|
| 53 |
+
def __post_init__(self) -> None:
|
| 54 |
+
"""Validate invariants."""
|
| 55 |
+
if any(age < 0 for age in self.ages if not age != age): # NaN allowed
|
| 56 |
+
raise ValueError("Age estimates must be non-negative (NaN is allowed)")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
__all__ = [
|
| 60 |
+
"ImageShape",
|
| 61 |
+
"BoundingBox",
|
| 62 |
+
"Color",
|
| 63 |
+
"SUPPORTED_IMAGE_MEDIA_TYPES",
|
| 64 |
+
"MEDIA_TYPE_BY_EXTENSION",
|
| 65 |
+
"DEFAULT_MEDIA_TYPE",
|
| 66 |
+
"REFERENCE_DIAGONAL",
|
| 67 |
+
"BASE_LINE_WIDTH",
|
| 68 |
+
"MIN_LINE_WIDTH",
|
| 69 |
+
"MINOR_COLOR",
|
| 70 |
+
"ADULT_COLOR",
|
| 71 |
+
"AGE_THRESHOLD",
|
| 72 |
+
"InferenceError",
|
| 73 |
+
"InferenceOutput",
|
| 74 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv export --package huggingface-space --no-dev --no-hashes --no-emit-workspace
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
# via gradio
|
| 5 |
+
annotated-doc==0.0.4
|
| 6 |
+
# via fastapi
|
| 7 |
+
annotated-types==0.7.0
|
| 8 |
+
# via pydantic
|
| 9 |
+
anyio==4.12.1
|
| 10 |
+
# via
|
| 11 |
+
# gradio
|
| 12 |
+
# httpx
|
| 13 |
+
# starlette
|
| 14 |
+
# watchfiles
|
| 15 |
+
audioop-lts==0.2.2 ; python_full_version >= '3.13'
|
| 16 |
+
# via gradio
|
| 17 |
+
brotli==1.2.0
|
| 18 |
+
# via gradio
|
| 19 |
+
certifi==2026.1.4
|
| 20 |
+
# via
|
| 21 |
+
# httpcore
|
| 22 |
+
# httpx
|
| 23 |
+
# requests
|
| 24 |
+
# sentry-sdk
|
| 25 |
+
charset-normalizer==3.4.4
|
| 26 |
+
# via requests
|
| 27 |
+
click==8.3.1
|
| 28 |
+
# via
|
| 29 |
+
# rich-toolkit
|
| 30 |
+
# typer
|
| 31 |
+
# uvicorn
|
| 32 |
+
colorama==0.4.6 ; sys_platform == 'win32'
|
| 33 |
+
# via
|
| 34 |
+
# click
|
| 35 |
+
# tqdm
|
| 36 |
+
# uvicorn
|
| 37 |
+
contourpy==1.3.3
|
| 38 |
+
# via matplotlib
|
| 39 |
+
cuda-bindings==12.9.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 40 |
+
# via torch
|
| 41 |
+
cuda-pathfinder==1.3.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 42 |
+
# via cuda-bindings
|
| 43 |
+
cycler==0.12.1
|
| 44 |
+
# via matplotlib
|
| 45 |
+
dnspython==2.8.0
|
| 46 |
+
# via email-validator
|
| 47 |
+
email-validator==2.3.0
|
| 48 |
+
# via
|
| 49 |
+
# fastapi
|
| 50 |
+
# pydantic
|
| 51 |
+
fastapi==0.128.3
|
| 52 |
+
# via gradio
|
| 53 |
+
fastapi-cli==0.0.20
|
| 54 |
+
# via fastapi
|
| 55 |
+
fastapi-cloud-cli==0.11.0
|
| 56 |
+
# via fastapi-cli
|
| 57 |
+
fastar==0.8.0
|
| 58 |
+
# via fastapi-cloud-cli
|
| 59 |
+
ffmpy==1.0.0
|
| 60 |
+
# via gradio
|
| 61 |
+
filelock==3.20.3
|
| 62 |
+
# via
|
| 63 |
+
# huggingface-hub
|
| 64 |
+
# torch
|
| 65 |
+
# transformers
|
| 66 |
+
fonttools==4.61.1
|
| 67 |
+
# via matplotlib
|
| 68 |
+
fsspec==2026.2.0
|
| 69 |
+
# via
|
| 70 |
+
# gradio-client
|
| 71 |
+
# huggingface-hub
|
| 72 |
+
# torch
|
| 73 |
+
gradio==6.5.1
|
| 74 |
+
# via
|
| 75 |
+
# huggingface-space
|
| 76 |
+
# spaces
|
| 77 |
+
gradio-client==2.0.3
|
| 78 |
+
# via gradio
|
| 79 |
+
groovy==0.1.2
|
| 80 |
+
# via gradio
|
| 81 |
+
h11==0.16.0
|
| 82 |
+
# via
|
| 83 |
+
# httpcore
|
| 84 |
+
# uvicorn
|
| 85 |
+
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
| 86 |
+
# via huggingface-hub
|
| 87 |
+
httpcore==1.0.9
|
| 88 |
+
# via httpx
|
| 89 |
+
httptools==0.7.1
|
| 90 |
+
# via uvicorn
|
| 91 |
+
httpx==0.28.1
|
| 92 |
+
# via
|
| 93 |
+
# fastapi
|
| 94 |
+
# fastapi-cloud-cli
|
| 95 |
+
# gradio
|
| 96 |
+
# gradio-client
|
| 97 |
+
# safehttpx
|
| 98 |
+
# spaces
|
| 99 |
+
hub-sdk==0.0.24
|
| 100 |
+
# via ultralytics
|
| 101 |
+
huggingface-hub==0.36.2
|
| 102 |
+
# via
|
| 103 |
+
# gradio
|
| 104 |
+
# gradio-client
|
| 105 |
+
# mivolo
|
| 106 |
+
# timm
|
| 107 |
+
# tokenizers
|
| 108 |
+
# transformers
|
| 109 |
+
idna==3.11
|
| 110 |
+
# via
|
| 111 |
+
# anyio
|
| 112 |
+
# email-validator
|
| 113 |
+
# httpx
|
| 114 |
+
# requests
|
| 115 |
+
importlib-metadata==8.7.1
|
| 116 |
+
# via opentelemetry-api
|
| 117 |
+
jinja2==3.1.6
|
| 118 |
+
# via
|
| 119 |
+
# fastapi
|
| 120 |
+
# gradio
|
| 121 |
+
# torch
|
| 122 |
+
kiwisolver==1.4.9
|
| 123 |
+
# via matplotlib
|
| 124 |
+
lapx==0.9.4
|
| 125 |
+
# via mivolo
|
| 126 |
+
markdown-it-py==4.0.0
|
| 127 |
+
# via rich
|
| 128 |
+
markupsafe==3.0.3
|
| 129 |
+
# via
|
| 130 |
+
# gradio
|
| 131 |
+
# jinja2
|
| 132 |
+
matplotlib==3.10.8
|
| 133 |
+
# via
|
| 134 |
+
# seaborn
|
| 135 |
+
# ultralytics
|
| 136 |
+
mdurl==0.1.2
|
| 137 |
+
# via markdown-it-py
|
| 138 |
+
mivolo @ git+https://github.com/WildChlamydia/MiVOLO.git@b185dce6ed5061aae3a95b0a9513d7725718ac71
|
| 139 |
+
# via face-age-inference
|
| 140 |
+
mpmath==1.3.0
|
| 141 |
+
# via sympy
|
| 142 |
+
networkx==3.6.1
|
| 143 |
+
# via torch
|
| 144 |
+
numpy==2.4.2
|
| 145 |
+
# via
|
| 146 |
+
# contourpy
|
| 147 |
+
# gradio
|
| 148 |
+
# lapx
|
| 149 |
+
# matplotlib
|
| 150 |
+
# opencv-python
|
| 151 |
+
# opencv-python-headless
|
| 152 |
+
# pandas
|
| 153 |
+
# scipy
|
| 154 |
+
# seaborn
|
| 155 |
+
# torchvision
|
| 156 |
+
# transformers
|
| 157 |
+
# ultralytics
|
| 158 |
+
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 159 |
+
# via
|
| 160 |
+
# nvidia-cudnn-cu12
|
| 161 |
+
# nvidia-cusolver-cu12
|
| 162 |
+
# torch
|
| 163 |
+
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 164 |
+
# via torch
|
| 165 |
+
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 166 |
+
# via torch
|
| 167 |
+
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 168 |
+
# via torch
|
| 169 |
+
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 170 |
+
# via torch
|
| 171 |
+
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 172 |
+
# via torch
|
| 173 |
+
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 174 |
+
# via torch
|
| 175 |
+
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 176 |
+
# via torch
|
| 177 |
+
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 178 |
+
# via torch
|
| 179 |
+
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 180 |
+
# via
|
| 181 |
+
# nvidia-cusolver-cu12
|
| 182 |
+
# torch
|
| 183 |
+
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 184 |
+
# via torch
|
| 185 |
+
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 186 |
+
# via torch
|
| 187 |
+
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 188 |
+
# via
|
| 189 |
+
# nvidia-cufft-cu12
|
| 190 |
+
# nvidia-cusolver-cu12
|
| 191 |
+
# nvidia-cusparse-cu12
|
| 192 |
+
# torch
|
| 193 |
+
nvidia-nvshmem-cu12==3.4.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 194 |
+
# via torch
|
| 195 |
+
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 196 |
+
# via torch
|
| 197 |
+
opencv-python==4.13.0.92
|
| 198 |
+
# via ultralytics
|
| 199 |
+
opencv-python-headless==4.13.0.92
|
| 200 |
+
# via face-age-inference
|
| 201 |
+
opentelemetry-api==1.39.1
|
| 202 |
+
# via face-age-inference
|
| 203 |
+
orjson==3.11.7
|
| 204 |
+
# via gradio
|
| 205 |
+
packaging==26.0
|
| 206 |
+
# via
|
| 207 |
+
# gradio
|
| 208 |
+
# gradio-client
|
| 209 |
+
# huggingface-hub
|
| 210 |
+
# matplotlib
|
| 211 |
+
# spaces
|
| 212 |
+
# transformers
|
| 213 |
+
pandas==3.0.0
|
| 214 |
+
# via
|
| 215 |
+
# gradio
|
| 216 |
+
# seaborn
|
| 217 |
+
# ultralytics
|
| 218 |
+
pillow==12.1.0
|
| 219 |
+
# via
|
| 220 |
+
# face-age-inference
|
| 221 |
+
# gradio
|
| 222 |
+
# matplotlib
|
| 223 |
+
# pillow-heif
|
| 224 |
+
# torchvision
|
| 225 |
+
# ultralytics
|
| 226 |
+
pillow-heif==1.2.0
|
| 227 |
+
# via face-age-inference
|
| 228 |
+
psutil==5.9.8
|
| 229 |
+
# via
|
| 230 |
+
# spaces
|
| 231 |
+
# ultralytics
|
| 232 |
+
py-cpuinfo==9.0.0
|
| 233 |
+
# via ultralytics
|
| 234 |
+
pydantic==2.12.5
|
| 235 |
+
# via
|
| 236 |
+
# fastapi
|
| 237 |
+
# fastapi-cloud-cli
|
| 238 |
+
# gradio
|
| 239 |
+
# pydantic-extra-types
|
| 240 |
+
# pydantic-settings
|
| 241 |
+
# spaces
|
| 242 |
+
pydantic-core==2.41.5
|
| 243 |
+
# via pydantic
|
| 244 |
+
pydantic-extra-types==2.11.0
|
| 245 |
+
# via fastapi
|
| 246 |
+
pydantic-settings==2.12.0
|
| 247 |
+
# via
|
| 248 |
+
# face-age-inference
|
| 249 |
+
# fastapi
|
| 250 |
+
pydub==0.25.1
|
| 251 |
+
# via gradio
|
| 252 |
+
pygments==2.19.2
|
| 253 |
+
# via rich
|
| 254 |
+
pyparsing==3.3.2
|
| 255 |
+
# via matplotlib
|
| 256 |
+
python-dateutil==2.9.0.post0
|
| 257 |
+
# via
|
| 258 |
+
# matplotlib
|
| 259 |
+
# pandas
|
| 260 |
+
python-dotenv==1.2.1
|
| 261 |
+
# via
|
| 262 |
+
# pydantic-settings
|
| 263 |
+
# uvicorn
|
| 264 |
+
python-multipart==0.0.22
|
| 265 |
+
# via
|
| 266 |
+
# fastapi
|
| 267 |
+
# gradio
|
| 268 |
+
pytz==2025.2
|
| 269 |
+
# via gradio
|
| 270 |
+
pyyaml==6.0.3
|
| 271 |
+
# via
|
| 272 |
+
# gradio
|
| 273 |
+
# huggingface-hub
|
| 274 |
+
# timm
|
| 275 |
+
# transformers
|
| 276 |
+
# ultralytics
|
| 277 |
+
# uvicorn
|
| 278 |
+
regex==2026.1.15
|
| 279 |
+
# via transformers
|
| 280 |
+
requests==2.32.5
|
| 281 |
+
# via
|
| 282 |
+
# hub-sdk
|
| 283 |
+
# huggingface-hub
|
| 284 |
+
# spaces
|
| 285 |
+
# transformers
|
| 286 |
+
# ultralytics
|
| 287 |
+
rich==14.3.2
|
| 288 |
+
# via
|
| 289 |
+
# rich-toolkit
|
| 290 |
+
# typer
|
| 291 |
+
rich-toolkit==0.18.1
|
| 292 |
+
# via
|
| 293 |
+
# fastapi-cli
|
| 294 |
+
# fastapi-cloud-cli
|
| 295 |
+
rignore==0.7.6
|
| 296 |
+
# via fastapi-cloud-cli
|
| 297 |
+
safehttpx==0.1.7
|
| 298 |
+
# via gradio
|
| 299 |
+
safetensors==0.7.0
|
| 300 |
+
# via
|
| 301 |
+
# timm
|
| 302 |
+
# transformers
|
| 303 |
+
scipy==1.17.0
|
| 304 |
+
# via ultralytics
|
| 305 |
+
seaborn==0.13.2
|
| 306 |
+
# via ultralytics
|
| 307 |
+
semantic-version==2.10.0
|
| 308 |
+
# via gradio
|
| 309 |
+
sentry-sdk==2.52.0
|
| 310 |
+
# via fastapi-cloud-cli
|
| 311 |
+
setuptools==81.0.0
|
| 312 |
+
# via torch
|
| 313 |
+
shellingham==1.5.4
|
| 314 |
+
# via typer
|
| 315 |
+
six==1.17.0
|
| 316 |
+
# via python-dateutil
|
| 317 |
+
spaces==0.47.0
|
| 318 |
+
# via huggingface-space
|
| 319 |
+
starlette==0.52.1
|
| 320 |
+
# via
|
| 321 |
+
# fastapi
|
| 322 |
+
# gradio
|
| 323 |
+
sympy==1.14.0
|
| 324 |
+
# via torch
|
| 325 |
+
thop==0.1.1.post2209072238
|
| 326 |
+
# via ultralytics
|
| 327 |
+
timm==0.8.13.dev0
|
| 328 |
+
# via mivolo
|
| 329 |
+
tokenizers==0.22.2
|
| 330 |
+
# via transformers
|
| 331 |
+
tomlkit==0.13.3
|
| 332 |
+
# via gradio
|
| 333 |
+
torch==2.10.0
|
| 334 |
+
# via
|
| 335 |
+
# face-age-inference
|
| 336 |
+
# thop
|
| 337 |
+
# timm
|
| 338 |
+
# torchvision
|
| 339 |
+
# ultralytics
|
| 340 |
+
torchvision==0.25.0
|
| 341 |
+
# via
|
| 342 |
+
# timm
|
| 343 |
+
# ultralytics
|
| 344 |
+
tqdm==4.67.3
|
| 345 |
+
# via
|
| 346 |
+
# huggingface-hub
|
| 347 |
+
# transformers
|
| 348 |
+
# ultralytics
|
| 349 |
+
transformers==4.57.6
|
| 350 |
+
# via face-age-inference
|
| 351 |
+
triton==3.6.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 352 |
+
# via torch
|
| 353 |
+
typer==0.21.1
|
| 354 |
+
# via
|
| 355 |
+
# fastapi-cli
|
| 356 |
+
# fastapi-cloud-cli
|
| 357 |
+
# gradio
|
| 358 |
+
typing-extensions==4.15.0
|
| 359 |
+
# via
|
| 360 |
+
# anyio
|
| 361 |
+
# fastapi
|
| 362 |
+
# gradio
|
| 363 |
+
# gradio-client
|
| 364 |
+
# huggingface-hub
|
| 365 |
+
# opentelemetry-api
|
| 366 |
+
# pydantic
|
| 367 |
+
# pydantic-core
|
| 368 |
+
# pydantic-extra-types
|
| 369 |
+
# rich-toolkit
|
| 370 |
+
# spaces
|
| 371 |
+
# starlette
|
| 372 |
+
# torch
|
| 373 |
+
# typer
|
| 374 |
+
# typing-inspection
|
| 375 |
+
typing-inspection==0.4.2
|
| 376 |
+
# via
|
| 377 |
+
# fastapi
|
| 378 |
+
# pydantic
|
| 379 |
+
# pydantic-settings
|
| 380 |
+
tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
|
| 381 |
+
# via pandas
|
| 382 |
+
ultralytics==8.1.0
|
| 383 |
+
# via mivolo
|
| 384 |
+
urllib3==2.6.3
|
| 385 |
+
# via
|
| 386 |
+
# requests
|
| 387 |
+
# sentry-sdk
|
| 388 |
+
uvicorn==0.40.0
|
| 389 |
+
# via
|
| 390 |
+
# fastapi
|
| 391 |
+
# fastapi-cli
|
| 392 |
+
# fastapi-cloud-cli
|
| 393 |
+
# gradio
|
| 394 |
+
uvloop==0.22.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
|
| 395 |
+
# via uvicorn
|
| 396 |
+
watchfiles==1.1.1
|
| 397 |
+
# via uvicorn
|
| 398 |
+
websockets==16.0
|
| 399 |
+
# via uvicorn
|
| 400 |
+
zipp==3.23.0
|
| 401 |
+
# via importlib-metadata
|