Spaces:
Build error
Build error
github-actions[bot]
committed on
Commit
·
a2b95a9
0
Parent(s):
Deploy from 128291a769737147011181c09a08b5186e167d8e
Browse files- README.md +77 -0
- app.py +99 -0
- face_age_inference/__init__.py +52 -0
- face_age_inference/config.py +155 -0
- face_age_inference/engine.py +429 -0
- face_age_inference/image.py +186 -0
- face_age_inference/types.py +74 -0
- requirements.txt +401 -0
README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: TangoBot
|
| 3 |
+
emoji: 📸
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: gradio
|
| 7 |
+
python_version: "3.12"
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# TangoBot
|
| 13 |
+
|
| 14 |
+
Gradio web UI for face detection and age estimation, designed for HuggingFace Spaces deployment. This demo showcases TangoBot's photo analysis capabilities for [Tango Charities](https://www.tangocharities.org/).
|
| 15 |
+
|
| 16 |
+
## About Tango Charities
|
| 17 |
+
|
| 18 |
+
Tango Charities is a volunteer-led nonprofit fighting hunger through Feed The City events. Since 2015, they've mobilized 110,000+ volunteers to provide 8+ million meals across 73 cities. TangoBot helps the team count attendees in event photos and identify youth participation.
|
| 19 |
+
|
| 20 |
+
## Features
|
| 21 |
+
|
| 22 |
+
- Interactive web interface for image upload
|
| 23 |
+
- Real-time face detection and age estimation
|
| 24 |
+
- Webcam and clipboard support
|
| 25 |
+
- Embeds inference directly (no external HTTP calls)
|
| 26 |
+
|
| 27 |
+
## Running Locally
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
cd apps/huggingface-space
|
| 31 |
+
uv run gradio app.py
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
Opens at http://localhost:7860
|
| 35 |
+
|
| 36 |
+
## Usage
|
| 37 |
+
|
| 38 |
+
1. Upload an image (or use webcam/clipboard)
|
| 39 |
+
2. View the annotated image with face bounding boxes
|
| 40 |
+
3. See detection results:
|
| 41 |
+
- Number of faces detected
|
| 42 |
+
- Count of people estimated under 18
|
| 43 |
+
|
| 44 |
+
## Color Coding
|
| 45 |
+
|
| 46 |
+
Bounding boxes are color-coded:
|
| 47 |
+
- **Green**: Under 18 (minors)
|
| 48 |
+
- **Blue**: 18 and older (adults)
|
| 49 |
+
|
| 50 |
+
## Deploying to HuggingFace Spaces
|
| 51 |
+
|
| 52 |
+
1. Create a new Space on HuggingFace (Gradio SDK)
|
| 53 |
+
2. Copy the package source and app files:
|
| 54 |
+
```
|
| 55 |
+
packages/face-age-inference/src/face_age_inference/ -> face_age_inference/
|
| 56 |
+
apps/huggingface-space/app.py -> app.py
|
| 57 |
+
apps/huggingface-space/requirements.txt -> requirements.txt
|
| 58 |
+
```
|
| 59 |
+
3. Push to the Space repository
|
| 60 |
+
|
| 61 |
+
Models are automatically downloaded from HuggingFace Hub on first run (~450MB).
|
| 62 |
+
|
| 63 |
+
## Requirements
|
| 64 |
+
|
| 65 |
+
See `requirements.txt` for dependencies. Key packages:
|
| 66 |
+
- `gradio>=5.0.0` - Web UI framework
|
| 67 |
+
- `torch>=2.9.1` - PyTorch
|
| 68 |
+
- `transformers>=4.51.0` - HuggingFace Transformers
|
| 69 |
+
- `ultralytics>=8.3.230` - YOLO implementation
|
| 70 |
+
|
| 71 |
+
## Configuration
|
| 72 |
+
|
| 73 |
+
The app uses default settings from `face-age-inference`. To customize, modify environment variables:
|
| 74 |
+
|
| 75 |
+
```bash
|
| 76 |
+
FACE_AGE_DEVICE=cuda:0 uv run gradio app.py
|
| 77 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio app for face detection and age estimation.
|
| 2 |
+
|
| 3 |
+
This app embeds the face-age-inference engine directly for deployment
|
| 4 |
+
on HuggingFace Spaces (which cannot make external HTTP calls).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import cv2
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import numpy as np
|
| 10 |
+
import spaces
|
| 11 |
+
from face_age_inference import (
|
| 12 |
+
FaceAgeInferenceEngine,
|
| 13 |
+
InferenceError,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# Initialize the inference engine once at startup
|
| 17 |
+
engine: FaceAgeInferenceEngine | None = None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_engine() -> FaceAgeInferenceEngine:
    """Return the process-wide inference engine, constructing it lazily.

    The engine is created on first call and cached in the module-level
    ``engine`` global so model weights are only loaded once per process.
    """
    global engine
    if engine is not None:
        return engine
    engine = FaceAgeInferenceEngine()
    return engine
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@spaces.GPU
def predict(image: np.ndarray | None) -> tuple[np.ndarray | None, str]:
    """Run face detection and age estimation on an image.

    Args:
        image: Input image as RGB numpy array from Gradio.

    Returns:
        Tuple of (annotated image, results text).
    """

    def _summarize(ages: list[float]) -> str:
        # Build the human-readable summary shown in the Results textbox.
        total = len(ages)
        if total == 0:
            return "No faces detected."
        # NaN ages are "unknown" and deliberately excluded from the minor count.
        minors = sum(1 for age in ages if not np.isnan(age) and age < 18)
        face_word = "face" if total == 1 else "faces"
        minor_word = "person" if minors == 1 else "people"
        return (
            f"Detected {total} {face_word}.\n"
            f"Estimated {minors} {minor_word} under 18."
        )

    if image is None:
        return None, "Please upload an image."

    try:
        # Gradio hands us RGB; the engine works on OpenCV-style BGR.
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run inference (renamed local avoids shadowing the module global).
        inference_engine = get_engine()
        result = inference_engine.predict(image_bgr)

        # Flip the annotated output back to RGB for display.
        annotated_rgb = cv2.cvtColor(result.annotated_image, cv2.COLOR_BGR2RGB)

        return annotated_rgb, _summarize(result.ages)

    except InferenceError as e:
        return None, f"Error: {e}"
    except Exception as e:
        # Top-level UI boundary: surface the message instead of crashing the app.
        return None, f"Unexpected error: {e}"
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Create Gradio interface
# gr.Interface wires predict() to one image input and (image, text) outputs.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(
        label="Upload Image",
        type="numpy",  # predict() receives an RGB numpy array
        sources=["upload", "webcam", "clipboard"],
    ),
    outputs=[
        gr.Image(label="Annotated Image", type="numpy"),
        gr.Textbox(label="Results", lines=2),
    ],
    title="Face Detection & Age Estimation",
    description=(
        "Upload an image to detect faces and estimate ages. "
        "Faces are highlighted with bounding boxes: "
        "**green** for minors (under 18), **blue** for adults."
    ),
    examples=[],
    cache_examples=False,
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside the Spaces container.
    demo.launch(server_name="0.0.0.0")
|
face_age_inference/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Face detection and age estimation inference package.
|
| 2 |
+
|
| 3 |
+
This package provides a reusable inference engine for detecting faces
|
| 4 |
+
and estimating ages using YOLO Face-Person Detector and MiVOLO v2 models.
|
| 5 |
+
Models are automatically downloaded from HuggingFace Hub on first use.
|
| 6 |
+
|
| 7 |
+
Example usage:
|
| 8 |
+
from face_age_inference import FaceAgeInferenceEngine, decode_image
|
| 9 |
+
|
| 10 |
+
engine = FaceAgeInferenceEngine()
|
| 11 |
+
image_bgr = decode_image(image_bytes)
|
| 12 |
+
result = engine.predict(image_bgr)
|
| 13 |
+
print(f"Detected {len(result.ages)} faces with ages: {result.ages}")
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from .config import Settings, settings
|
| 17 |
+
from .engine import FaceAgeInferenceEngine, get_inference_engine
|
| 18 |
+
from .image import (
|
| 19 |
+
choose_bbox_color,
|
| 20 |
+
compute_scaled_line_width,
|
| 21 |
+
decode_image,
|
| 22 |
+
draw_face_annotations,
|
| 23 |
+
encode_image_to_base64,
|
| 24 |
+
normalize_file_extension,
|
| 25 |
+
)
|
| 26 |
+
from .types import (
|
| 27 |
+
DEFAULT_MEDIA_TYPE,
|
| 28 |
+
SUPPORTED_IMAGE_MEDIA_TYPES,
|
| 29 |
+
InferenceError,
|
| 30 |
+
InferenceOutput,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# Public API re-exported at package level; grouped by source module.
__all__ = [
    # Config
    "Settings",
    "settings",
    # Engine
    "FaceAgeInferenceEngine",
    "get_inference_engine",
    # Image utilities
    "decode_image",
    "encode_image_to_base64",
    "compute_scaled_line_width",
    "normalize_file_extension",
    "choose_bbox_color",
    "draw_face_annotations",
    # Types
    "InferenceError",
    "InferenceOutput",
    "SUPPORTED_IMAGE_MEDIA_TYPES",
    "DEFAULT_MEDIA_TYPE",
]
|
face_age_inference/config.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Application configuration using Pydantic Settings.
|
| 2 |
+
|
| 3 |
+
Configuration is loaded from environment variables with the FACE_AGE_ prefix.
|
| 4 |
+
Falls back to sensible defaults for local development.
|
| 5 |
+
|
| 6 |
+
Environment variables:
|
| 7 |
+
FACE_AGE_DEVICE: PyTorch device (default: cpu)
|
| 8 |
+
FACE_AGE_DETECTOR_MODEL_ID: HuggingFace model ID for face/person detector
|
| 9 |
+
FACE_AGE_MIVOLO_MODEL_ID: HuggingFace model ID for MiVOLO v2 age estimator
|
| 10 |
+
FACE_AGE_CONFIDENCE_THRESHOLD: Detection confidence threshold
|
| 11 |
+
FACE_AGE_IOU_THRESHOLD: Detection IoU threshold
|
| 12 |
+
FACE_AGE_ANNOTATION_FORMAT: Output image format
|
| 13 |
+
FACE_AGE_MIVOLO_BATCH_SIZE: Max batch size for MiVOLO forward passes
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
+
from pydantic import Field, field_validator
|
| 18 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 19 |
+
|
| 20 |
+
# Type aliases
|
| 21 |
+
type DeviceSpec = str
|
| 22 |
+
type Probability = float
|
| 23 |
+
|
| 24 |
+
# Default HuggingFace model IDs
|
| 25 |
+
_DEFAULT_DETECTOR_MODEL_ID: str = "iitolstykh/YOLO-Face-Person-Detector"
|
| 26 |
+
_DEFAULT_MIVOLO_MODEL_ID: str = "iitolstykh/mivolo_v2"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _detect_best_device() -> str:
    """Pick the most capable inference device that torch reports as usable.

    Returns:
        'cuda' when an NVIDIA GPU is available, 'mps' on Apple Silicon,
        otherwise 'cpu'.
    """
    # Probe in order of preference; fall through to CPU.
    for name, is_available in (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    ):
        if is_available():
            return name
    return "cpu"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Threshold bounds
|
| 43 |
+
MIN_PROBABILITY: Probability = 0.0
|
| 44 |
+
MAX_PROBABILITY: Probability = 1.0
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class Settings(BaseSettings):
    """Runtime configuration for face-age-inference.

    Values are read from FACE_AGE_* environment variables (see model_config);
    each field's default is used when the variable is unset.

    Attributes:
        device: PyTorch device specification (e.g., 'cpu', 'cuda:0', 'mps').
        detector_model_id: HuggingFace model ID for YOLO face/person detector.
        mivolo_model_id: HuggingFace model ID for MiVOLO v2 age estimator.
        confidence_threshold: Minimum confidence for face detection (0-1).
        iou_threshold: IoU threshold for non-maximum suppression (0-1).
        annotation_format: Image format for annotated outputs ('.jpg' or '.png').
        mivolo_batch_size: Max batch size for MiVOLO forward passes.
    """

    model_config = SettingsConfigDict(
        env_prefix="FACE_AGE_",
    )

    # Auto-detects cuda/mps/cpu when FACE_AGE_DEVICE is not set.
    device: DeviceSpec = Field(
        default_factory=_detect_best_device,
        description="PyTorch device identifier for inference (e.g., 'cpu', 'cuda:0', 'mps')",
    )

    detector_model_id: str = Field(
        default=_DEFAULT_DETECTOR_MODEL_ID,
        description="HuggingFace model ID for YOLO face/person detector",
    )

    mivolo_model_id: str = Field(
        default=_DEFAULT_MIVOLO_MODEL_ID,
        description="HuggingFace model ID for MiVOLO v2 age estimator",
    )

    confidence_threshold: Probability = Field(
        default=0.15,
        ge=MIN_PROBABILITY,
        le=MAX_PROBABILITY,
        description="Minimum confidence score for face detection (0.0 to 1.0)",
    )

    iou_threshold: Probability = Field(
        default=0.4,
        ge=MIN_PROBABILITY,
        le=MAX_PROBABILITY,
        description="IoU threshold for non-maximum suppression (0.0 to 1.0)",
    )

    annotation_format: str = Field(
        default=".jpg",
        description="Image format for annotated outputs ('.jpg' or '.png')",
    )

    mivolo_batch_size: int = Field(
        default=8,
        ge=1,
        description="Max batch size for MiVOLO forward passes (reduce if you hit OOM)",
    )

    @field_validator("annotation_format")
    @classmethod
    def validate_annotation_format(cls, value: str) -> str:
        """Ensure annotation format is supported.

        Args:
            value: The format string to validate.

        Returns:
            Validated format string ('.jpg' or '.png'; '.jpeg' is folded into '.jpg').

        Raises:
            ValueError: If format is not supported.
        """
        normalized = value.lower()
        if normalized not in {".jpg", ".jpeg", ".png"}:
            raise ValueError(
                f"Unsupported annotation format: {value}. "
                "Must be one of: .jpg, .jpeg, .png"
            )
        # Normalize .jpeg to .jpg for consistency
        return ".jpg" if normalized == ".jpeg" else normalized

    @field_validator("device")
    @classmethod
    def validate_device(cls, value: str) -> str:
        """Validate device specification format.

        Only a prefix check: 'cuda:0' passes, but so would 'cpux' —
        torch.device() downstream is the authoritative validator.

        Args:
            value: Device specification string.

        Returns:
            Validated device string.

        Raises:
            ValueError: If device format is invalid.
        """
        valid_prefixes = ("cpu", "cuda", "mps")
        if not any(value.startswith(prefix) for prefix in valid_prefixes):
            raise ValueError(
                f"Invalid device specification: {value}. "
                f"Must start with one of: {', '.join(valid_prefixes)}"
            )
        return value
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# Global settings instance
# Constructed once at import time; reads FACE_AGE_* environment variables.
settings = Settings()

__all__ = [
    "Settings",
    "settings",
]
|
face_age_inference/engine.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Face detection and age estimation inference engine.
|
| 2 |
+
|
| 3 |
+
This module implements the FaceAgeInferenceEngine class that coordinates
|
| 4 |
+
face detection and age estimation using YOLO Face-Person Detector and MiVOLO v2.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import time
|
| 8 |
+
from contextlib import contextmanager
|
| 9 |
+
from functools import lru_cache
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import torch
|
| 13 |
+
from opentelemetry import metrics, trace
|
| 14 |
+
from transformers import (
|
| 15 |
+
AutoConfig,
|
| 16 |
+
AutoImageProcessor,
|
| 17 |
+
AutoModel,
|
| 18 |
+
AutoModelForImageClassification,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
from .config import Settings, settings
|
| 22 |
+
from .image import compute_scaled_line_width, draw_face_annotations
|
| 23 |
+
from .types import BoundingBox, InferenceError, InferenceOutput
|
| 24 |
+
|
| 25 |
+
# Type alias for detection results
type FaceDetections = list[BoundingBox]
type PersonDetections = list[BoundingBox]

# Get tracer for this module
tracer = trace.get_tracer(__name__)

# Get meter and create metrics instruments
# Uses no-op provider when running standalone, real provider when ml-api sets one
meter = metrics.get_meter(__name__)
_inference_duration = meter.create_histogram(
    "inference.duration_ms",
    unit="ms",
    description="Total inference time in milliseconds",
)
_yolo_duration = meter.create_histogram(
    "inference.yolo_duration_ms",
    unit="ms",
    description="YOLO face detection time in milliseconds",
)
_mivolo_duration = meter.create_histogram(
    "inference.mivolo_duration_ms",
    unit="ms",
    description="MiVOLO age estimation time in milliseconds",
)
_faces_detected = meter.create_counter(
    "inference.faces_detected",
    description="Total number of faces detected",
)
# NOTE(review): _inference_errors is not referenced in the portion of this
# module shown here — presumably incremented in predict's error path; verify.
_inference_errors = meter.create_counter(
    "inference.errors",
    description="Number of inference errors",
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@contextmanager
def _telemetry_span(name: str, histogram=None):
    """Start a tracing span and optionally record its duration to a histogram.

    Args:
        name: Span name (e.g. 'inference.predict').
        histogram: Optional OpenTelemetry histogram instrument; when given,
            the elapsed wall-clock time in milliseconds is recorded.

    Yields:
        The active span, so callers can attach attributes.
    """
    start = time.perf_counter()
    try:
        with tracer.start_as_current_span(name) as span:
            yield span
    finally:
        # Record in a finally block so durations are captured even when the
        # wrapped body raises; previously, failed operations were silently
        # dropped from the histogram.
        if histogram is not None:
            elapsed_ms = (time.perf_counter() - start) * 1000
            histogram.record(elapsed_ms)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _compute_iou(box1: BoundingBox, box2: BoundingBox) -> float:
    """Compute intersection over union between two bounding boxes.

    Args:
        box1: First bounding box (x1, y1, x2, y2).
        box2: Second bounding box (x1, y1, x2, y2).

    Returns:
        IoU value between 0 and 1 (0.0 when the boxes do not overlap).
    """
    # Intersection rectangle edges.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    overlap_w = right - left
    overlap_h = bottom - top
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    overlap = overlap_w * overlap_h
    union = (
        (box1[2] - box1[0]) * (box1[3] - box1[1])
        + (box2[2] - box2[0]) * (box2[3] - box2[1])
        - overlap
    )
    # Guard against degenerate zero-area boxes.
    return overlap / union if union > 0 else 0.0
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _face_inside_person(face: BoundingBox, person: BoundingBox) -> bool:
    """Check whether a face box should be attributed to a person box.

    The test is deliberately loose: only the face's center point has to lie
    within the person rectangle, not the whole face.

    Args:
        face: Face bounding box (x1, y1, x2, y2).
        person: Person bounding box (x1, y1, x2, y2).

    Returns:
        True if the face center is inside the person box.
    """
    center_x = (face[0] + face[2]) / 2
    center_y = (face[1] + face[3]) / 2
    inside_horizontally = person[0] <= center_x <= person[2]
    inside_vertically = person[1] <= center_y <= person[3]
    return inside_horizontally and inside_vertically
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
class FaceAgeInferenceEngine:
|
| 113 |
+
"""Inference engine coordinating detection and age estimation.
|
| 114 |
+
|
| 115 |
+
Uses YOLO Face-Person Detector for detection and MiVOLO v2 for age estimation.
|
| 116 |
+
Models are automatically downloaded from HuggingFace Hub on first use.
|
| 117 |
+
"""
|
| 118 |
+
|
| 119 |
+
    def __init__(self, service_settings: Settings | None = None) -> None:
        """Initialize inference models.

        Effectful: downloads models from HuggingFace Hub if not cached.

        Args:
            service_settings: Configuration object (uses global if None).

        Raises:
            InferenceError: If models cannot be loaded.
        """
        self.settings = service_settings or settings

        # Determine torch dtype and device
        # fp16 on CUDA halves memory/bandwidth; CPU and MPS stay fp32.
        self.device = torch.device(self.settings.device)
        self.dtype = torch.float16 if "cuda" in self.settings.device else torch.float32

        try:
            # Load YOLO Face-Person Detector from HuggingFace Hub
            # NOTE(review): trust_remote_code executes repo-provided code;
            # the default model IDs are assumed trusted — confirm before
            # pointing these settings at third-party repos.
            self.detector = AutoModel.from_pretrained(
                self.settings.detector_model_id,
                trust_remote_code=True,
                dtype=self.dtype,
            ).to(self.device)

            # Load MiVOLO v2 config, model, and image processor
            self.mivolo_config = AutoConfig.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
            )
            self.mivolo = AutoModelForImageClassification.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
                dtype=self.dtype,
            ).to(self.device)
            self.image_processor = AutoImageProcessor.from_pretrained(
                self.settings.mivolo_model_id,
                trust_remote_code=True,
            )
        except Exception as exc:
            # Wrap any hub/IO/load failure in the package's error type so
            # callers only need to catch InferenceError.
            raise InferenceError(
                f"Failed to load models from HuggingFace Hub: {exc}"
            ) from exc
|
| 162 |
+
|
| 163 |
+
def _extract_detections(self, results) -> tuple[FaceDetections, PersonDetections]:
|
| 164 |
+
"""Extract face and person bounding boxes from YOLO results.
|
| 165 |
+
|
| 166 |
+
Args:
|
| 167 |
+
results: YOLO detection results.
|
| 168 |
+
|
| 169 |
+
Returns:
|
| 170 |
+
Tuple of (face_boxes, person_boxes) where each box is (x1, y1, x2, y2).
|
| 171 |
+
"""
|
| 172 |
+
faces: FaceDetections = []
|
| 173 |
+
persons: PersonDetections = []
|
| 174 |
+
|
| 175 |
+
for box in results.boxes:
|
| 176 |
+
cls_id = int(box.cls.item())
|
| 177 |
+
cls_name = results.names[cls_id].lower()
|
| 178 |
+
coords = box.xyxy[0].cpu().numpy()
|
| 179 |
+
bbox: BoundingBox = (
|
| 180 |
+
int(coords[0]),
|
| 181 |
+
int(coords[1]),
|
| 182 |
+
int(coords[2]),
|
| 183 |
+
int(coords[3]),
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
if cls_name == "face":
|
| 187 |
+
faces.append(bbox)
|
| 188 |
+
elif cls_name == "person":
|
| 189 |
+
persons.append(bbox)
|
| 190 |
+
|
| 191 |
+
return faces, persons
|
| 192 |
+
|
| 193 |
+
def _match_faces_to_persons(
|
| 194 |
+
self,
|
| 195 |
+
faces: FaceDetections,
|
| 196 |
+
persons: PersonDetections,
|
| 197 |
+
) -> list[tuple[BoundingBox, BoundingBox | None]]:
|
| 198 |
+
"""Match each face to its corresponding person bounding box.
|
| 199 |
+
|
| 200 |
+
Args:
|
| 201 |
+
faces: List of face bounding boxes.
|
| 202 |
+
persons: List of person bounding boxes.
|
| 203 |
+
|
| 204 |
+
Returns:
|
| 205 |
+
List of (face, person) pairs. Person may be None if no match found.
|
| 206 |
+
"""
|
| 207 |
+
matched: list[tuple[BoundingBox, BoundingBox | None]] = []
|
| 208 |
+
|
| 209 |
+
for face in faces:
|
| 210 |
+
best_person: BoundingBox | None = None
|
| 211 |
+
best_overlap = 0.0
|
| 212 |
+
|
| 213 |
+
for person in persons:
|
| 214 |
+
if _face_inside_person(face, person):
|
| 215 |
+
overlap = _compute_iou(face, person)
|
| 216 |
+
if overlap > best_overlap or best_person is None:
|
| 217 |
+
best_person = person
|
| 218 |
+
best_overlap = overlap
|
| 219 |
+
|
| 220 |
+
matched.append((face, best_person))
|
| 221 |
+
|
| 222 |
+
return matched
|
| 223 |
+
|
| 224 |
+
def _crop_regions(
|
| 225 |
+
self,
|
| 226 |
+
image_bgr: np.ndarray,
|
| 227 |
+
matched_pairs: list[tuple[BoundingBox, BoundingBox | None]],
|
| 228 |
+
) -> tuple[list[np.ndarray], list[np.ndarray | None]]:
|
| 229 |
+
"""Crop face and body regions from image.
|
| 230 |
+
|
| 231 |
+
Args:
|
| 232 |
+
image_bgr: Input image in BGR format.
|
| 233 |
+
matched_pairs: List of (face, person) bounding box pairs.
|
| 234 |
+
|
| 235 |
+
Returns:
|
| 236 |
+
Tuple of (face_crops, body_crops). Body crop may be None if no person matched.
|
| 237 |
+
"""
|
| 238 |
+
face_crops: list[np.ndarray] = []
|
| 239 |
+
body_crops: list[np.ndarray | None] = []
|
| 240 |
+
|
| 241 |
+
h, w = image_bgr.shape[:2]
|
| 242 |
+
|
| 243 |
+
for face, person in matched_pairs:
|
| 244 |
+
# Crop face (clamp to image bounds)
|
| 245 |
+
x1, y1, x2, y2 = face
|
| 246 |
+
x1, y1 = max(0, x1), max(0, y1)
|
| 247 |
+
x2, y2 = min(w, x2), min(h, y2)
|
| 248 |
+
face_crop = image_bgr[y1:y2, x1:x2]
|
| 249 |
+
face_crops.append(face_crop)
|
| 250 |
+
|
| 251 |
+
# Crop body if available
|
| 252 |
+
if person is not None:
|
| 253 |
+
px1, py1, px2, py2 = person
|
| 254 |
+
px1, py1 = max(0, px1), max(0, py1)
|
| 255 |
+
px2, py2 = min(w, px2), min(h, py2)
|
| 256 |
+
body_crop = image_bgr[py1:py2, px1:px2]
|
| 257 |
+
body_crops.append(body_crop)
|
| 258 |
+
else:
|
| 259 |
+
body_crops.append(None)
|
| 260 |
+
|
| 261 |
+
return face_crops, body_crops
|
| 262 |
+
|
| 263 |
+
    def _run_mivolo(
        self,
        face_crops: list[np.ndarray],
        body_crops: list[np.ndarray | None],
    ) -> list[float]:
        """Run MiVOLO v2 age estimation on cropped regions.

        Uses chunked batching to avoid OOM on group photos with many faces.

        Args:
            face_crops: List of face crop images (BGR).
            body_crops: List of body crop images (BGR), may contain None.
                Must be the same length as face_crops (paired by index).

        Returns:
            List of estimated ages, one per face crop, in input order.
        """
        if not face_crops:
            return []

        # Defensive clamp; Settings already enforces ge=1 via pydantic.
        batch_size = max(1, int(self.settings.mivolo_batch_size))

        def _run_batch(
            batch_faces: list[np.ndarray],
            batch_bodies: list[np.ndarray | None],
        ) -> list[float]:
            # Preprocess faces into a single pixel-value tensor on the target device.
            faces_input = self.image_processor(images=batch_faces)["pixel_values"]
            faces_input = faces_input.to(dtype=self.dtype, device=self.device)

            # Collect only the crops that actually have a body; remember
            # their positions so they can be scattered back into the batch.
            valid_body_indices: list[int] = []
            valid_body_images: list[np.ndarray] = []
            for i, body_crop in enumerate(batch_bodies):
                if body_crop is not None:
                    valid_body_indices.append(i)
                    valid_body_images.append(body_crop)

            # Faces without a matched person get an all-zero body tensor —
            # presumably MiVOLO treats zeros as "no body input"; verify
            # against the model card.
            body_input = torch.zeros_like(faces_input)

            if valid_body_images:
                valid_body_input = self.image_processor(images=valid_body_images)[
                    "pixel_values"
                ]
                valid_body_input = valid_body_input.to(dtype=self.dtype, device=self.device)

                # Scatter processed bodies back to their original batch slots.
                for tensor_idx, batch_idx in enumerate(valid_body_indices):
                    body_input[batch_idx] = valid_body_input[tensor_idx]

            with torch.no_grad():
                output = self.mivolo(faces_input=faces_input, body_input=body_input)

            return output.age_output.cpu().flatten().tolist()

        # Process in chunks so huge group photos don't exhaust device memory.
        ages: list[float] = []
        for start in range(0, len(face_crops), batch_size):
            ages.extend(
                _run_batch(
                    face_crops[start : start + batch_size],
                    body_crops[start : start + batch_size],
                )
            )

        return ages
|
| 324 |
+
|
| 325 |
+
def _run_yolo_detection(
|
| 326 |
+
self,
|
| 327 |
+
image_bgr: np.ndarray,
|
| 328 |
+
) -> tuple[FaceDetections, PersonDetections]:
|
| 329 |
+
"""Run YOLO face/person detection with telemetry."""
|
| 330 |
+
with _telemetry_span("inference.yolo_detection", _yolo_duration) as det_span:
|
| 331 |
+
results = self.detector(
|
| 332 |
+
image_bgr,
|
| 333 |
+
conf=self.settings.confidence_threshold,
|
| 334 |
+
iou=self.settings.iou_threshold,
|
| 335 |
+
)[0]
|
| 336 |
+
|
| 337 |
+
faces, persons = self._extract_detections(results)
|
| 338 |
+
det_span.set_attribute("faces_detected", len(faces))
|
| 339 |
+
det_span.set_attribute("persons_detected", len(persons))
|
| 340 |
+
_faces_detected.add(len(faces))
|
| 341 |
+
|
| 342 |
+
return faces, persons
|
| 343 |
+
|
| 344 |
+
def _run_mivolo_with_metrics(
|
| 345 |
+
self,
|
| 346 |
+
face_crops: list[np.ndarray],
|
| 347 |
+
body_crops: list[np.ndarray | None],
|
| 348 |
+
) -> list[float]:
|
| 349 |
+
"""Run MiVOLO v2 age estimation with telemetry."""
|
| 350 |
+
with _telemetry_span("inference.mivolo_age", _mivolo_duration) as age_span:
|
| 351 |
+
ages = self._run_mivolo(face_crops, body_crops)
|
| 352 |
+
age_span.set_attribute("ages_estimated", len(ages))
|
| 353 |
+
|
| 354 |
+
return ages
|
| 355 |
+
|
| 356 |
+
def predict(self, image_bgr: np.ndarray) -> InferenceOutput:
|
| 357 |
+
"""Run face detection and age estimation on an image.
|
| 358 |
+
|
| 359 |
+
Effectful: calls ML models, renders annotations.
|
| 360 |
+
|
| 361 |
+
Args:
|
| 362 |
+
image_bgr: Input image in BGR format.
|
| 363 |
+
|
| 364 |
+
Returns:
|
| 365 |
+
Inference results with annotated image.
|
| 366 |
+
|
| 367 |
+
Raises:
|
| 368 |
+
InferenceError: If inference or annotation fails.
|
| 369 |
+
"""
|
| 370 |
+
if image_bgr.size == 0:
|
| 371 |
+
raise InferenceError("Decoded image is empty.")
|
| 372 |
+
|
| 373 |
+
with _telemetry_span("inference.predict", _inference_duration) as span:
|
| 374 |
+
span.set_attribute("image.height", image_bgr.shape[0])
|
| 375 |
+
span.set_attribute("image.width", image_bgr.shape[1])
|
| 376 |
+
|
| 377 |
+
try:
|
| 378 |
+
# 1. Run face+person detection
|
| 379 |
+
faces, persons = self._run_yolo_detection(image_bgr)
|
| 380 |
+
|
| 381 |
+
# 2. Match faces to persons
|
| 382 |
+
matched_pairs = self._match_faces_to_persons(faces, persons)
|
| 383 |
+
|
| 384 |
+
# 3. Crop face and body regions
|
| 385 |
+
face_crops, body_crops = self._crop_regions(image_bgr, matched_pairs)
|
| 386 |
+
|
| 387 |
+
# 4. Run MiVOLO v2 on crops
|
| 388 |
+
ages = self._run_mivolo_with_metrics(face_crops, body_crops)
|
| 389 |
+
|
| 390 |
+
# 5. Compute annotation parameters
|
| 391 |
+
line_width = compute_scaled_line_width(image_bgr.shape)
|
| 392 |
+
|
| 393 |
+
# 6. Draw annotations (face boxes only with age labels)
|
| 394 |
+
annotated = draw_face_annotations(image_bgr, faces, ages, line_width)
|
| 395 |
+
|
| 396 |
+
span.set_attribute("total_faces", len(faces))
|
| 397 |
+
|
| 398 |
+
except InferenceError:
|
| 399 |
+
_inference_errors.add(1, {"error_type": "inference_error"})
|
| 400 |
+
raise
|
| 401 |
+
except Exception as exc:
|
| 402 |
+
_inference_errors.add(1, {"error_type": "unknown_error"})
|
| 403 |
+
span.record_exception(exc)
|
| 404 |
+
raise InferenceError(
|
| 405 |
+
"Unable to run inference on the provided image."
|
| 406 |
+
) from exc
|
| 407 |
+
|
| 408 |
+
return InferenceOutput(
|
| 409 |
+
ages=tuple(ages),
|
| 410 |
+
annotated_image=annotated,
|
| 411 |
+
)
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
@lru_cache(maxsize=1)
def get_inference_engine() -> FaceAgeInferenceEngine:
    """Get or create singleton inference engine.

    Cached via lru_cache(maxsize=1) so the heavy ML models (YOLO detector,
    MiVOLO v2) are loaded at most once per process.

    Returns:
        Initialized inference engine.
    """
    return FaceAgeInferenceEngine()
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
__all__ = [
|
| 427 |
+
"FaceAgeInferenceEngine",
|
| 428 |
+
"get_inference_engine",
|
| 429 |
+
]
|
face_age_inference/image.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Image processing utilities for face-age-inference.
|
| 2 |
+
|
| 3 |
+
This module handles image decoding, encoding, and annotation operations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import base64
|
| 7 |
+
import math
|
| 8 |
+
from io import BytesIO
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
from PIL import Image
|
| 13 |
+
from pillow_heif import register_heif_opener
|
| 14 |
+
|
| 15 |
+
from .types import (
|
| 16 |
+
ADULT_COLOR,
|
| 17 |
+
AGE_THRESHOLD,
|
| 18 |
+
BASE_LINE_WIDTH,
|
| 19 |
+
DEFAULT_MEDIA_TYPE,
|
| 20 |
+
MEDIA_TYPE_BY_EXTENSION,
|
| 21 |
+
MIN_LINE_WIDTH,
|
| 22 |
+
MINOR_COLOR,
|
| 23 |
+
REFERENCE_DIAGONAL,
|
| 24 |
+
BoundingBox,
|
| 25 |
+
Color,
|
| 26 |
+
ImageShape,
|
| 27 |
+
InferenceError,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
register_heif_opener()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ============================================================================
|
| 34 |
+
# PURE FUNCTIONS: No side effects
|
| 35 |
+
# ============================================================================
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def compute_scaled_line_width(
    img_shape: ImageShape,
    *,
    reference_diagonal: float = REFERENCE_DIAGONAL,
    base_line_width: int = BASE_LINE_WIDTH,
    min_width: int = MIN_LINE_WIDTH,
) -> int:
    """Scale an annotation line width to the image's diagonal size.

    Pure function: deterministic output based only on inputs.

    Args:
        img_shape: Image dimensions as (height, width, ...).
        reference_diagonal: Diagonal length that maps to base_line_width.
        base_line_width: Line width drawn at the reference diagonal.
        min_width: Lower bound on the returned width.

    Returns:
        Integer line width, never smaller than min_width.
    """
    height, width = img_shape[0], img_shape[1]
    scale_factor = math.hypot(width, height) / reference_diagonal
    scaled = int(base_line_width * scale_factor)
    return max(min_width, scaled)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def normalize_file_extension(extension: str) -> str:
    """Return the extension with a leading dot added if it lacks one.

    Pure function: no side effects, deterministic.
    """
    if extension.startswith("."):
        return extension
    return f".{extension}"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def choose_bbox_color(age: float | None, *, threshold: float = AGE_THRESHOLD) -> Color:
    """Pick the bounding-box color for a detection based on estimated age.

    Pure function: deterministic color selection.

    Args:
        age: Estimated age, or None when no estimate is available.
        threshold: Ages strictly below this value are treated as minors.

    Returns:
        Color tuple (BGR format).
    """
    is_minor = age is not None and age < threshold
    return MINOR_COLOR if is_minor else ADULT_COLOR
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# ============================================================================
|
| 90 |
+
# IMAGE PROCESSING: I/O operations
|
| 91 |
+
# ============================================================================
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def decode_image(data: bytes) -> np.ndarray:
    """Decode raw image bytes into a BGR numpy array.

    Effectful: performs I/O (reads bytes, opens image). HEIC/HEIF are
    supported via the module-level pillow_heif registration.

    Args:
        data: Raw image bytes.

    Returns:
        BGR numpy array.

    Raises:
        InferenceError: If the bytes are empty or not a readable image.
    """
    if not data:
        raise InferenceError("Uploaded file is empty.")

    try:
        with Image.open(BytesIO(data)) as pil_image:
            rgb_image = pil_image.convert("RGB")
    except Exception as exc:
        raise InferenceError("Unable to read the provided image.") from exc

    rgb_array = np.array(rgb_image)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def encode_image_to_base64(image: np.ndarray, file_extension: str) -> tuple[str, str]:
    """Encode a BGR image as a base64 string plus its media type.

    Effectful: performs encoding operation.

    Args:
        image: BGR numpy array.
        file_extension: Desired output format (with or without leading dot).

    Returns:
        Tuple of (media_type, base64_string).

    Raises:
        InferenceError: If OpenCV fails to encode the image.
    """
    normalized = normalize_file_extension(file_extension).lower()
    media_type = MEDIA_TYPE_BY_EXTENSION.get(normalized, DEFAULT_MEDIA_TYPE)
    # JPEG-family media types re-encode as .jpg; everything else as .png.
    target_ext = ".jpg" if media_type == "image/jpeg" else ".png"

    ok, buffer = cv2.imencode(target_ext, image)
    if not ok:
        raise InferenceError("Unable to encode annotated image.")

    return media_type, base64.b64encode(buffer).decode("utf-8")
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def draw_face_annotations(
    image: np.ndarray,
    faces: list[BoundingBox],
    ages: list[float],
    line_width: int,
) -> np.ndarray:
    """Draw an age-colored bounding box around each detected face.

    Effectful: modifies image array (returns copy to maintain immutability
    at API level). Box color comes from choose_bbox_color, so minors and
    adults are distinguished visually.

    Args:
        image: Source BGR image.
        faces: Face bounding boxes as (x1, y1, x2, y2).
        ages: Estimated ages, one per face.
        line_width: Rectangle border thickness.

    Returns:
        Annotated copy of the input image.

    Raises:
        InferenceError: If annotation fails (including a faces/ages
            length mismatch, via zip(strict=True)).
    """
    try:
        canvas = image.copy()
        for (x1, y1, x2, y2), age in zip(faces, ages, strict=True):
            cv2.rectangle(
                canvas,
                (x1, y1),
                (x2, y2),
                choose_bbox_color(age),
                line_width,
            )
        return canvas
    except Exception as exc:
        raise InferenceError("Unable to draw annotated detections.") from exc
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
__all__ = [
|
| 180 |
+
"compute_scaled_line_width",
|
| 181 |
+
"normalize_file_extension",
|
| 182 |
+
"choose_bbox_color",
|
| 183 |
+
"decode_image",
|
| 184 |
+
"encode_image_to_base64",
|
| 185 |
+
"draw_face_annotations",
|
| 186 |
+
]
|
face_age_inference/types.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Type definitions for face detection and age estimation.
|
| 2 |
+
|
| 3 |
+
This module defines the core data types and constants used throughout
|
| 4 |
+
the face-age-inference package.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import Final
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
# Type aliases using modern Python 3.12 style
|
| 13 |
+
type ImageShape = tuple[int, ...]
|
| 14 |
+
type BoundingBox = tuple[int, int, int, int]
|
| 15 |
+
type Color = tuple[int, int, int]
|
| 16 |
+
|
| 17 |
+
# Constants for supported media types
|
| 18 |
+
SUPPORTED_IMAGE_MEDIA_TYPES: Final[dict[str, str]] = {
|
| 19 |
+
"image/jpeg": ".jpg",
|
| 20 |
+
"image/png": ".png",
|
| 21 |
+
"image/jpg": ".jpg",
|
| 22 |
+
"image/heic": ".jpg",
|
| 23 |
+
"image/heif": ".jpg",
|
| 24 |
+
"image/heic-sequence": ".jpg",
|
| 25 |
+
}
|
| 26 |
+
MEDIA_TYPE_BY_EXTENSION: Final[dict[str, str]] = {
|
| 27 |
+
".jpg": "image/jpeg",
|
| 28 |
+
".jpeg": "image/jpeg",
|
| 29 |
+
".png": "image/png",
|
| 30 |
+
}
|
| 31 |
+
DEFAULT_MEDIA_TYPE: Final[str] = "image/jpeg"
|
| 32 |
+
|
| 33 |
+
# Annotation constants
|
| 34 |
+
REFERENCE_DIAGONAL: Final[float] = 4600.0
|
| 35 |
+
BASE_LINE_WIDTH: Final[int] = 5
|
| 36 |
+
MIN_LINE_WIDTH: Final[int] = 1
|
| 37 |
+
MINOR_COLOR: Final[Color] = (0, 255, 0) # Green for minors
|
| 38 |
+
ADULT_COLOR: Final[Color] = (255, 0, 0) # Blue for adults
|
| 39 |
+
AGE_THRESHOLD: Final[float] = 18.0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class InferenceError(RuntimeError):
    """Raised when an unrecoverable inference issue occurs.

    Shared error type for decode, detection, age-estimation, and
    annotation failures throughout the package.
    """
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@dataclass(slots=True, frozen=True)
|
| 47 |
+
class InferenceOutput:
|
| 48 |
+
"""Immutable output from face detection and age estimation."""
|
| 49 |
+
|
| 50 |
+
ages: tuple[float, ...]
|
| 51 |
+
annotated_image: np.ndarray
|
| 52 |
+
|
| 53 |
+
def __post_init__(self) -> None:
|
| 54 |
+
"""Validate invariants."""
|
| 55 |
+
if any(age < 0 for age in self.ages if not age != age): # NaN allowed
|
| 56 |
+
raise ValueError("Age estimates must be non-negative (NaN is allowed)")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
__all__ = [
|
| 60 |
+
"ImageShape",
|
| 61 |
+
"BoundingBox",
|
| 62 |
+
"Color",
|
| 63 |
+
"SUPPORTED_IMAGE_MEDIA_TYPES",
|
| 64 |
+
"MEDIA_TYPE_BY_EXTENSION",
|
| 65 |
+
"DEFAULT_MEDIA_TYPE",
|
| 66 |
+
"REFERENCE_DIAGONAL",
|
| 67 |
+
"BASE_LINE_WIDTH",
|
| 68 |
+
"MIN_LINE_WIDTH",
|
| 69 |
+
"MINOR_COLOR",
|
| 70 |
+
"ADULT_COLOR",
|
| 71 |
+
"AGE_THRESHOLD",
|
| 72 |
+
"InferenceError",
|
| 73 |
+
"InferenceOutput",
|
| 74 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv export --package huggingface-space --no-dev --no-hashes --no-emit-workspace
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
# via gradio
|
| 5 |
+
annotated-doc==0.0.4
|
| 6 |
+
# via fastapi
|
| 7 |
+
annotated-types==0.7.0
|
| 8 |
+
# via pydantic
|
| 9 |
+
anyio==4.12.1
|
| 10 |
+
# via
|
| 11 |
+
# gradio
|
| 12 |
+
# httpx
|
| 13 |
+
# starlette
|
| 14 |
+
# watchfiles
|
| 15 |
+
audioop-lts==0.2.2 ; python_full_version >= '3.13'
|
| 16 |
+
# via gradio
|
| 17 |
+
brotli==1.2.0
|
| 18 |
+
# via gradio
|
| 19 |
+
certifi==2026.1.4
|
| 20 |
+
# via
|
| 21 |
+
# httpcore
|
| 22 |
+
# httpx
|
| 23 |
+
# requests
|
| 24 |
+
# sentry-sdk
|
| 25 |
+
charset-normalizer==3.4.4
|
| 26 |
+
# via requests
|
| 27 |
+
click==8.3.1
|
| 28 |
+
# via
|
| 29 |
+
# rich-toolkit
|
| 30 |
+
# typer
|
| 31 |
+
# uvicorn
|
| 32 |
+
colorama==0.4.6 ; sys_platform == 'win32'
|
| 33 |
+
# via
|
| 34 |
+
# click
|
| 35 |
+
# tqdm
|
| 36 |
+
# uvicorn
|
| 37 |
+
contourpy==1.3.3
|
| 38 |
+
# via matplotlib
|
| 39 |
+
cuda-bindings==12.9.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 40 |
+
# via torch
|
| 41 |
+
cuda-pathfinder==1.3.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 42 |
+
# via cuda-bindings
|
| 43 |
+
cycler==0.12.1
|
| 44 |
+
# via matplotlib
|
| 45 |
+
dnspython==2.8.0
|
| 46 |
+
# via email-validator
|
| 47 |
+
email-validator==2.3.0
|
| 48 |
+
# via
|
| 49 |
+
# fastapi
|
| 50 |
+
# pydantic
|
| 51 |
+
fastapi==0.128.3
|
| 52 |
+
# via gradio
|
| 53 |
+
fastapi-cli==0.0.20
|
| 54 |
+
# via fastapi
|
| 55 |
+
fastapi-cloud-cli==0.11.0
|
| 56 |
+
# via fastapi-cli
|
| 57 |
+
fastar==0.8.0
|
| 58 |
+
# via fastapi-cloud-cli
|
| 59 |
+
ffmpy==1.0.0
|
| 60 |
+
# via gradio
|
| 61 |
+
filelock==3.20.3
|
| 62 |
+
# via
|
| 63 |
+
# huggingface-hub
|
| 64 |
+
# torch
|
| 65 |
+
# transformers
|
| 66 |
+
fonttools==4.61.1
|
| 67 |
+
# via matplotlib
|
| 68 |
+
fsspec==2026.2.0
|
| 69 |
+
# via
|
| 70 |
+
# gradio-client
|
| 71 |
+
# huggingface-hub
|
| 72 |
+
# torch
|
| 73 |
+
gradio==6.5.1
|
| 74 |
+
# via
|
| 75 |
+
# huggingface-space
|
| 76 |
+
# spaces
|
| 77 |
+
gradio-client==2.0.3
|
| 78 |
+
# via gradio
|
| 79 |
+
groovy==0.1.2
|
| 80 |
+
# via gradio
|
| 81 |
+
h11==0.16.0
|
| 82 |
+
# via
|
| 83 |
+
# httpcore
|
| 84 |
+
# uvicorn
|
| 85 |
+
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
| 86 |
+
# via huggingface-hub
|
| 87 |
+
httpcore==1.0.9
|
| 88 |
+
# via httpx
|
| 89 |
+
httptools==0.7.1
|
| 90 |
+
# via uvicorn
|
| 91 |
+
httpx==0.28.1
|
| 92 |
+
# via
|
| 93 |
+
# fastapi
|
| 94 |
+
# fastapi-cloud-cli
|
| 95 |
+
# gradio
|
| 96 |
+
# gradio-client
|
| 97 |
+
# safehttpx
|
| 98 |
+
# spaces
|
| 99 |
+
hub-sdk==0.0.24
|
| 100 |
+
# via ultralytics
|
| 101 |
+
huggingface-hub==0.36.2
|
| 102 |
+
# via
|
| 103 |
+
# gradio
|
| 104 |
+
# gradio-client
|
| 105 |
+
# mivolo
|
| 106 |
+
# timm
|
| 107 |
+
# tokenizers
|
| 108 |
+
# transformers
|
| 109 |
+
idna==3.11
|
| 110 |
+
# via
|
| 111 |
+
# anyio
|
| 112 |
+
# email-validator
|
| 113 |
+
# httpx
|
| 114 |
+
# requests
|
| 115 |
+
importlib-metadata==8.7.1
|
| 116 |
+
# via opentelemetry-api
|
| 117 |
+
jinja2==3.1.6
|
| 118 |
+
# via
|
| 119 |
+
# fastapi
|
| 120 |
+
# gradio
|
| 121 |
+
# torch
|
| 122 |
+
kiwisolver==1.4.9
|
| 123 |
+
# via matplotlib
|
| 124 |
+
lapx==0.9.4
|
| 125 |
+
# via mivolo
|
| 126 |
+
markdown-it-py==4.0.0
|
| 127 |
+
# via rich
|
| 128 |
+
markupsafe==3.0.3
|
| 129 |
+
# via
|
| 130 |
+
# gradio
|
| 131 |
+
# jinja2
|
| 132 |
+
matplotlib==3.10.8
|
| 133 |
+
# via
|
| 134 |
+
# seaborn
|
| 135 |
+
# ultralytics
|
| 136 |
+
mdurl==0.1.2
|
| 137 |
+
# via markdown-it-py
|
| 138 |
+
mivolo @ git+https://github.com/WildChlamydia/MiVOLO.git@b185dce6ed5061aae3a95b0a9513d7725718ac71
|
| 139 |
+
# via face-age-inference
|
| 140 |
+
mpmath==1.3.0
|
| 141 |
+
# via sympy
|
| 142 |
+
networkx==3.6.1
|
| 143 |
+
# via torch
|
| 144 |
+
numpy==2.4.2
|
| 145 |
+
# via
|
| 146 |
+
# contourpy
|
| 147 |
+
# gradio
|
| 148 |
+
# lapx
|
| 149 |
+
# matplotlib
|
| 150 |
+
# opencv-python
|
| 151 |
+
# opencv-python-headless
|
| 152 |
+
# pandas
|
| 153 |
+
# scipy
|
| 154 |
+
# seaborn
|
| 155 |
+
# torchvision
|
| 156 |
+
# transformers
|
| 157 |
+
# ultralytics
|
| 158 |
+
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 159 |
+
# via
|
| 160 |
+
# nvidia-cudnn-cu12
|
| 161 |
+
# nvidia-cusolver-cu12
|
| 162 |
+
# torch
|
| 163 |
+
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 164 |
+
# via torch
|
| 165 |
+
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 166 |
+
# via torch
|
| 167 |
+
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 168 |
+
# via torch
|
| 169 |
+
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 170 |
+
# via torch
|
| 171 |
+
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 172 |
+
# via torch
|
| 173 |
+
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 174 |
+
# via torch
|
| 175 |
+
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 176 |
+
# via torch
|
| 177 |
+
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 178 |
+
# via torch
|
| 179 |
+
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 180 |
+
# via
|
| 181 |
+
# nvidia-cusolver-cu12
|
| 182 |
+
# torch
|
| 183 |
+
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 184 |
+
# via torch
|
| 185 |
+
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 186 |
+
# via torch
|
| 187 |
+
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 188 |
+
# via
|
| 189 |
+
# nvidia-cufft-cu12
|
| 190 |
+
# nvidia-cusolver-cu12
|
| 191 |
+
# nvidia-cusparse-cu12
|
| 192 |
+
# torch
|
| 193 |
+
nvidia-nvshmem-cu12==3.4.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 194 |
+
# via torch
|
| 195 |
+
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 196 |
+
# via torch
|
| 197 |
+
opencv-python==4.13.0.92
|
| 198 |
+
# via ultralytics
|
| 199 |
+
opencv-python-headless==4.13.0.92
|
| 200 |
+
# via face-age-inference
|
| 201 |
+
opentelemetry-api==1.39.1
|
| 202 |
+
# via face-age-inference
|
| 203 |
+
orjson==3.11.7
|
| 204 |
+
# via gradio
|
| 205 |
+
packaging==26.0
|
| 206 |
+
# via
|
| 207 |
+
# gradio
|
| 208 |
+
# gradio-client
|
| 209 |
+
# huggingface-hub
|
| 210 |
+
# matplotlib
|
| 211 |
+
# spaces
|
| 212 |
+
# transformers
|
| 213 |
+
pandas==3.0.0
|
| 214 |
+
# via
|
| 215 |
+
# gradio
|
| 216 |
+
# seaborn
|
| 217 |
+
# ultralytics
|
| 218 |
+
pillow==12.1.0
|
| 219 |
+
# via
|
| 220 |
+
# face-age-inference
|
| 221 |
+
# gradio
|
| 222 |
+
# matplotlib
|
| 223 |
+
# pillow-heif
|
| 224 |
+
# torchvision
|
| 225 |
+
# ultralytics
|
| 226 |
+
pillow-heif==1.2.0
|
| 227 |
+
# via face-age-inference
|
| 228 |
+
psutil==5.9.8
|
| 229 |
+
# via
|
| 230 |
+
# spaces
|
| 231 |
+
# ultralytics
|
| 232 |
+
py-cpuinfo==9.0.0
|
| 233 |
+
# via ultralytics
|
| 234 |
+
pydantic==2.12.5
|
| 235 |
+
# via
|
| 236 |
+
# fastapi
|
| 237 |
+
# fastapi-cloud-cli
|
| 238 |
+
# gradio
|
| 239 |
+
# pydantic-extra-types
|
| 240 |
+
# pydantic-settings
|
| 241 |
+
# spaces
|
| 242 |
+
pydantic-core==2.41.5
|
| 243 |
+
# via pydantic
|
| 244 |
+
pydantic-extra-types==2.11.0
|
| 245 |
+
# via fastapi
|
| 246 |
+
pydantic-settings==2.12.0
|
| 247 |
+
# via
|
| 248 |
+
# face-age-inference
|
| 249 |
+
# fastapi
|
| 250 |
+
pydub==0.25.1
|
| 251 |
+
# via gradio
|
| 252 |
+
pygments==2.19.2
|
| 253 |
+
# via rich
|
| 254 |
+
pyparsing==3.3.2
|
| 255 |
+
# via matplotlib
|
| 256 |
+
python-dateutil==2.9.0.post0
|
| 257 |
+
# via
|
| 258 |
+
# matplotlib
|
| 259 |
+
# pandas
|
| 260 |
+
python-dotenv==1.2.1
|
| 261 |
+
# via
|
| 262 |
+
# pydantic-settings
|
| 263 |
+
# uvicorn
|
| 264 |
+
python-multipart==0.0.22
|
| 265 |
+
# via
|
| 266 |
+
# fastapi
|
| 267 |
+
# gradio
|
| 268 |
+
pytz==2025.2
|
| 269 |
+
# via gradio
|
| 270 |
+
pyyaml==6.0.3
|
| 271 |
+
# via
|
| 272 |
+
# gradio
|
| 273 |
+
# huggingface-hub
|
| 274 |
+
# timm
|
| 275 |
+
# transformers
|
| 276 |
+
# ultralytics
|
| 277 |
+
# uvicorn
|
| 278 |
+
regex==2026.1.15
|
| 279 |
+
# via transformers
|
| 280 |
+
requests==2.32.5
|
| 281 |
+
# via
|
| 282 |
+
# hub-sdk
|
| 283 |
+
# huggingface-hub
|
| 284 |
+
# spaces
|
| 285 |
+
# transformers
|
| 286 |
+
# ultralytics
|
| 287 |
+
rich==14.3.2
|
| 288 |
+
# via
|
| 289 |
+
# rich-toolkit
|
| 290 |
+
# typer
|
| 291 |
+
rich-toolkit==0.18.1
|
| 292 |
+
# via
|
| 293 |
+
# fastapi-cli
|
| 294 |
+
# fastapi-cloud-cli
|
| 295 |
+
rignore==0.7.6
|
| 296 |
+
# via fastapi-cloud-cli
|
| 297 |
+
safehttpx==0.1.7
|
| 298 |
+
# via gradio
|
| 299 |
+
safetensors==0.7.0
|
| 300 |
+
# via
|
| 301 |
+
# timm
|
| 302 |
+
# transformers
|
| 303 |
+
scipy==1.17.0
|
| 304 |
+
# via ultralytics
|
| 305 |
+
seaborn==0.13.2
|
| 306 |
+
# via ultralytics
|
| 307 |
+
semantic-version==2.10.0
|
| 308 |
+
# via gradio
|
| 309 |
+
sentry-sdk==2.52.0
|
| 310 |
+
# via fastapi-cloud-cli
|
| 311 |
+
setuptools==81.0.0
|
| 312 |
+
# via torch
|
| 313 |
+
shellingham==1.5.4
|
| 314 |
+
# via typer
|
| 315 |
+
six==1.17.0
|
| 316 |
+
# via python-dateutil
|
| 317 |
+
spaces==0.47.0
|
| 318 |
+
# via huggingface-space
|
| 319 |
+
starlette==0.52.1
|
| 320 |
+
# via
|
| 321 |
+
# fastapi
|
| 322 |
+
# gradio
|
| 323 |
+
sympy==1.14.0
|
| 324 |
+
# via torch
|
| 325 |
+
thop==0.1.1.post2209072238
|
| 326 |
+
# via ultralytics
|
| 327 |
+
timm==0.8.13.dev0
|
| 328 |
+
# via mivolo
|
| 329 |
+
tokenizers==0.22.2
|
| 330 |
+
# via transformers
|
| 331 |
+
tomlkit==0.13.3
|
| 332 |
+
# via gradio
|
| 333 |
+
torch==2.10.0
|
| 334 |
+
# via
|
| 335 |
+
# face-age-inference
|
| 336 |
+
# thop
|
| 337 |
+
# timm
|
| 338 |
+
# torchvision
|
| 339 |
+
# ultralytics
|
| 340 |
+
torchvision==0.25.0
|
| 341 |
+
# via
|
| 342 |
+
# timm
|
| 343 |
+
# ultralytics
|
| 344 |
+
tqdm==4.67.3
|
| 345 |
+
# via
|
| 346 |
+
# huggingface-hub
|
| 347 |
+
# transformers
|
| 348 |
+
# ultralytics
|
| 349 |
+
transformers==4.57.6
|
| 350 |
+
# via face-age-inference
|
| 351 |
+
triton==3.6.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 352 |
+
# via torch
|
| 353 |
+
typer==0.21.1
|
| 354 |
+
# via
|
| 355 |
+
# fastapi-cli
|
| 356 |
+
# fastapi-cloud-cli
|
| 357 |
+
# gradio
|
| 358 |
+
typing-extensions==4.15.0
|
| 359 |
+
# via
|
| 360 |
+
# anyio
|
| 361 |
+
# fastapi
|
| 362 |
+
# gradio
|
| 363 |
+
# gradio-client
|
| 364 |
+
# huggingface-hub
|
| 365 |
+
# opentelemetry-api
|
| 366 |
+
# pydantic
|
| 367 |
+
# pydantic-core
|
| 368 |
+
# pydantic-extra-types
|
| 369 |
+
# rich-toolkit
|
| 370 |
+
# spaces
|
| 371 |
+
# starlette
|
| 372 |
+
# torch
|
| 373 |
+
# typer
|
| 374 |
+
# typing-inspection
|
| 375 |
+
typing-inspection==0.4.2
|
| 376 |
+
# via
|
| 377 |
+
# fastapi
|
| 378 |
+
# pydantic
|
| 379 |
+
# pydantic-settings
|
| 380 |
+
tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
|
| 381 |
+
# via pandas
|
| 382 |
+
ultralytics==8.1.0
|
| 383 |
+
# via mivolo
|
| 384 |
+
urllib3==2.6.3
|
| 385 |
+
# via
|
| 386 |
+
# requests
|
| 387 |
+
# sentry-sdk
|
| 388 |
+
uvicorn==0.40.0
|
| 389 |
+
# via
|
| 390 |
+
# fastapi
|
| 391 |
+
# fastapi-cli
|
| 392 |
+
# fastapi-cloud-cli
|
| 393 |
+
# gradio
|
| 394 |
+
uvloop==0.22.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
|
| 395 |
+
# via uvicorn
|
| 396 |
+
watchfiles==1.1.1
|
| 397 |
+
# via uvicorn
|
| 398 |
+
websockets==16.0
|
| 399 |
+
# via uvicorn
|
| 400 |
+
zipp==3.23.0
|
| 401 |
+
# via importlib-metadata
|