"""Backend runtime settings.

These settings drive the FastAPI process itself: where to find the trained
artifacts, what to advertise as the model version, whether to warm up at
boot. They are intentionally separate from ``captioning.config.AppConfig``,
which owns the *ML* configuration (architecture, decode strategy, CORS
origins). Keeping the two layers split lets ops change deployment paths
without touching research configs, and vice versa.

Override any field via environment variable, prefixed with ``BACKEND_``::

    BACKEND_CONFIG_PATH=configs/base.yaml
    BACKEND_WEIGHTS_PATH=models/v1.0.0/model.h5
    BACKEND_TOKENIZER_DIR=models/v1.0.0
    BACKEND_MODEL_VERSION=v1.0.0
    BACKEND_WARMUP=true
    BACKEND_WEIGHTS_HUB_REPO=your-username/captioning-weights
    BACKEND_WEIGHTS_HUB_REVISION=v1.0.0
"""
from __future__ import annotations
from functools import lru_cache
from pathlib import Path
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class BackendSettings(BaseSettings):
    """Settings for the FastAPI inference service.

    Values are populated by pydantic-settings. Every field may be supplied
    through an environment variable carrying the ``BACKEND_`` prefix, matched
    case-insensitively; inputs that do not map to a declared field are
    ignored rather than rejected (see ``model_config``).

    The fields fall into three groups:

    * local artifact locations (``config_path``, ``weights_path``,
      ``tokenizer_dir``),
    * service metadata and behaviour (``model_version``, ``api_version``,
      ``warmup``, ``request_id_header``),
    * optional HuggingFace Hub download parameters (``weights_hub_*``,
      ``weights_cache_dir``).

    Path-typed fields have a leading ``~`` expanded after validation.
    """

    # How pydantic-settings loads values: env prefix, case handling, and the
    # policy for inputs that match no declared field.
    model_config = SettingsConfigDict(
        extra="ignore",
        case_sensitive=False,
        env_prefix="BACKEND_",
    )

    # ---- Local artifacts -----------------------------------------------------
    config_path: Path = Field(
        default=Path("configs/base.yaml"),
        description="Path to the YAML AppConfig consumed by the ML package.",
    )
    weights_path: Path = Field(
        default=Path("models/v1.0.0/model.h5"),
        description=(
            "Path to the trained Keras weights file "
            "(used when weights_hub_repo is unset)."
        ),
    )
    tokenizer_dir: Path = Field(
        default=Path("models/v1.0.0"),
        description=(
            "Directory containing vocab.pkl / vocab.json "
            "artifacts (used when weights_hub_repo is unset)."
        ),
    )

    # ---- Service metadata and behaviour --------------------------------------
    model_version: str = Field(
        default="v1.0.0",
        description="Semantic version surfaced in /healthz and caption responses.",
    )
    api_version: str = Field(
        default="0.1.0",
        description="FastAPI app version (shown in OpenAPI docs).",
    )
    warmup: bool = Field(
        default=True,
        description="Run one dummy inference at startup so the first request is fast.",
    )
    request_id_header: str = Field(
        default="x-request-id",
        description="HTTP header used for request correlation IDs.",
    )

    # ---- HuggingFace Hub weights pull (WS-A4) --------------------------------
    # When ``weights_hub_repo`` is set, ``lifespan`` calls
    # ``huggingface_hub.snapshot_download`` and resolves ``weights_path`` and
    # ``tokenizer_dir`` to paths inside the downloaded snapshot. This keeps
    # the Docker image small and allows rotating weights without a rebuild.
    weights_hub_repo: str | None = Field(
        default=None,
        description=(
            "HuggingFace Hub repo id (e.g. 'user/captioning-weights'). "
            "None = use local paths."
        ),
    )
    weights_hub_revision: str = Field(
        default="main",
        description="Git ref/tag/commit to pin (recommended: pin a tag like 'v1.0.0').",
    )
    weights_hub_filename: str = Field(
        default="model.h5",
        description="Filename of the weights file inside the Hub snapshot.",
    )
    weights_cache_dir: Path | None = Field(
        default=None,
        description=(
            "Local cache dir for snapshot_download. "
            "None = HF Hub default ($HF_HOME)."
        ),
    )

    @field_validator("config_path", "weights_path", "tokenizer_dir")
    @classmethod
    def _expand_user(cls, value: Path) -> Path:
        """Expand a leading ``~`` in the always-present path fields."""
        expanded = value.expanduser()
        return expanded

    @field_validator("weights_cache_dir")
    @classmethod
    def _expand_optional_user(cls, value: Path | None) -> Path | None:
        """Expand a leading ``~`` in the cache dir, passing ``None`` through."""
        if value is None:
            return None
        return value.expanduser()
@lru_cache(maxsize=1)
def get_backend_settings() -> BackendSettings:
    """Build the backend settings once and reuse them process-wide.

    The single-slot ``lru_cache`` means ``BackendSettings()`` is constructed
    (and the environment parsed) only on the first call; later calls return
    that same instance. Tests that change environment variables between
    cases can force a rebuild with ``get_backend_settings.cache_clear()``.
    """
    settings = BackendSettings()
    return settings
|