Spaces:

apoorvrajdev
/

image-captioning-api

Configuration error

App Files Files Community

apoorvrajdev committed 26 days ago

Commit

08f1adc

1 Parent(s): 2ab9a5b

feat(api): build production-grade FastAPI inference backend

Browse files

Files changed (17) hide show

backend/app/__init__.py +1 -0
backend/app/api/__init__.py +5 -0
backend/app/api/routes.py +119 -0
backend/app/core/__init__.py +18 -0
backend/app/core/config.py +79 -0
backend/app/core/logging.py +87 -0
backend/app/main.py +105 -0
backend/app/schemas/__init__.py +5 -0
backend/app/schemas/caption.py +65 -0
backend/app/services/__init__.py +5 -0
backend/app/services/predictor_service.py +82 -0
backend/app/utils/__init__.py +5 -0
backend/app/utils/image.py +60 -0
models/v1.0.0/vocab.json +54 -0
scripts/bootstrap_dev_artifacts.py +154 -0
src/captioning/inference/predictor.py +23 -1
src/captioning/preprocessing/tokenizer.py +12 -2

backend/app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """FastAPI inference backend for the captioning ML package."""

backend/app/api/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""HTTP routes for the captioning service."""
+from app.api.routes import router
+__all__ = ["router"]

backend/app/api/routes.py ADDED Viewed

	@@ -0,0 +1,119 @@

+"""HTTP routes: ``/healthz`` and ``/v1/captions``.
+Routes are intentionally thin: validate inputs, delegate to the
+``PredictorService``, shape the response. No model code, no TF imports.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status
+from app.core.config import BackendSettings, get_backend_settings
+from app.core.logging import current_request_id
+from app.schemas.caption import CaptionResponse, ErrorResponse, HealthResponse
+from app.services.predictor_service import PredictorService
+from app.utils.image import ALLOWED_CONTENT_TYPES, ImageDecodeError
+from captioning.utils import get_logger
+log = get_logger(__name__)
+router = APIRouter()
+def get_predictor_service(request: Request) -> PredictorService:
+    """FastAPI dependency that hands out the shared ``PredictorService``.
+    The service is looked up on ``request.app.state.predictor_service``.
+    Args:
+        request: Incoming request; only ``request.app.state`` is consulted.
+    Returns:
+        The ``PredictorService`` currently stored on app state.
+    Raises:
+        HTTPException: 503 when the attribute is missing or ``None``, so a
+            request that arrives without a predictor gets a clean error
+            instead of an ``AttributeError``.
+    """
+    loaded: PredictorService | None = getattr(request.app.state, "predictor_service", None)
+    if loaded is not None:
+        return loaded
+    raise HTTPException(
+        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+        detail="Predictor is not ready yet.",
+    )
+@router.get(
+    "/healthz",
+    response_model=HealthResponse,
+    tags=["health"],
+    summary="Liveness + readiness probe",
+)
+async def healthz(
+    request: Request,
+    settings: BackendSettings = Depends(get_backend_settings),
+) -> HealthResponse:
+    """Report whether a predictor is attached to the app.
+    This handler never raises for a missing predictor; readiness is carried
+    in the body (``status`` / ``model_loaded``) rather than the status code.
+    Args:
+        request: Used only to read ``request.app.state.predictor_service``.
+        settings: Backend settings; supplies ``api_version`` and the fallback
+            ``model_version`` when no predictor is attached.
+    Returns:
+        A ``HealthResponse`` stamped with the current UTC time.
+    """
+    service: PredictorService | None = getattr(request.app.state, "predictor_service", None)
+    if service is None:
+        # No predictor attached: advertise the configured version instead.
+        state, ready, version = "loading", False, settings.model_version
+    else:
+        state, ready, version = "ok", True, service.model_version
+    return HealthResponse(
+        status=state,
+        model_loaded=ready,
+        model_version=version,
+        api_version=settings.api_version,
+        timestamp=datetime.now(timezone.utc),
+    )
+@router.post(
+    "/v1/captions",
+    response_model=CaptionResponse,
+    tags=["captions"],
+    status_code=status.HTTP_200_OK,
+    summary="Generate a caption for an uploaded image",
+    responses={
+        400: {"model": ErrorResponse, "description": "Empty upload."},
+        413: {"model": ErrorResponse, "description": "Image exceeds size limit."},
+        415: {"model": ErrorResponse, "description": "Unsupported image content type."},
+        422: {"model": ErrorResponse, "description": "Image bytes could not be decoded."},
+        503: {"model": ErrorResponse, "description": "Predictor not ready."},
+    },
+)
+async def caption_image(
+    image: UploadFile = File(
+        ...,
+        description="Image file to caption. Allowed: JPEG, PNG, WebP, BMP.",
+    ),
+    service: PredictorService = Depends(get_predictor_service),
+) -> CaptionResponse:
+    """Accept a multipart image upload and return a generated caption.
+    Checks run in this order: content type (415), declared upload size
+    (413), empty body (400), bytes actually read (413), then decoding inside
+    the service (422).
+    Args:
+        image: The uploaded file part.
+        service: Predictor service resolved by ``get_predictor_service``
+            (which answers 503 when no predictor is available).
+    Returns:
+        A ``CaptionResponse`` with the caption, model metadata, rounded
+        latency and the current request id.
+    Raises:
+        HTTPException: 415, 413, 400 or 422 as described above.
+    """
+    if image.content_type not in ALLOWED_CONTENT_TYPES:
+        raise HTTPException(
+            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+            detail=(
+                f"Unsupported content type: {image.content_type!r}. "
+                f"Allowed: {sorted(ALLOWED_CONTENT_TYPES)}."
+            ),
+        )
+    limit = service.max_upload_bytes
+    # Cheap rejection first: when the framework already knows the part's size
+    # there is no need to read a single byte. ``getattr`` keeps this working on
+    # UploadFile implementations that do not expose ``size``.
+    declared_size = getattr(image, "size", None)
+    if declared_size is not None and declared_size > limit:
+        raise HTTPException(
+            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+            detail=(f"Image is {declared_size} bytes; limit is {limit}."),
+        )
+    # Read at most one byte past the limit: enough to detect an oversized
+    # upload without copying an arbitrarily large file into memory.
+    payload = await image.read(limit + 1)
+    if not payload:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Empty file upload.",
+        )
+    if len(payload) > limit:
+        raise HTTPException(
+            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+            detail=(f"Image is more than {limit} bytes; limit is {limit}."),
+        )
+    try:
+        caption, latency_ms = await service.caption_image_bytes(payload)
+    except ImageDecodeError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=str(exc),
+        ) from exc
+    return CaptionResponse(
+        caption=caption,
+        model_version=service.model_version,
+        decode_strategy=service.decode_strategy,
+        latency_ms=round(latency_ms, 2),
+        request_id=current_request_id(),
+    )

backend/app/core/__init__.py ADDED Viewed

	@@ -0,0 +1,18 @@

+"""Core: backend settings and HTTP-layer logging glue."""
+from app.core.config import BackendSettings, get_backend_settings
+from app.core.logging import (
+    REQUEST_ID_HEADER,
+    RequestContextMiddleware,
+    configure_app_logging,
+    current_request_id,
+)
+__all__ = [
+    "REQUEST_ID_HEADER",
+    "BackendSettings",
+    "RequestContextMiddleware",
+    "configure_app_logging",
+    "current_request_id",
+    "get_backend_settings",
+]

backend/app/core/config.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Backend runtime settings.
+These settings drive the FastAPI process itself: where to find the trained
+artifacts, what to advertise as the model version, whether to warm up at
+boot. They are intentionally separate from ``captioning.config.AppConfig``,
+which owns the *ML* configuration (architecture, decode strategy, CORS
+origins). Keeping the two layers split lets ops change deployment paths
+without touching research configs, and vice versa.
+Override any field via environment variable, prefixed with ``BACKEND_``::
+    BACKEND_CONFIG_PATH=configs/base.yaml
+    BACKEND_WEIGHTS_PATH=models/v1.0.0/model.h5
+    BACKEND_TOKENIZER_DIR=models/v1.0.0
+    BACKEND_MODEL_VERSION=v1.0.0
+    BACKEND_WARMUP=true
+"""
+from __future__ import annotations
+from functools import lru_cache
+from pathlib import Path
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class BackendSettings(BaseSettings):
+    """Process-level configuration for the FastAPI inference service.
+    Values are read from environment variables carrying the ``BACKEND_``
+    prefix (matched case-insensitively); extra inputs are ignored.
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="BACKEND_",
+        case_sensitive=False,
+        extra="ignore",
+    )
+    # --- artifact locations -------------------------------------------------
+    config_path: Path = Field(
+        Path("configs/base.yaml"),
+        description="Path to the YAML AppConfig consumed by the ML package.",
+    )
+    weights_path: Path = Field(
+        Path("models/v1.0.0/model.h5"),
+        description="Path to the trained Keras weights file.",
+    )
+    tokenizer_dir: Path = Field(
+        Path("models/v1.0.0"),
+        description="Directory containing vocab.pkl / vocab.json artifacts.",
+    )
+    # --- advertised versions ------------------------------------------------
+    model_version: str = Field(
+        "v1.0.0",
+        description="Semantic version surfaced in /healthz and caption responses.",
+    )
+    api_version: str = Field(
+        "0.1.0",
+        description="FastAPI app version (shown in OpenAPI docs).",
+    )
+    # --- runtime behaviour --------------------------------------------------
+    warmup: bool = Field(
+        True,
+        description="Run one dummy inference at startup so the first request is fast.",
+    )
+    request_id_header: str = Field(
+        "x-request-id",
+        description="HTTP header used for request correlation IDs.",
+    )
+    @field_validator("config_path", "weights_path", "tokenizer_dir")
+    @classmethod
+    def _expand_user(cls, value: Path) -> Path:
+        """Expand a leading ``~`` in each of the path settings."""
+        expanded = value.expanduser()
+        return expanded
+@lru_cache(maxsize=1)
+def get_backend_settings() -> BackendSettings:
+    """Build ``BackendSettings`` once and reuse it for the life of the process.
+    The single-slot ``lru_cache`` means the environment is parsed on the
+    first call only. Tests that change environment variables should call
+    ``get_backend_settings.cache_clear()`` before asking for settings again.
+    Returns:
+        The cached ``BackendSettings`` instance.
+    """
+    settings = BackendSettings()
+    return settings

backend/app/core/logging.py ADDED Viewed

	@@ -0,0 +1,87 @@

+"""HTTP-layer logging glue.
+The ML package already configures structlog (``captioning.utils.logging``).
+The FastAPI process has two extra needs on top of that:
+1. **Request correlation** — every log line emitted while handling a
+   request should carry the same ``request_id`` so logs can be grouped.
+   We bind it once via ``structlog.contextvars`` so any ``log.info(...)``
+   downstream automatically inherits it without threading the id through
+   function signatures.
+2. **Access logs as structured events** — uvicorn's default access log is
+   a plain string. Re-emitting one structured ``request_finished`` event
+   per request keeps the log stream homogeneous and indexable.
+"""
+from __future__ import annotations
+import time
+import uuid
+from collections.abc import Awaitable, Callable
+import structlog
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+from captioning.utils.logging import configure_logging, get_logger
+log = get_logger(__name__)
+REQUEST_ID_HEADER = "x-request-id"
+def configure_app_logging() -> None:
+    """Initialise logging for the FastAPI process.
+    Delegates entirely to ``captioning.utils.logging.configure_logging``;
+    nothing HTTP-specific is configured here.
+    NOTE(review): this was previously described as idempotent, with pretty
+    colourised output in dev and JSON when ``APP_ENV=production``. Those
+    behaviours live in the ML package and cannot be confirmed from this
+    module — verify there.
+    """
+    configure_logging()
+class RequestContextMiddleware(BaseHTTPMiddleware):
+    """Attach a correlation id to every request and emit start/finish logs.
+    The id is taken from the inbound ``x-request-id`` header when it is
+    present and non-empty; otherwise a fresh ``uuid4`` hex string is used.
+    On a successful response the id is written back to the same header.
+    """
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        """Bind request context, time the downstream call, and log the outcome.
+        Args:
+            request: The incoming request.
+            call_next: Invokes the rest of the application.
+        Returns:
+            The downstream response with the request-id header set.
+        Raises:
+            Exception: Whatever the downstream call raises, re-raised after a
+                ``request_failed`` event has been logged.
+        """
+        correlation_id = request.headers.get(REQUEST_ID_HEADER) or uuid.uuid4().hex
+        # Drop whatever was bound before, then bind this request's fields so
+        # every log call made while handling it carries them.
+        structlog.contextvars.clear_contextvars()
+        structlog.contextvars.bind_contextvars(
+            request_id=correlation_id,
+            method=request.method,
+            path=request.url.path,
+        )
+        began = time.perf_counter()
+        log.info("request_started")
+        try:
+            response = await call_next(request)
+        except Exception:
+            failed_after_ms = (time.perf_counter() - began) * 1000
+            log.exception("request_failed", duration_ms=round(failed_after_ms, 2))
+            raise
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        log.info("request_finished", status=response.status_code, duration_ms=round(elapsed_ms, 2))
+        response.headers[REQUEST_ID_HEADER] = correlation_id
+        return response
+def current_request_id() -> str:
+    """Look up the ``request_id`` bound in structlog's contextvars.
+    Returns:
+        The bound id converted with ``str``, or ``""`` when none is bound.
+    """
+    bound = structlog.contextvars.get_contextvars()
+    return str(bound.get("request_id", ""))

backend/app/main.py ADDED Viewed

	@@ -0,0 +1,105 @@

+"""FastAPI application entrypoint.
+Run locally with::
+    uvicorn --app-dir backend app.main:app --host 0.0.0.0 --port 8000 --reload
+Startup order (step 1 runs in ``create_app``; steps 2-4 run in the lifespan):
+    1. Load YAML ``AppConfig`` (research-side hyperparameters).
+    2. Load weights + tokenizer into a ``CaptionPredictor`` singleton.
+    3. Optionally warmup so the first request doesn't pay TF's lazy build cost.
+    4. Wrap the predictor in a ``PredictorService`` and stash on app state.
+The singleton lives on ``app.state.predictor_service``; routes pull it
+through a ``Depends`` so tests can override the dependency cleanly.
+"""
+from __future__ import annotations
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from app.api.routes import router
+from app.core.config import BackendSettings, get_backend_settings
+from app.core.logging import RequestContextMiddleware, configure_app_logging
+from app.services.predictor_service import PredictorService
+from captioning.config import load_config
+from captioning.config.schema import AppConfig
+from captioning.inference import CaptionPredictor
+from captioning.utils import get_logger
+log = get_logger(__name__)
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
+    """Attach a ready ``PredictorService`` for the app's lifetime.
+    On startup: build the predictor from the configured artifacts, warm it up
+    if ``settings.warmup`` is set, wrap it in a ``PredictorService`` and store
+    that on ``app.state.predictor_service``. On shutdown the attribute is
+    reset to ``None``.
+    Args:
+        app: The application; ``app.state.backend_settings`` and
+            ``app.state.app_config`` must already be populated.
+    """
+    settings: BackendSettings = app.state.backend_settings
+    config: AppConfig = app.state.app_config
+    version = settings.model_version
+    weights, tokenizer_dir = settings.weights_path, settings.tokenizer_dir
+    log.info(
+        "predictor_loading",
+        weights=str(weights),
+        tokenizer_dir=str(tokenizer_dir),
+        model_version=version,
+    )
+    predictor = CaptionPredictor.from_artifacts(
+        weights_path=weights,
+        tokenizer_dir=tokenizer_dir,
+        config=config,
+    )
+    if settings.warmup:
+        predictor.warmup()
+    service = PredictorService(
+        predictor=predictor,
+        model_version=version,
+        max_upload_bytes=config.serve.max_upload_bytes,
+    )
+    app.state.predictor_service = service
+    log.info("predictor_ready", model_version=version)
+    try:
+        yield
+    finally:
+        # Clear the reference so dependencies answer 503 from here on.
+        app.state.predictor_service = None
+        log.info("predictor_unloaded")
+def create_app() -> FastAPI:
+    """Assemble and return a fully wired FastAPI application.
+    Configures logging, resolves backend settings, loads the YAML config,
+    then builds the app with the ``lifespan`` hook, seeds ``app.state``,
+    registers the CORS and request-context middleware (in that order) and
+    mounts the router. Written as a factory so each call yields a fresh app.
+    Returns:
+        The configured ``FastAPI`` instance; ``app.state.predictor_service``
+        starts as ``None`` until the lifespan populates it.
+    """
+    configure_app_logging()
+    backend_settings = get_backend_settings()
+    app_config = load_config(backend_settings.config_path)
+    about = (
+        "Production-grade inference service for the IEEE-published "
+        "CNN+Transformer image captioning model."
+    )
+    application = FastAPI(
+        title="Image Captioning API",
+        version=backend_settings.api_version,
+        description=about,
+        lifespan=lifespan,
+    )
+    state = application.state
+    state.backend_settings = backend_settings
+    state.app_config = app_config
+    state.predictor_service = None
+    application.add_middleware(
+        CORSMiddleware,
+        allow_origins=app_config.serve.cors_allowed_origins,
+        allow_methods=["GET", "POST", "OPTIONS"],
+        allow_headers=["*"],
+        allow_credentials=False,
+    )
+    application.add_middleware(RequestContextMiddleware)
+    application.include_router(router)
+    return application
+# Module-level ASGI application: this is the ``app.main:app`` target named in
+# the uvicorn command in the module docstring. Built at import time.
+app = create_app()

backend/app/schemas/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Pydantic request/response schemas."""
+from app.schemas.caption import CaptionResponse, ErrorResponse, HealthResponse
+__all__ = ["CaptionResponse", "ErrorResponse", "HealthResponse"]

backend/app/schemas/caption.py ADDED Viewed

	@@ -0,0 +1,65 @@

+"""Pydantic request/response models for the captioning API.
+Schemas live separately from routes so the OpenAPI spec is stable even
+when handler logic changes. Every field is annotated with an example so
+``/docs`` is self-explanatory to anyone reviewing the portfolio.
+"""
+from __future__ import annotations
+from datetime import datetime
+from pydantic import BaseModel, ConfigDict, Field
+class HealthResponse(BaseModel):
+    """Body of ``GET /healthz``: a readiness flag plus version metadata."""
+    # ``protected_namespaces=()`` is presumably here so the ``model_``-prefixed
+    # field names below do not clash with pydantic's reserved namespace.
+    model_config = ConfigDict(
+        protected_namespaces=(),
+        json_schema_extra={
+            "example": {
+                "status": "ok",
+                "model_loaded": True,
+                "model_version": "v1.0.0",
+                "api_version": "0.1.0",
+                "timestamp": "2026-05-09T12:00:00Z",
+            }
+        },
+    )
+    status: str = Field(
+        ...,
+        description="``ok`` once the predictor is loaded.",
+    )
+    model_loaded: bool = Field(
+        ...,
+        description="True after weights + tokenizer are in memory.",
+    )
+    model_version: str = Field(
+        ...,
+        description="Semantic version of the served model.",
+    )
+    api_version: str = Field(
+        ...,
+        description="Backend release version.",
+    )
+    timestamp: datetime = Field(
+        ...,
+        description="Server time the response was built (UTC).",
+    )
+class CaptionResponse(BaseModel):
+    """Body of a successful ``POST /v1/captions`` call."""
+    # ``protected_namespaces=()`` is presumably here so ``model_version`` does
+    # not clash with pydantic's reserved ``model_`` namespace.
+    model_config = ConfigDict(
+        protected_namespaces=(),
+        json_schema_extra={
+            "example": {
+                "caption": "a man riding a surfboard on a wave",
+                "model_version": "v1.0.0",
+                "decode_strategy": "greedy",
+                "latency_ms": 187.42,
+                "request_id": "8f1c2e3b4d5a4f8e9b0c1d2e3f4a5b6c",
+            }
+        },
+    )
+    caption: str = Field(
+        ...,
+        description="Generated caption text (without start/end tokens).",
+    )
+    model_version: str = Field(
+        ...,
+        description="Model version that produced this caption.",
+    )
+    decode_strategy: str = Field(
+        ...,
+        description="Decoding strategy used (e.g. ``greedy``).",
+    )
+    latency_ms: float = Field(
+        ...,
+        description="Inference time in milliseconds.",
+    )
+    request_id: str = Field(
+        ...,
+        description="Correlation id; matches the ``x-request-id`` header.",
+    )
+class ErrorResponse(BaseModel):
+    """Error body advertised in the OpenAPI ``responses`` of ``/v1/captions``.
+    NOTE(review): the routes raise ``HTTPException`` and ``create_app``
+    registers no exception handler, so real error bodies presumably carry
+    only ``detail`` — confirm whether ``request_id`` is ever populated before
+    relying on this as a uniform envelope.
+    """
+    detail: str = Field(..., description="Human-readable error message.")
+    request_id: str = Field(default="", description="Correlation id for log lookup.")

backend/app/services/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Service layer wrapping the ML predictor."""
+from app.services.predictor_service import PredictorService
+__all__ = ["PredictorService"]

backend/app/services/predictor_service.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Service layer wrapping the ML ``CaptionPredictor``.
+Why this exists between the route and the predictor:
+    * **Off-loop execution** — TensorFlow inference is sync and CPU-bound.
+      Running it inline blocks the event loop, so requests queue up
+      sequentially and event-loop-bound work (CORS, metrics, /healthz)
+      stalls. We push the call to a worker thread via ``anyio.to_thread``.
+    * **Stable seam for testing** — routes depend on this class, not on
+      the concrete predictor. Tests can substitute a stub service that
+      returns canned captions without loading TensorFlow.
+    * **Future extension point** — Phase 4 will add a request batcher and
+      per-model registry behind the same ``caption_image_bytes`` API.
+This class never re-implements inference; it delegates entirely to the
+existing ``CaptionPredictor`` abstraction.
+"""
+from __future__ import annotations
+import time
+from anyio import to_thread
+from app.utils.image import bytes_to_tensor
+from captioning.inference import CaptionPredictor
+from captioning.utils import get_logger
+log = get_logger(__name__)
+class PredictorService:
+    """Async facade over a single, already-loaded ``CaptionPredictor``."""
+    def __init__(
+        self,
+        *,
+        predictor: CaptionPredictor,
+        model_version: str,
+        max_upload_bytes: int,
+    ) -> None:
+        """Store the predictor and the metadata the routes expose.
+        Args:
+            predictor: A ready ``CaptionPredictor`` (weights already loaded).
+            model_version: Version string reported in responses and health.
+            max_upload_bytes: Upload cap; stored here, enforced by the route.
+        """
+        self._max_upload_bytes = max_upload_bytes
+        self._model_version = model_version
+        self._predictor = predictor
+    @property
+    def model_version(self) -> str:
+        """Version string supplied at construction."""
+        return self._model_version
+    @property
+    def decode_strategy(self) -> str:
+        """Decode strategy reported by the wrapped predictor."""
+        return self._predictor.decode_strategy
+    @property
+    def max_upload_bytes(self) -> int:
+        """Upload size cap supplied at construction."""
+        return self._max_upload_bytes
+    async def caption_image_bytes(self, image_bytes: bytes) -> tuple[str, float]:
+        """Turn raw image bytes into a caption without blocking the event loop.
+        Decoding and prediction each run through ``to_thread.run_sync``. The
+        timer wraps only the awaited prediction call, so decode time is not
+        part of the reported latency.
+        Args:
+            image_bytes: Raw bytes of the uploaded image.
+        Returns:
+            ``(caption, latency_ms)`` with the latency left unrounded.
+        Raises:
+            ImageDecodeError: Propagated from ``bytes_to_tensor`` when the
+                bytes cannot be decoded.
+        """
+        model_input = await to_thread.run_sync(bytes_to_tensor, image_bytes)
+        began = time.perf_counter()
+        caption: str = await to_thread.run_sync(self._predictor.predict_tensor, model_input)
+        elapsed_ms = (time.perf_counter() - began) * 1000
+        log.info(
+            "inference_completed",
+            model_version=self._model_version,
+            decode_strategy=self.decode_strategy,
+            latency_ms=round(elapsed_ms, 2),
+        )
+        return caption, elapsed_ms

backend/app/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""HTTP-layer utilities (image decoding, etc.)."""
+from app.utils.image import ALLOWED_CONTENT_TYPES, ImageDecodeError, bytes_to_tensor
+__all__ = ["ALLOWED_CONTENT_TYPES", "ImageDecodeError", "bytes_to_tensor"]

backend/app/utils/image.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""Image-decoding utilities for the HTTP boundary.
+The ML package's ``inference/image_loader.py`` reads from disk; the API
+receives bytes in memory from a multipart upload. This module bridges the
+two: it decodes raw bytes and runs them through the *same*
+``preprocess_image_tensor`` the training pipeline uses, so train/serve
+parity is preserved by construction.
+TensorFlow imports are deferred until first call to keep app import cheap
+(e.g. when running ``ruff`` or constructing the app for tests with stub
+predictors).
+"""
+from __future__ import annotations
+from typing import Any
+# Content types the ``/v1/captions`` route accepts before any decoding is
+# attempted; anything else is answered with 415.
+# NOTE(review): ``bytes_to_tensor`` decodes with ``tf.io.decode_image``, whose
+# documentation lists BMP, GIF, JPEG and PNG only. "image/webp" uploads will
+# presumably pass this check and then fail decoding (422) — confirm, then
+# either drop the entry or add a WebP-capable decoder.
+ALLOWED_CONTENT_TYPES: frozenset[str] = frozenset(
+    {
+        "image/jpeg",
+        "image/jpg",
+        "image/png",
+        "image/webp",
+        "image/bmp",
+    }
+)
+class ImageDecodeError(ValueError):
+    """Raised when uploaded bytes are not a recognisable image.
+    ``bytes_to_tensor`` raises it when TensorFlow rejects the bytes; the
+    ``/v1/captions`` route translates it into a 422 response.
+    """
+def bytes_to_tensor(image_bytes: bytes) -> Any:
+    """Decode uploaded image bytes and run the shared preprocessing step.
+    Args:
+        image_bytes: Raw bytes of the uploaded image.
+    Returns:
+        Whatever ``preprocess_image_tensor`` returns for the decoded
+        three-channel image — presumably a ``[299, 299, 3]`` ``float32``
+        tensor ready for ``CaptionPredictor.predict_tensor``; confirm against
+        the preprocessing module.
+    Raises:
+        ImageDecodeError: If TensorFlow cannot decode the bytes as an image.
+    """
+    # Deferred imports keep TensorFlow out of module import time.
+    import tensorflow as tf
+    from captioning.preprocessing.image import preprocess_image_tensor
+    undecodable = (tf.errors.InvalidArgumentError, tf.errors.UnknownError)
+    try:
+        # ``expand_animations=False`` keeps the result a single 3-D frame.
+        pixels = tf.io.decode_image(image_bytes, channels=3, expand_animations=False)
+    except undecodable as exc:
+        raise ImageDecodeError(f"Could not decode image bytes: {exc}") from exc
+    return preprocess_image_tensor(pixels)

models/v1.0.0/vocab.json ADDED Viewed

	@@ -0,0 +1,54 @@

+[
+  "",
+  "[UNK]",
+  "a",
+  "[start]",
+  "[end]",
+  "on",
+  "of",
+  "in",
+  "wooden",
+  "table",
+  "standing",
+  "sitting",
+  "woman",
+  "with",
+  "wave",
+  "two",
+  "tree",
+  "top",
+  "surfboard",
+  "street",
+  "stove",
+  "soccer",
+  "small",
+  "riding",
+  "refrigerator",
+  "red",
+  "plate",
+  "person",
+  "people",
+  "park",
+  "mountain",
+  "man",
+  "kitchen",
+  "kicking",
+  "holding",
+  "her",
+  "group",
+  "front",
+  "food",
+  "driving",
+  "down",
+  "dog",
+  "city",
+  "child",
+  "cat",
+  "bus",
+  "branch",
+  "birds",
+  "beach",
+  "ball",
+  "arms",
+  "and"
+]

scripts/bootstrap_dev_artifacts.py ADDED Viewed

	@@ -0,0 +1,154 @@

+"""Generate development-only model artifacts so the FastAPI backend can boot.
+Why this script exists:
+    The Phase 2 backend lifespan loads weights + tokenizer from
+    ``models/v1.0.0/``. Until Phase 1 training has been run end-to-end on
+    COCO, those files don't exist and ``uvicorn`` fails on startup with
+    ``FileNotFoundError``. This script produces a *valid* but
+    *not meaningfully trained* set of artefacts so:
+      * the entire backend pipeline (lifespan, /healthz, /v1/captions,
+        multipart upload, predictor wiring) can be exercised;
+      * mypy/ruff/pytest stay green;
+      * a recruiter reviewing the repo can run ``uvicorn`` and hit the API.
+Captions returned by the bootstrapped model will be *gibberish* — every
+weight is initialised by Keras's default initialiser and never trained.
+That's deliberate and clearly documented; the goal is to verify the
+serving system, not produce real predictions.
+Usage::
+    python -m scripts.bootstrap_dev_artifacts \\
+        --config configs/base.yaml \\
+        --output-dir models/v1.0.0
+The script is idempotent — running it twice overwrites the previous
+artefacts. To replace dev artefacts with real Phase 1 outputs, run
+``scripts/train.py`` and copy ``model.h5`` + ``vocab.pkl`` into the same
+directory.
+"""
+from __future__ import annotations
+from pathlib import Path
+import click
+from captioning.config import load_config
+from captioning.models.factory import build_caption_model
+from captioning.preprocessing.tokenizer import CaptionTokenizer
+from captioning.utils import configure_logging, get_logger
+log = get_logger(__name__)
+# A tiny synthetic corpus used only to fit the development tokenizer. Each
+# caption is wrapped in [start] ... [end], mirroring the pre-processed format
+# the real training pipeline produces in cell 4. Fitting on it yields a small
+# vocabulary (~50 tokens), which is fine: the model is built with the fitted
+# tokenizer's vocabulary size, so weights and decode tables stay in lockstep.
+_DEV_CORPUS: list[str] = [
+    "[start] a man riding a surfboard on a wave [end]",
+    "[start] a woman holding a small dog in her arms [end]",
+    "[start] a group of people standing on a beach [end]",
+    "[start] a cat sitting on top of a wooden table [end]",
+    "[start] a plate of food on a wooden table [end]",
+    "[start] a red bus driving down a city street [end]",
+    "[start] a child kicking a soccer ball in a park [end]",
+    "[start] two birds sitting on a tree branch [end]",
+    "[start] a kitchen with a stove and a refrigerator [end]",
+    "[start] a person standing in front of a mountain [end]",
+]
+def _materialise_variables(model, max_length: int) -> None:
+    """Call each sublayer once on zero inputs so its variables exist.
+    The call sequence mirrors ``CaptionPredictor._dummy_pass`` so that saving
+    here and loading there stay symmetric.
+    """
+    import tensorflow as tf
+    dummy_img = tf.zeros((1, 299, 299, 3), dtype=tf.float32)
+    dummy_caps = tf.zeros((1, max_length), dtype=tf.int64)
+    img_embed = model.cnn_model(dummy_img)
+    encoded = model.encoder(img_embed, training=False)
+    _ = model.decoder(
+        dummy_caps[:, :-1],
+        encoded,
+        training=False,
+        mask=tf.cast(dummy_caps[:, 1:] != 0, tf.int32),
+    )
+    # The augmentation pipeline is built too (when present) so the variable
+    # tree matches what a real ``model.fit`` run would have saved.
+    if getattr(model, "image_aug", None) is not None:
+        _ = model.image_aug(dummy_img, training=False)
+@click.command()
+@click.option(
+    "--config",
+    "config_path",
+    default=Path("configs/base.yaml"),
+    show_default=True,
+    type=click.Path(exists=True, path_type=Path),
+    help="App config YAML. Architecture hyperparameters are read from `model.*`.",
+)
+@click.option(
+    "--output-dir",
+    default=Path("models/v1.0.0"),
+    show_default=True,
+    type=click.Path(path_type=Path),
+    help="Directory that will contain model.h5, vocab.pkl, vocab.json.",
+)
+def main(config_path: Path, output_dir: Path) -> None:
+    """Create model.h5 + vocab.pkl + vocab.json under ``output-dir``."""
+    configure_logging()
+    config = load_config(config_path)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    weights_path = output_dir / config.train.weights_filename
+    log.info("bootstrap_starting", output_dir=str(output_dir))
+    # Step 1: fit a tokenizer on the synthetic corpus and persist it.
+    tokenizer = CaptionTokenizer(
+        vocab_size=config.model.vocabulary_size,
+        max_length=config.model.max_length,
+    )
+    tokenizer.fit(_DEV_CORPUS)
+    tokenizer.save(output_dir)
+    fitted_vocab_size = tokenizer.vocabulary_size
+    log.info("tokenizer_saved", directory=str(output_dir), vocabulary_size=fitted_vocab_size)
+    # Step 2: build the model against the *fitted* vocabulary size so the
+    # weights file matches the tokenizer saved next to it.
+    model = build_caption_model(config, vocab_size=tokenizer.vocabulary_size)
+    # Step 3: run one forward pass so every variable exists before saving.
+    _materialise_variables(model, config.model.max_length)
+    # Step 4: nothing here calls ``model.fit``, so the parent Model is marked
+    # as built by hand to let the HDF5 save/load round-trip work.
+    model.built = True
+    # Step 5: write the randomly initialised (untrained) weights.
+    model.save_weights(str(weights_path))
+    log.info(
+        "weights_saved",
+        path=str(weights_path),
+        warning="weights are randomly initialised; outputs will be gibberish",
+    )
+    click.echo(
+        "\nDevelopment artefacts written:\n"
+        f"  weights : {weights_path}\n"
+        f"  vocab   : {output_dir / 'vocab.pkl'}\n"
+        f"  vocab   : {output_dir / 'vocab.json'}\n"
+        "\nThese are SMOKE-TEST artefacts only. Replace with real Phase 1 "
+        "outputs before drawing any inference about model quality."
+    )
+if __name__ == "__main__":
+    main()

src/captioning/inference/predictor.py CHANGED Viewed

@@ -115,7 +115,19 @@ class CaptionPredictor:
     @staticmethod
     def _dummy_pass(model, config: AppConfig) -> None:
-        """Force-build the model so ``load_weights`` knows variable shapes."""
         import tensorflow as tf
         dummy_img = tf.zeros((1, 299, 299, 3), dtype=tf.float32)
@@ -129,3 +141,13 @@ class CaptionPredictor:
             training=False,
             mask=tf.cast(dummy_caps[:, 1:] != 0, tf.int32),
         )

     @staticmethod
     def _dummy_pass(model, config: AppConfig) -> None:
+        """Force-build the model so ``load_weights`` knows variable shapes.
+        ``ImageCaptioningModel`` has no top-level ``call()`` — it overrides
+        ``train_step``/``test_step`` instead. Keras therefore won't mark the
+        parent ``Model`` as ``built`` even after every sublayer has its
+        variables created, and the HDF5 ``load_weights`` path refuses to
+        proceed against an unbuilt subclassed model. We work around this by
+        (a) calling each sublayer once so its variables are real (shape-
+        matched to the saved checkpoint) and (b) flipping ``model.built``
+        so the loader walks the sublayer scopes inside the file. The actual
+        weights loaded are still those from the checkpoint — this is purely
+        a Keras bookkeeping flag.
+        """
         import tensorflow as tf
         dummy_img = tf.zeros((1, 299, 299, 3), dtype=tf.float32)
             training=False,
             mask=tf.cast(dummy_caps[:, 1:] != 0, tf.int32),
         )
+        # Augmentation pipeline is tracked as a sublayer of the parent Model
+        # even though inference never invokes it; building it once keeps the
+        # variable tree identical to what `model.fit` produced when Phase 1
+        # weights were saved.
+        if getattr(model, "image_aug", None) is not None:
+            _ = model.image_aug(dummy_img, training=False)
+        # Sublayers are now built; mark the parent built so HDF5 load_weights
+        # accepts the file. Safe because every variable that the checkpoint
+        # references is already materialised on a tracked sublayer.
+        model.built = True

src/captioning/preprocessing/tokenizer.py CHANGED Viewed

@@ -154,12 +154,22 @@ class CaptionTokenizer:
         directory = Path(directory)
         pkl = directory / VOCAB_PICKLE_FILENAME
         if pkl.is_file():
             with pkl.open("rb") as f:
                 vocab = pickle.load(f)
-        else:
-            with (directory / VOCAB_JSON_FILENAME).open(encoding="utf-8") as f:
                 vocab = json.load(f)
         tok = cls(vocab_size=vocab_size, max_length=max_length)
         layer = tf.keras.layers.TextVectorization(

         directory = Path(directory)
         pkl = directory / VOCAB_PICKLE_FILENAME
+        js = directory / VOCAB_JSON_FILENAME
         if pkl.is_file():
             with pkl.open("rb") as f:
                 vocab = pickle.load(f)
+        elif js.is_file():
+            with js.open(encoding="utf-8") as f:
                 vocab = json.load(f)
+        else:
+            raise FileNotFoundError(
+                f"No tokenizer vocabulary found in {directory!s}. "
+                f"Expected '{VOCAB_PICKLE_FILENAME}' (preferred) or "
+                f"'{VOCAB_JSON_FILENAME}'. Train the model with "
+                "`python -m scripts.train --config configs/base.yaml` to "
+                "produce the artefacts, or point BACKEND_TOKENIZER_DIR at a "
+                "directory that contains them."
+            )
         tok = cls(vocab_size=vocab_size, max_length=max_length)
         layer = tf.keras.layers.TextVectorization(