Spaces:
Configuration error
Configuration error
"""HTTP routes: ``/healthz`` and ``/v1/captions``.

Routes are intentionally thin: validate inputs, delegate to the
``PredictorService``, shape the response. No model code, no TF imports.
"""
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status | |
| from app.core.config import BackendSettings, get_backend_settings | |
| from app.core.logging import current_request_id | |
| from app.schemas.caption import CaptionResponse, ErrorResponse, HealthResponse | |
| from app.services.predictor_service import PredictorService | |
| from app.utils.image import ALLOWED_CONTENT_TYPES, ImageDecodeError | |
| from captioning.utils import get_logger | |
# Module-level logger, created via the project's ``get_logger`` helper and
# keyed by this module's import name.
log = get_logger(__name__)

# Router instance for this module's endpoints.
# NOTE(review): presumably included into the FastAPI app elsewhere — confirm.
router = APIRouter()
def get_predictor_service(request: Request) -> PredictorService:
    """Fetch the shared ``PredictorService`` stored on the application state.

    Looks up ``predictor_service`` on ``request.app.state``. If the attribute
    is missing or ``None`` (for example when ``/v1/captions`` is hit during a
    rolling restart, before the lifespan has finished loading weights), the
    request is rejected with a 503 instead of crashing further down.

    Args:
        request: Incoming request; only ``request.app.state`` is consulted.

    Returns:
        The ``PredictorService`` instance found on the application state.

    Raises:
        HTTPException: 503 Service Unavailable when no predictor is set.
    """
    predictor: PredictorService | None = getattr(
        request.app.state, "predictor_service", None
    )
    if predictor is not None:
        return predictor

    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Predictor is not ready yet.",
    )
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm how it is registered on ``router``.
async def healthz(
    request: Request,
    settings: BackendSettings = Depends(get_backend_settings),
) -> HealthResponse:
    """Report whether the predictor has been loaded.

    The handler never raises on its own: readiness is conveyed through the
    ``status`` / ``model_loaded`` fields rather than through the HTTP status.

    Args:
        request: Incoming request; ``request.app.state.predictor_service`` is
            inspected to decide readiness.
        settings: Backend settings, used for ``api_version`` and as the
            fallback source of ``model_version`` while no predictor is set.

    Returns:
        A ``HealthResponse`` stamped with the current UTC time.
    """
    predictor: PredictorService | None = getattr(
        request.app.state, "predictor_service", None
    )

    if predictor is None:
        # Predictor not available yet: report the configured model version.
        state = "loading"
        loaded = False
        version = settings.model_version
    else:
        state = "ok"
        loaded = True
        version = predictor.model_version

    return HealthResponse(
        status=state,
        model_loaded=loaded,
        model_version=version,
        api_version=settings.api_version,
        timestamp=datetime.now(timezone.utc),
    )
def _payload_too_large(size: int, limit: int) -> HTTPException:
    """Build the 413 error raised when an upload exceeds the byte limit."""
    return HTTPException(
        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
        detail=(f"Image is {size} bytes; limit is {limit}."),
    )


# NOTE(review): no route decorator is visible on this handler in this view;
# confirm how it is registered on ``router``.
async def caption_image(
    image: UploadFile = File(
        ...,
        description="Image file to caption. Allowed: JPEG, PNG, WebP, BMP.",
    ),
    service: PredictorService = Depends(get_predictor_service),
) -> CaptionResponse:
    """Accept a multipart image upload and return a generated caption.

    Checks run in this order, each mapping to its own HTTP error:

    1. content type must be in ``ALLOWED_CONTENT_TYPES`` (415);
    2. the upload's recorded size, when available, must not exceed
       ``service.max_upload_bytes`` (413), checked *before* the file is read
       so an oversized upload is not buffered in memory just to be rejected;
    3. the payload must be non-empty (400);
    4. the payload length must not exceed ``service.max_upload_bytes`` (413),
       which covers upload objects that do not expose a size;
    5. the predictor must be able to decode the bytes (422).

    Args:
        image: The uploaded file.
        service: Predictor resolved through ``get_predictor_service``.

    Returns:
        A ``CaptionResponse`` carrying the caption, the service's model
        version and decode strategy, the latency rounded to two decimals,
        and the current request id.

    Raises:
        HTTPException: 415, 413, 400 or 422 as described above.
    """
    if image.content_type not in ALLOWED_CONTENT_TYPES:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=(
                f"Unsupported content type: {image.content_type!r}. "
                f"Allowed: {sorted(ALLOWED_CONTENT_TYPES)}."
            ),
        )

    max_bytes = service.max_upload_bytes

    # ``size`` is read defensively: upload objects without the attribute
    # (or with ``None``) fall through to the post-read length check below.
    declared_size = getattr(image, "size", None)
    if declared_size is not None and declared_size > max_bytes:
        raise _payload_too_large(declared_size, max_bytes)

    payload = await image.read()
    if not payload:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Empty file upload.",
        )
    if len(payload) > max_bytes:
        raise _payload_too_large(len(payload), max_bytes)

    try:
        caption, latency_ms = await service.caption_image_bytes(payload)
    except ImageDecodeError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        ) from exc

    return CaptionResponse(
        caption=caption,
        model_version=service.model_version,
        decode_strategy=service.decode_strategy,
        latency_ms=round(latency_ms, 2),
        request_id=current_request_id(),
    )