---
# AI Gateway stack: LiteLLM proxy + backend API + frontend, fronted by nginx.
# Only nginx publishes a host port; every other service is reachable solely
# on the internal gateway_net bridge network.

# Compose V2 ignores this field and reports it as obsolete; it is kept so
# older docker-compose releases still recognise the file format.
version: "3.9"

services:
  # ─── LiteLLM Proxy Gateway ───────────────────────────────────────────────
  litellm:
    # BUG FIX #12: Was "main-latest", which points to the HEAD of the main
    # branch and pulls in breaking changes on every `docker compose pull`.
    # LiteLLM ships multiple commits per day; /model/delete field names,
    # config.yaml keys, and routing behaviour have all changed between minor
    # versions without notice.
    #
    # Pin to a specific stable version. To upgrade: review release notes at
    # https://docs.litellm.ai/release_notes then bump the version and redeploy.
    image: ghcr.io/berriai/litellm:main-v1.81.14-stable
    container_name: ai_gateway_litellm
    restart: unless-stopped
    volumes:
      - ./litellm/config.yaml:/app/config.yaml:ro
      - litellm_data:/app/data
    environment:
      # NOTE(review): the fallback below is a publicly known placeholder.
      # Set LITELLM_MASTER_KEY in .env for any non-local deployment; the
      # backend service must receive the same value.
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-gateway-master-key}
      # BUG FIX: Added LITELLM_SALT_KEY. If DATABASE_URL is ever configured,
      # LiteLLM uses this key to encrypt provider API keys at rest in the DB.
      # Without it, stored keys are written as plaintext.
      # Set a random value in .env and NEVER change it after first use —
      # changing it makes all previously stored encrypted keys unreadable.
      - LITELLM_SALT_KEY=${LITELLM_SALT_KEY:-}
      # Optional: set DATABASE_URL in .env for model persistence across
      # restarts. Must be a PostgreSQL direct connection URL (port 5432, not
      # pooler port 6432).
      # Example:
      #   DATABASE_URL=postgresql://user:pass@host:5432/dbname?sslmode=require
      # Leave blank (default) to run without a database — models are
      # re-registered from SQLite on each backend startup via the /model/new
      # API.
      - DATABASE_URL=${DATABASE_URL:-}
      - PORT=4000
    # Folded scalar with strip chomping: the three lines join into a single
    # space-separated argument string with no trailing newline.
    command: >-
      --config /app/config.yaml
      --port 4000
      --num_workers 4
    healthcheck:
      # LiteLLM image has no curl (GitHub issue #9295). Use wget +
      # /health/liveliness.
      # Probe 127.0.0.1 rather than localhost: on Alpine/BusyBox-based images
      # localhost can resolve to ::1 first, and wget then fails against an
      # IPv4-only listener even though the service is up.
      test: ["CMD-SHELL", "wget --quiet --tries=1 -O /dev/null http://127.0.0.1:4000/health/liveliness || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    networks:
      - gateway_net

  # ─── Backend API ─────────────────────────────────────────────────────────
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: ai_gateway_backend
    restart: unless-stopped
    environment:
      - NODE_ENV=production
      - PORT=3001
      - LITELLM_BASE_URL=http://litellm:4000
      # Must match the key given to the litellm service above.
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-gateway-master-key}
      - DB_PATH=/app/data/gateway.db
      # NOTE(review): the fallback below is a publicly known placeholder.
      # Set JWT_SECRET in .env for any non-local deployment.
      - JWT_SECRET=${JWT_SECRET:-super-secret-jwt-key-change-in-production}
      - GATEWAY_PUBLIC_URL=${GATEWAY_PUBLIC_URL:-http://localhost}
      - LOG_LEVEL=${LOG_LEVEL:-http}
    volumes:
      - backend_data:/app/data
    # Do not start until LiteLLM passes its healthcheck.
    depends_on:
      litellm:
        condition: service_healthy
    networks:
      - gateway_net
    healthcheck:
      # NOTE(review): assumes the backend image ships curl — confirm against
      # ./backend/Dockerfile.
      test: ["CMD", "curl", "-f", "http://localhost:3001/api/health"]
      interval: 20s
      timeout: 5s
      retries: 3
      start_period: 10s

  # ─── Frontend ────────────────────────────────────────────────────────────
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
      args:
        - VITE_API_BASE=/api
        - VITE_APP_NAME=AI Gateway Hub
    container_name: ai_gateway_frontend
    restart: unless-stopped
    networks:
      - gateway_net
    # Short form: only orders container start; does not wait for backend
    # health.
    depends_on:
      - backend
    healthcheck:
      # Probe 127.0.0.1 rather than localhost: on Alpine/BusyBox-based images
      # localhost can resolve to ::1 first, and wget then fails against an
      # IPv4-only listener. nginx below waits on this check, so a false
      # negative here would block the whole stack.
      test: ["CMD-SHELL", "wget --quiet --tries=1 -O /dev/null http://127.0.0.1:80/ || exit 1"]
      interval: 20s
      timeout: 5s
      retries: 3
      start_period: 10s

  # ─── Nginx Reverse Proxy ─────────────────────────────────────────────────
  nginx:
    image: nginx:1.25-alpine
    container_name: ai_gateway_nginx
    restart: unless-stopped
    ports:
      - "${HTTP_PORT:-80}:80"
      # To enable HTTPS: add certs to nginx/ssl/ and uncomment below
      # - "${HTTPS_PORT:-443}:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - nginx_logs:/var/log/nginx
    # BUG FIX: Previously used plain depends_on (only waits for the container
    # to exist, not for services to be healthy). LiteLLM needs ~60s to
    # initialise; nginx starting too early forwards /v1/ requests to a
    # not-yet-ready upstream and returns 502 to users for the entire startup
    # window.
    # Fix: require both backend and frontend to pass their healthchecks before
    # nginx starts. LiteLLM health is already transitively guaranteed because
    # backend depends_on litellm with condition: service_healthy.
    depends_on:
      backend:
        condition: service_healthy
      frontend:
        condition: service_healthy
    networks:
      - gateway_net

volumes:
  litellm_data:
  backend_data:
  nginx_logs:

networks:
  gateway_net:
    driver: bridge