---
# Docker Compose stack for the Indian Speech-to-Text application.
# NOTE: `version` is obsolete under Compose v2 (ignored with a warning) but
# kept for compatibility with older docker-compose binaries.
version: '3.8'

services:
  # Main Speech-to-Text application
  speech-to-text:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - APP_ENV=${APP_ENV:-dev}
    container_name: indian-speech-to-text
    restart: unless-stopped
    ports:
      - "${GRADIO_SERVER_PORT:-7860}:7860"
    environment:
      - APP_ENV=${APP_ENV:-dev}
      - DEBUG=${DEBUG:-false}
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
      - GRADIO_SHARE=${GRADIO_SHARE:-false}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-distil-whisper}
      - DEFAULT_LANGUAGE=${DEFAULT_LANGUAGE:-hindi}
      - MAX_AUDIO_LENGTH=${MAX_AUDIO_LENGTH:-300}
      - ENABLE_GPU=${ENABLE_GPU:-true}
      - MODEL_CACHE_DIR=/app/models
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
      - TORCH_HOME=/app/models
      - HF_TOKEN=${HF_TOKEN:-}
      - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
    env_file:
      - ./configs/envs/.env.${APP_ENV:-dev}
    volumes:
      # Persistent model storage
      - models_cache:/app/models
      # Logs
      - ./logs:/app/logs
      # Temporary files
      - ./temp:/app/temp
    networks:
      - speech-network
    healthcheck:
      # NOTE(review): assumes the app exposes a /health endpoint on the Gradio
      # port — confirm, otherwise the container will flap as unhealthy.
      test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 4G
    # GPU support (uncomment if using NVIDIA GPU)
    # NOTE(review): if you uncomment the lines below, merge the NVIDIA variable
    # into the existing `environment:` list above — a second `environment:` key
    # in the same service is a duplicate-key error.
    # runtime: nvidia
    # environment:
    #   - NVIDIA_VISIBLE_DEVICES=all

  # Model downloader service (runs once to download models)
  model-downloader:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: model-downloader
    environment:
      - MODEL_CACHE_DIR=/app/models
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
      - HF_TOKEN=${HF_TOKEN:-}
      - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
    volumes:
      - models_cache:/app/models
    networks:
      - speech-network
    command: python scripts/download_models.py --priority-only
    # Only started when the `download` profile is requested
    # (docker compose --profile download up model-downloader).
    profiles:
      - download
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G

  # Optional: Redis for caching (if needed for scaling)
  redis:
    image: redis:7-alpine
    container_name: speech-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - speech-network
    profiles:
      - cache
    command: redis-server --appendonly yes
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M

  # Optional: Nginx reverse proxy for production
  nginx:
    image: nginx:alpine
    container_name: speech-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    networks:
      - speech-network
    profiles:
      - production
    depends_on:
      - speech-to-text
    deploy:
      resources:
        limits:
          memory: 256M
        reservations:
          memory: 128M

volumes:
  # Bind-mount ./models through the local driver so downloaded model weights
  # persist on the host and are shared between the app and the downloader.
  # NOTE(review): ${PWD}/models must exist before `up`, or the mount fails.
  models_cache:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${PWD}/models
  redis_data:
    driver: local

networks:
  speech-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16