---
# Docker Compose stack for the Indian Speech-to-Text application.
# NOTE: `version` is obsolete under Compose v2 (ignored with a warning) but
# kept for compatibility with older docker-compose binaries.
version: '3.8'

services:
  # Main Speech-to-Text application
  speech-to-text:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - APP_ENV=${APP_ENV:-dev}
    container_name: indian-speech-to-text
    restart: unless-stopped
    ports:
      - "${GRADIO_SERVER_PORT:-7860}:7860"
    environment:
      - APP_ENV=${APP_ENV:-dev}
      - DEBUG=${DEBUG:-false}
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
      - GRADIO_SHARE=${GRADIO_SHARE:-false}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-distil-whisper}
      - DEFAULT_LANGUAGE=${DEFAULT_LANGUAGE:-hindi}
      - MAX_AUDIO_LENGTH=${MAX_AUDIO_LENGTH:-300}
      - ENABLE_GPU=${ENABLE_GPU:-true}
      - MODEL_CACHE_DIR=/app/models
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
      - TORCH_HOME=/app/models
      - HF_TOKEN=${HF_TOKEN:-}
      - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
    env_file:
      - ./configs/envs/.env.${APP_ENV:-dev}
    volumes:
      # Persistent model storage
      - models_cache:/app/models
      # Logs
      - ./logs:/app/logs
      # Temporary files
      - ./temp:/app/temp
    networks:
      - speech-network
    healthcheck:
      # NOTE(review): assumes the app exposes a /health endpoint on the Gradio
      # port — confirm, otherwise the container will flap as unhealthy.
      test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 4G
    # GPU support (uncomment if using NVIDIA GPU)
    # NOTE(review): if you uncomment the lines below, merge the NVIDIA variable
    # into the existing `environment:` list above — a second `environment:` key
    # in the same service is a duplicate-key error.
    # runtime: nvidia
    # environment:
    #   - NVIDIA_VISIBLE_DEVICES=all

  # Model downloader service (runs once to download models)
  model-downloader:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: model-downloader
    environment:
      - MODEL_CACHE_DIR=/app/models
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
      - HF_TOKEN=${HF_TOKEN:-}
      - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
    volumes:
      - models_cache:/app/models
    networks:
      - speech-network
    command: python scripts/download_models.py --priority-only
    # Only started when the `download` profile is requested
    # (docker compose --profile download up model-downloader).
    profiles:
      - download
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G

  # Optional: Redis for caching (if needed for scaling)
  redis:
    image: redis:7-alpine
    container_name: speech-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - speech-network
    profiles:
      - cache
    command: redis-server --appendonly yes
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M

  # Optional: Nginx reverse proxy for production
  nginx:
    image: nginx:alpine
    container_name: speech-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    networks:
      - speech-network
    profiles:
      - production
    depends_on:
      - speech-to-text
    deploy:
      resources:
        limits:
          memory: 256M
        reservations:
          memory: 128M

volumes:
  # Bind-mount ./models through the local driver so downloaded model weights
  # persist on the host and are shared between the app and the downloader.
  # NOTE(review): ${PWD}/models must exist before `up`, or the mount fails.
  models_cache:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${PWD}/models
  redis_data:
    driver: local

networks:
  speech-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16