# syntax=docker/dockerfile:1
#
# Image that runs two uvicorn applications side by side on a CUDA 12.3 /
# cuDNN 9 (Ubuntu 22.04) base: `server:app` on $API_PORT and
# `model_server:app` on $MODEL_PORT.
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04

# Build-time only: keeps apt from prompting. Declared as ARG so it is visible
# to RUN steps but does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#   HF_HOME / NUMBA_CACHE_DIR redirect the Hugging Face and numba caches.
#   MODEL_PORT / API_PORT are consumed by EXPOSE, HEALTHCHECK and CMD below.
ENV PYTHONUNBUFFERED=1 \
    CUDA_HOME=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    HF_HOME=/app/models \
    NUMBA_CACHE_DIR=/tmp/numba_cache \
    MODEL_PORT=10000 \
    API_PORT=8000

# System dependencies (sorted for diffability). The apt lists are removed in
# the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        libavcodec-extra \
        libsm6 \
        libsndfile1 \
        libsox-fmt-all \
        libxext6 \
        libxrender-dev \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap packaging tools; uv is used for the dependency installs below.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel uv

WORKDIR /app

# Pre-create the directories the image expects. Only the numba cache is made
# world-writable (presumably so the container can run under an arbitrary UID
# -- confirm against the deployment).
RUN mkdir -p \
        /tmp/numba_cache \
        /app/glm-4-voice-decoder \
        /app/cosyvoice \
        /app/third_party/Matcha-TTS \
    && chmod 777 /tmp/numba_cache

# Python dependencies. The manifest is copied on its own so this layer stays
# cached until requirements.txt changes.
# `--system` is required: there is no virtual environment in this image and
# `uv pip install` refuses to touch a non-virtual interpreter without it.
# NOTE(review): flash-attn's upstream install instructions use
# `--no-build-isolation` with torch already installed; whether that is needed
# here depends on requirements.txt -- confirm.
COPY requirements.txt .
RUN python3 -m uv pip install --system --no-cache --prerelease=allow \
        -r requirements.txt \
    && python3 -m uv pip install --system --no-cache \
        --extra-index-url https://download.pytorch.org/whl/cu121 \
        torchaudio==2.1.0+cu121 \
        flash-attn==2.5.0 \
        resampy==0.4.2 \
        soundfile==0.12.1

# Application files (use a .dockerignore to keep the build context small).
COPY . .

# Only the Matcha-TTS setup script needs the executable bit. /tmp is left
# untouched so it keeps its default sticky, world-writable mode.
RUN chmod +x /app/third_party/Matcha-TTS/setup.sh

# Documented ports: API first, model server second.
EXPOSE ${API_PORT} ${MODEL_PORT}

# Probe the API server. ${API_PORT} is expanded by the shell at run time, so
# the probe follows a runtime override of API_PORT.
# NOTE(review): assumes /api/voice_chat answers a plain GET cheaply -- confirm,
# or point this at a dedicated health route.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5m --retries=3 \
    CMD curl --fail --silent --show-error --output /dev/null \
        "http://localhost:${API_PORT}/api/voice_chat" || exit 1

# Start the API server in the background, then `exec` the model server so it
# replaces the shell as PID 1 and receives SIGTERM from `docker stop`.
# The container's lifetime is tied to the model server process.
CMD ["sh", "-c", "python3 -m uvicorn server:app --host 0.0.0.0 --port \"$API_PORT\" & exec python3 -m uvicorn model_server:app --host 0.0.0.0 --port \"$MODEL_PORT\""]