# Dockerfile
# Base image: NVIDIA CUDA 12.3.2 with cuDNN 9, "devel" variant, on Ubuntu 22.04
# (all of this is encoded in the tag below).
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04

# Runtime environment variables
#   - PYTHONUNBUFFERED: unbuffered Python stdout/stderr so logs appear immediately
#   - CUDA_HOME / PATH / LD_LIBRARY_PATH: CUDA toolkit located under /usr/local/cuda
#   - NVIDIA_*: GPUs and driver capabilities requested from the NVIDIA container runtime
#   - HF_HOME: Hugging Face cache location
#   - NUMBA_CACHE_DIR: Numba cache location
#   - MODEL_PORT / API_PORT: ports the two uvicorn servers are started on
ENV PYTHONUNBUFFERED=1 \
    CUDA_HOME=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    HF_HOME=/app/models \
    NUMBA_CACHE_DIR=/tmp/numba_cache \
    MODEL_PORT=10000 \
    API_PORT=8000

# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# build without leaking the setting into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# System packages: Python toolchain, compiler, git/curl, and audio/video/X11
# libraries. The apt lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        libavcodec-extra \
        libsm6 \
        libsndfile1 \
        libsox-fmt-all \
        libxext6 \
        libxrender-dev \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Upgrade the packaging toolchain and install uv.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel uv

WORKDIR /app

# Pre-create the Numba cache directory (made world-writable) and the
# application sub-directories under /app.
RUN mkdir -p \
        /tmp/numba_cache \
        /app/glm-4-voice-decoder \
        /app/cosyvoice \
        /app/third_party/Matcha-TTS \
    && chmod 777 /tmp/numba_cache

# Install Python dependencies.
# requirements.txt is copied on its own so this layer is rebuilt only when the
# dependency list changes, not on every source edit.
COPY requirements.txt .
# --system: install into the image's system Python explicitly; uv's pip
#           interface is designed around virtual environments and none is
#           created in this image.
# --no-cache-dir on both invocations keeps uv's cache out of the image layer.
# The extra index supplies the CUDA 12.1 (+cu121) PyTorch wheels.
# NOTE(review): flash-attn is usually built from source against an already
# installed torch; confirm requirements.txt provides torch and whether
# --no-build-isolation is needed for that package.
RUN python3 -m uv pip install --system --no-cache-dir --prerelease=allow -r requirements.txt && \
    python3 -m uv pip install --system --no-cache-dir \
        --extra-index-url https://download.pytorch.org/whl/cu121 \
        torchaudio==2.1.0+cu121 \
        flash-attn==2.5.0 \
        resampy==0.4.2 \
        soundfile==0.12.1

# Copy application files into the working directory.
COPY . .

# Set permissions.
# The setup script is made executable. For /tmp, the symbolic mode a+rwX is
# used instead of 777: it leaves /tmp's sticky bit untouched and does not mark
# regular files executable, while still making everything under /tmp readable
# and writable by any user.
RUN chmod +x /app/third_party/Matcha-TTS/setup.sh && \
    chmod -R a+rwX /tmp

# Document the listening ports: 8000 (API server) and 10000 (model server)
EXPOSE 8000/tcp 10000/tcp

# Healthcheck: probe the API server over HTTP every 30s, allowing 5 minutes
# for startup before failures count.
# The port follows $API_PORT (falling back to 8000) so the probe keeps working
# when the API port is overridden at `docker run` time; the shell expands the
# variable when the check runs.
# NOTE(review): /api/voice_chat looks like a functional endpoint; confirm it
# answers a plain GET with a 2xx status cheaply, or point this at a dedicated
# health route.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5m \
    CMD curl --fail "http://localhost:${API_PORT:-8000}/api/voice_chat" || exit 1

# Startup command.
# Runs under `sh -c` so $API_PORT / $MODEL_PORT are expanded at container start.
# The API server is launched in the background; the model server then replaces
# the shell via `exec`, becoming the container's main process and receiving
# stop signals directly.
# Keep the array valid JSON: any backslash inside the string must be a legal
# JSON escape (such as \"), otherwise Docker silently treats the whole line as
# a shell-form command and the container fails to start.
CMD ["sh", "-c", \
    "python3 -m uvicorn server:app --host 0.0.0.0 --port \"$API_PORT\" & exec python3 -m uvicorn model_server:app --host 0.0.0.0 --port \"$MODEL_PORT\""]