# Base image: NVIDIA's PyTorch container from nvcr.io, pinned to the 24.07-py3 tag.
# NOTE(review): the tag is not digest-pinned, so the image it resolves to could
# change if the tag is ever re-published — consider adding an @sha256 digest.
FROM nvcr.io/nvidia/pytorch:24.07-py3

# Working directory for the rest of the build and for the running container.
# Relative paths in later instructions resolve against /app.
WORKDIR /app

# Install system dependencies (kept alphabetically sorted for clean diffs).
#   build-essential  C/C++ toolchain (presumably for pip packages that compile
#                    native extensions — confirm against the Python deps)
#   ca-certificates  listed explicitly: it is only a *recommended* package of
#                    git, and HTTPS clones fail without it
#   ffmpeg           audio/video tooling
#   git              used by the VibeVoice clone step
# --no-install-recommends keeps optional packages out of the image, and the
# inline DEBIAN_FRONTEND prevents interactive prompts without leaking the
# setting into the runtime environment. The apt lists are removed in the same
# layer so they never reach the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Flash Attention 2 before the other Python packages (order matters).
# --no-build-isolation builds against the packages already installed in the
# image instead of a fresh isolated build environment.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# NOTE(review): the version is unpinned, so rebuilds may pick up a different
# release — pin it (flash-attn==X.Y.Z) once a known-good version is confirmed.
RUN pip install --no-cache-dir flash-attn --no-build-isolation

# Clone VibeVoice and install it in editable mode.
# - --depth 1 fetches only the latest commit, so the full git history is not
#   stored in the image.
# - Clone and install share one RUN because they are a single logical step.
# - The package is installed by absolute path, so no WORKDIR switching is
#   needed and the working directory stays /app.
# - The install is editable (-e), so /app/VibeVoice must remain in the image.
# - --no-cache-dir keeps pip's cache out of the layer.
# NOTE(review): this tracks whatever the default branch points to at build
# time, so builds are not reproducible — pin to a tag or commit once a
# known-good revision is chosen.
RUN git clone --depth 1 https://github.com/microsoft/VibeVoice.git /app/VibeVoice \
    && pip install --no-cache-dir -e /app/VibeVoice

# Install the additional Python dependencies.
# The requirements file is copied on its own, before the application code, so
# this layer is rebuilt only when requirements-docker.txt itself changes.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
COPY requirements-docker.txt /app/
RUN pip install --no-cache-dir -r requirements-docker.txt

# Add the request handler and the README to /app/.
# Done after all dependency installs so that editing these files does not
# invalidate the heavier layers above.
COPY handler.py README.md /app/

# Runtime environment, grouped into a single instruction.
#   TRANSFORMERS_CACHE, HF_HOME  both point the model cache at /tmp/model_cache
#   PYTORCH_CUDA_ALLOC_CONF      settings for PyTorch's CUDA memory allocator
# NOTE(review): recent transformers releases reportedly deprecate
# TRANSFORMERS_CACHE in favour of HF_HOME — confirm before removing it, since
# dropping it would change where models are cached.
ENV TRANSFORMERS_CACHE=/tmp/model_cache \
    HF_HOME=/tmp/model_cache \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:2048,expandable_segments:True

# Declare port 8000 as the container's service port (intended for health checks).
# EXPOSE is metadata only: it does not publish the port, and nothing in this
# Dockerfile starts a listener on it.
EXPOSE 8000

# Default command: prints a readiness message and then exits.
# This is a placeholder — HF Endpoints will override it with the real
# serving command.
CMD ["python", "-c", "print('VibeVoice container ready')"]