# Image for serving VibeVoice with a custom handler.
#
# Layers are ordered from least to most frequently changing: base image,
# OS packages, heavy Python dependencies, then the application files.

# Use NVIDIA PyTorch container as base
FROM nvcr.io/nvidia/pytorch:24.07-py3

# Set working directory
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly so the HTTPS git clone below does not rely on a recommended
# package. The apt lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Flash Attention 2 first (requires specific order).
# --no-build-isolation makes the build use the packages already installed in
# the environment instead of a fresh isolated build environment.
# NOTE(review): flash-attn is unpinned; consider pinning a version for
# reproducible builds.
RUN pip install --no-cache-dir flash-attn --no-build-isolation

# Clone and install VibeVoice in editable mode.
# NOTE(review): the clone tracks the repository's default branch, so rebuilds
# may pick up different code; consider checking out a fixed commit or tag.
RUN git clone https://github.com/microsoft/VibeVoice.git /app/VibeVoice \
    && pip install --no-cache-dir -e /app/VibeVoice

# Copy requirements and install additional dependencies.
# The manifest is copied on its own so this layer stays cached until
# requirements-docker.txt changes.
COPY requirements-docker.txt /app/
RUN pip install --no-cache-dir -r requirements-docker.txt

# Copy application code
COPY handler.py README.md /app/

# Set environment variables: both cache variables point at the same directory
# under /tmp, and the third configures the PyTorch CUDA caching allocator.
ENV TRANSFORMERS_CACHE=/tmp/model_cache \
    HF_HOME=/tmp/model_cache \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:2048,expandable_segments:True

# Expose port for health checks (documentation only; does not publish the port)
EXPOSE 8000

# Command to run (HF Endpoints will override this)
CMD ["python", "-c", "print('VibeVoice container ready')"]