# Image for serving VibeVoice on top of the NVIDIA PyTorch container.
# Build context must contain: requirements-docker.txt, handler.py, README.md.
FROM nvcr.io/nvidia/pytorch:24.07-py3

# All application files live under /app (WORKDIR creates it if missing).
WORKDIR /app

# System dependencies. --no-install-recommends keeps the layer small;
# ca-certificates is listed explicitly so the HTTPS git clone below does not
# depend on it arriving as a recommended package. The apt lists are removed in
# the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Flash Attention 2 before the other Python packages (order matters).
# --no-build-isolation makes pip build against the packages already installed
# in the environment instead of a fresh isolated build env.
RUN pip install --no-cache-dir flash-attn --no-build-isolation

# Clone VibeVoice and install it in editable mode. Only the latest commit is
# needed, so a shallow clone avoids storing the full history in the layer.
# NOTE(review): the upstream revision is not pinned, so rebuilds may pick up
# different code — consider pinning a tag or commit for reproducibility.
RUN git clone --depth 1 https://github.com/microsoft/VibeVoice.git /app/VibeVoice \
    && pip install --no-cache-dir -e /app/VibeVoice

# Additional Python dependencies. The manifest is copied on its own so this
# layer stays cached until requirements-docker.txt itself changes.
COPY requirements-docker.txt /app/
RUN pip install --no-cache-dir -r requirements-docker.txt

# Application code, copied last because it changes most often.
COPY handler.py README.md /app/

# Runtime configuration:
#  - both cache variables point at /tmp/model_cache
#    (NOTE(review): TRANSFORMERS_CACHE is kept for compatibility; newer
#    transformers releases reportedly prefer HF_HOME — confirm before removing)
#  - PYTORCH_CUDA_ALLOC_CONF sets the CUDA caching-allocator options
ENV TRANSFORMERS_CACHE=/tmp/model_cache \
    HF_HOME=/tmp/model_cache \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:2048,expandable_segments:True

# Port used for health checks (documentation only; does not publish the port).
EXPOSE 8000

# Placeholder command; HF Endpoints overrides it at deploy time.
CMD ["python", "-c", "print('VibeVoice container ready')"]