# Image for the Python app in app.py (Gradio UI with faster-whisper and
# llama-cpp-python), listening on port 7860.
FROM python:3.11-slim

WORKDIR /app

# Prevent Python from buffering stdout/stderr so logs appear immediately.
ENV PYTHONUNBUFFERED=1

# Point the HuggingFace cache at /data so downloaded models land on persistent
# storage. NOTE(review): /data is assumed to be mounted (and writable) at
# runtime by the hosting platform -- confirm.
ENV HF_HOME=/data/.huggingface \
    HF_HUB_CACHE=/data/.huggingface/hub

# Install system dependencies. --no-install-recommends keeps the slim image
# slim; the apt lists are removed in the same layer so they never reach the
# image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Install a prebuilt AVX2-optimized llama-cpp-python wheel (noted by the
# original author as 2-3x faster), falling back to PyPI if the wheel install
# fails.
# NOTE(review): the PyPI fallback may have to build from source, which would
# need a compiler toolchain that this image does not install -- confirm the
# fallback actually works, or drop it so a broken wheel URL fails loudly.
RUN pip install --no-cache-dir \
        https://huggingface.co/datasets/AIencoder/llama-cpp-wheels/resolve/main/llama_cpp_python-0.3.16%2Bavx2-cp311-cp311-manylinux_2_31_x86_64.whl \
    || pip install --no-cache-dir llama-cpp-python

# Install Python dependencies.
# The gradio specifier MUST be quoted: unquoted, the shell parses ">=5.0.0" as
# a redirection to a file named "=5.0.0", so pip would install an
# unconstrained gradio and the version floor would be silently ignored.
RUN pip install --no-cache-dir \
        faster-whisper \
        "gradio>=5.0.0" \
        huggingface_hub

# Copy application
COPY app.py /app/app.py

# Document the port the app is served on (EXPOSE does not publish it).
EXPOSE 7860

# NOTE(review): the container runs as root because no USER is set. Consider a
# non-root user once ownership/permissions of /data are confirmed.

# Run the app directly (exec form, so python is PID 1 and receives SIGTERM).
CMD ["python", "app.py"]