# Image for a Python 3.11 app (app.py) that uses Gradio, faster-whisper and
# llama-cpp-python, started with `python app.py`.
FROM python:3.11-slim

WORKDIR /app

# PYTHONUNBUFFERED: prevent Python from buffering stdout/stderr so logs appear
#   immediately.
# HF_HOME / HF_HUB_CACHE: keep the Hugging Face cache under /data so downloads
#   persist (assumes /data is mounted as persistent storage; confirm for the
#   target runtime).
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/data/.huggingface \
    HF_HUB_CACHE=/data/.huggingface/hub

# System dependencies. The apt lists are removed in the same layer so they do
# not end up in the image; --no-install-recommends keeps optional packages out.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# Prebuilt llama-cpp-python 0.3.16 wheel; its filename tags it as a CPU build
# with AVX512 + OpenBLAS for CPython 3.11 on x86_64 (manylinux_2_31).
# If installing the wheel fails, fall back to the llama-cpp-python package from
# the default index.
# NOTE(review): the fallback may need to compile from source, and no compiler
# is installed in this image; confirm it can succeed here.
RUN pip install --no-cache-dir \
        https://huggingface.co/datasets/AIencoder/llama-cpp-wheels/resolve/main/llama_cpp_python-0.3.16%2Bcpuavx512_openblas-cp311-cp311-manylinux_2_31_x86_64.whl \
    || pip install --no-cache-dir llama-cpp-python

# Python dependencies. The version specifier is quoted so the shell does not
# treat ">" as an output redirection, which would drop the ">=5.0.0" constraint
# and write pip's output to a file named "=5.0.0".
RUN pip install --no-cache-dir \
        faster-whisper \
        "gradio>=5.0.0" \
        huggingface_hub

# Copy the application
COPY app.py /app/app.py

# Port the app is expected to listen on (presumably Gradio's default; confirm
# against app.py). EXPOSE documents the port and does not publish it.
EXPOSE 7860

# Run the app directly (exec form, so no intermediate shell is spawned)
CMD ["python", "app.py"]