FROM vllm/vllm-openai:v0.8.5

# The official vllm/vllm-openai image hard-codes
#   ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
# Resetting it to an empty list is what lets the CMD at the bottom of this
# file (python3 app.py) run as the container's main process instead of being
# appended as arguments to the vLLM API server.
ENTRYPOINT []

# Root is only needed for the OS package install and user creation below.
USER root

# git-lfs               : pulling model weights from HF
# ffmpeg / libsndfile1  : librosa needs them to decode mp3/m4a/ogg uploads
#
# `git lfs install --system` writes the LFS filter configuration to the
# system-wide git config. A plain `git lfs install` executed as root would
# only touch root's own per-user git config, leaving the unprivileged `user`
# account (which is what actually runs the app) without LFS filters.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        git-lfs \
        libsndfile1 && \
    git lfs install --system && \
    rm -rf /var/lib/apt/lists/*

# Create the unprivileged runtime account and drop to it for every later step.
# NOTE(review): UID 1000 is presumably what the hosting platform expects --
# confirm before changing it.
RUN useradd -m -u 1000 user
USER user

# pip run as `user` drops console scripts into ~/.local/bin; expose them.
ENV PATH="/home/user/.local/bin:$PATH"

# Force Python stdout/stderr to be unbuffered so that progress bars (tqdm in
# snapshot_download), vLLM INFO logs and our own prints show up in HF Space
# logs immediately. Without this the container looks "stuck" at the startup
# banner because everything sits in a 4-8 KB block buffer until the process
# exits or the buffer fills up.
ENV PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8

WORKDIR /app

# Copy only the dependency manifest first so the pip layers below stay cached
# until requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# --prefer-binary : pick a prebuilt wheel even when a newer sdist exists, so
#                   pip avoids source builds wherever a wheel is available
#                   (building flash_attn from source would OOM the build
#                   container). This is a preference, not a prohibition: a
#                   requirement that publishes no wheel at all is still built
#                   from source. The base image already provides
#                   torch / vllm / flash_attn / transformers.
#
# NOTE: deliberately NO `--upgrade` here. The base image ships a carefully
# pinned dependency tree (transformers 4.x compatible with
# huggingface-hub<1.0). Using --upgrade lets pip pull huggingface-hub 1.x
# whenever any package's metadata allows it, which then breaks
# `import vllm` at container start.
RUN pip install --no-cache-dir --prefer-binary -r requirements.txt

# Force-upgrade gradio specifically.
#
# The vllm/vllm-openai:v0.8.5 base image ships an old gradio (pre-4.4) that
# is missing newer kwargs like `show_copy_button` on gr.Textbox. Listing
# `gradio>=4.44` in requirements.txt is NOT enough: without --upgrade, pip
# leaves any already-installed gradio in place as long as it parses as
# >=4.44 (and even when it doesn't, the resolver sometimes keeps it for
# compatibility reasons inside this image).
#
# We isolate this to a single instruction and use:
#   --upgrade                          : actually replace the existing gradio
#   --upgrade-strategy only-if-needed  : do NOT cascade-upgrade gradio's deps
#                                        (fastapi / pydantic / starlette /
#                                        uvicorn) unless gradio strictly
#                                        requires it -- those are tightly
#                                        coupled to vllm's OpenAI server and
#                                        bumping them can break `import vllm`.
#   --prefer-binary                    : favour wheels over sdists so the
#                                        upgrade does not trigger a source
#                                        build when a wheel exists.
RUN pip install --no-cache-dir --prefer-binary \
    --upgrade --upgrade-strategy only-if-needed \
    "gradio>=4.44,<6"

# Application source goes in last: it changes most often, and keeping it
# below the dependency layers means code edits do not reinstall packages.
COPY --chown=user . /app

CMD ["python3", "app.py"]