# Hugging Face Space (running on L40S)
FROM vllm/vllm-openai:v0.8.5

# The upstream vllm/vllm-openai image bakes in
#   ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
# With that entrypoint in place our CMD would only be passed to the API
# server as arguments. Resetting it to an empty list makes
# CMD ["python3", "app.py"] the command that actually runs.
ENTRYPOINT []
# Installing OS packages requires root; the build switches to an unprivileged
# account right after this step.
USER root

# git-lfs              : pulling model weights from HF
# ffmpeg / libsndfile1 : librosa needs them to decode mp3/m4a/ogg uploads
#
# `git lfs install --system` registers the LFS filters in the system-wide git
# config. A plain `git lfs install` executed as root would only write root's
# own ~/.gitconfig, which the non-root runtime user never reads.
#
# The apt package index is removed in the same layer so it does not end up in
# the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        git-lfs \
        libsndfile1 && \
    git lfs install --system && \
    rm -rf /var/lib/apt/lists/*
# Create the unprivileged account (UID 1000, with a home directory) and use it
# for every following build step and for the running container.
RUN useradd --create-home --uid 1000 user
USER user

# Put the user's ~/.local/bin ahead of the system PATH so executables
# installed there are found first.
ENV PATH="/home/user/.local/bin:$PATH"

# PYTHONUNBUFFERED=1 disables block buffering of Python's stdout/stderr, so
# tqdm progress bars (snapshot_download), vLLM INFO logs and our own prints
# reach the HF Space logs right away. Without it the output waits in a 4-8 KB
# buffer until the buffer fills or the process exits, and the container looks
# "stuck" at the startup banner.
# PYTHONIOENCODING=utf-8 pins the encoding of the standard streams to UTF-8.
ENV PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8

WORKDIR /app
# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt ./

# --prefer-binary : pick a wheel over an sdist even when the sdist is newer,
#                   so heavy packages are not compiled here (a source build
#                   of flash_attn would OOM the build container). The base
#                   image already provides torch / vllm / flash_attn /
#                   transformers. Note that this flag is a preference only:
#                   a package published solely as an sdist is still built.
#
# NOTE: deliberately NO `--upgrade` here. The base image ships a carefully
#       pinned dependency tree (transformers 4.x compatible with
#       huggingface-hub<1.0). With --upgrade pip may pull huggingface-hub 1.x
#       whenever any package's metadata allows it, which then breaks
#       `import vllm` at container start.
RUN pip install \
        --no-cache-dir \
        --prefer-binary \
        --requirement requirements.txt
# Force-upgrade gradio, and only gradio.
#
# The vllm/vllm-openai:v0.8.5 base image ships an old gradio that lacks newer
# kwargs such as `show_copy_button` on gr.Textbox. Listing `gradio>=4.44` in
# requirements.txt is NOT enough: without --upgrade pip keeps whatever gradio
# is already installed as long as it satisfies >=4.44, and inside this image
# the resolver has sometimes kept the old one even when it does not.
#
# The upgrade is isolated in its own step with these flags:
#   --upgrade
#       actually replace the existing gradio.
#   --upgrade-strategy only-if-needed
#       pip's default, spelled out on purpose: do NOT cascade-upgrade
#       gradio's deps (fastapi / pydantic / starlette / uvicorn) unless
#       gradio strictly requires it. Those are tightly coupled to vllm's
#       OpenAI server, and bumping them can break `import vllm`.
#   --prefer-binary
#       favour wheels over sdists so a source build is avoided whenever a
#       wheel exists.
RUN pip install \
        --no-cache-dir \
        --prefer-binary \
        --upgrade \
        --upgrade-strategy only-if-needed \
        "gradio>=4.44,<6"
# Application sources are copied last so that editing them does not
# invalidate the dependency layers above. The destination is the current
# WORKDIR (/app), and the files are owned by the runtime user.
COPY --chown=user . ./

# Exec-form CMD: app.py is started directly by python3, with no shell wrapper.
CMD ["python3", "app.py"]