SongPrep / Dockerfile
root
update env
48d9a6b
Raw
History Blame Contribute Delete
2.95 kB
# Base image: the vLLM OpenAI-server image, pinned to an exact release tag.
FROM vllm/vllm-openai:v0.8.5

# The OS package install and user creation that follow must run as root.
USER root

# Upstream bakes in
#   ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
# Resetting it to an empty list is what lets this file's own
# CMD ["python3", "app.py"] be the process that actually starts.
ENTRYPOINT []
# OS-level runtime dependencies (this step runs as root):
#   ffmpeg / libsndfile1 : librosa needs them to decode mp3/m4a/ogg uploads
#   git-lfs              : pulling model weights from HF
#
# DEBIAN_FRONTEND=noninteractive is set inline (not via ENV) so no package
# can stall the build on an interactive prompt, without the variable leaking
# into the runtime environment.
#
# `git lfs install --system` writes the LFS filter configuration to the
# system-wide git config. The plain form only touches the invoking user's
# ~/.gitconfig -- root's, at this point -- which the non-root user this
# image later switches to would never read.
#
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ffmpeg \
        git-lfs \
        libsndfile1 && \
    git lfs install --system && \
    rm -rf /var/lib/apt/lists/*
# Create an unprivileged account (uid 1000, with a home directory) and drop
# root for every remaining build step and for the running container.
# NOTE(review): uid 1000 presumably matches what the hosting platform
# expects -- confirm before changing it.
RUN useradd --create-home --uid 1000 user
USER user

# Put the account's ~/.local/bin first on PATH; that is where per-user pip
# installs place their executables.
ENV PATH="/home/user/.local/bin:${PATH}"
# Python stdio settings.
#
# PYTHONUNBUFFERED=1 disables block buffering on stdout/stderr so tqdm
# progress bars (snapshot_download), vLLM INFO logs and the app's own prints
# reach the HF Space logs right away. With buffering on, output sits in a
# 4-8 KB buffer until it fills or the process exits, and the container
# appears "stuck" at the startup banner.
#
# PYTHONIOENCODING=utf-8 pins the stdio encoding to UTF-8.
ENV PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8
# Relative paths in the instructions below resolve against /app.
WORKDIR /app

# Only the dependency manifest is copied at this point, ahead of the rest of
# the build context.
COPY --chown=user requirements.txt ./requirements.txt

# Install the app's Python requirements on top of what the base image already
# provides (torch / vllm / flash_attn / transformers).
#
#   --prefer-binary : favour prebuilt wheels over source distributions, so
#                     heavy packages such as flash_attn are not compiled
#                     here (a source build would OOM the build container).
#
# There is intentionally NO `--upgrade`. The base image carries a carefully
# pinned dependency tree (transformers 4.x, compatible with
# huggingface-hub<1.0). With --upgrade, pip may pull huggingface-hub 1.x as
# soon as any package's metadata permits it, and `import vllm` then fails at
# container start.
RUN pip install \
        --prefer-binary \
        --no-cache-dir \
        --requirement requirements.txt
# Replace the base image's gradio with one the app can use.
#
# vllm/vllm-openai:v0.8.5 ships an old gradio that lacks newer kwargs such as
# `show_copy_button` on gr.Textbox. Declaring `gradio>=4.44` in
# requirements.txt alone does not help: without --upgrade, pip leaves an
# already-installed gradio in place whenever it satisfies the specifier (the
# original author also observed the resolver keeping the old copy in this
# image even when it did not).
#
# The upgrade is confined to this one instruction, with:
#   --upgrade                          replace the gradio that is present
#   --upgrade-strategy only-if-needed  leave gradio's dependencies (fastapi /
#                                      pydantic / starlette / uvicorn) alone
#                                      unless gradio strictly requires a
#                                      newer one; they are tightly coupled to
#                                      vllm's OpenAI server and bumping them
#                                      can break `import vllm`
#   --prefer-binary                    favour wheels over source builds
RUN pip install \
        --no-cache-dir \
        --prefer-binary \
        --upgrade \
        --upgrade-strategy only-if-needed \
        "gradio>=4.44,<6"
# Copy the rest of the build context into the working directory (/app),
# owned by the unprivileged user. This comes after the dependency installs,
# so a source-only change does not invalidate those layers.
COPY --chown=user . ./

# Default process: the application's entry script, in exec form.
CMD ["python3", "app.py"]