# Source: Hugging Face repo by MightyOctopus — "Update Dockerfile", commit b3bb53a (verified), 952 bytes.
# (Web-page chrome from the original listing converted to this comment so the file parses as a Dockerfile.)
# syntax=docker/dockerfile:1
# Image: vLLM OpenAI-compatible server with a model baked in at build time,
# fronted by a Gradio app (app.py); both are launched by entrypoint.sh.
# NOTE(review): :latest is not reproducible — pin a specific vllm-openai
# tag (or digest) before shipping this to production.
FROM vllm/vllm-openai:latest

WORKDIR /app

# Python deps first: this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ===== Model to bake into the image =====
ARG MODEL_REPO=Qwen/Qwen1.5-4B-Chat-AWQ
ENV MODEL_DIR=/app/models/model
# Name vLLM serves the model under (defaults to the source repo id).
ENV SERVED_MODEL_NAME=${MODEL_REPO}

# Faster downloads via the hf_transfer backend.
# NOTE(review): this requires the `hf_transfer` package at download time —
# confirm it is present in requirements.txt or the base image.
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Hugging Face token — supplied as a BuildKit secret so it never lands in an
# image layer, in `docker history`, or in the runtime environment:
#   docker build --secret id=hf_token,env=HF_TOKEN .
# The legacy --build-arg HF_TOKEN=... path still works as a fallback, but an
# ARG value is visible in `docker history`; prefer the secret mount.
# (The previous ENV HUGGINGFACE_HUB_TOKEN=... baked the token into the image,
#  and `huggingface-cli login --add-to-git-credential` wrote it into a layer —
#  both removed. `hf download` reads HF_TOKEN from the environment directly,
#  so no login step is needed.)
ARG HF_TOKEN

# Download the model into the image so the container starts without network.
RUN --mount=type=secret,id=hf_token \
    mkdir -p "${MODEL_DIR}" && \
    if [ -f /run/secrets/hf_token ]; then \
        export HF_TOKEN="$(cat /run/secrets/hf_token)"; \
    fi && \
    hf download "${MODEL_REPO}" --local-dir "${MODEL_DIR}" && \
    ls -lh "${MODEL_DIR}"

# App code + entrypoint (--chmod avoids a separate layer-doubling RUN chmod).
COPY app.py .
COPY --chmod=755 entrypoint.sh .

# Documentation only (ports are published at `docker run`):
# 8000 = vLLM OpenAI API, 7860 = Gradio UI.
EXPOSE 8000 7860

# entrypoint.sh starts vLLM in the background and Gradio as PID 1.
# NOTE(review): the wrapper must end with `exec` so Gradio receives SIGTERM.
ENTRYPOINT ["/app/entrypoint.sh"]