# vllm-llama2 / Dockerfile
# Provenance: Hugging Face Space by binary1ne, commit 28e546f
# (The lines above were page-viewer chrome from a web scrape — "raw",
# "history blame", "569 Bytes" — commented out so the file parses.)
# syntax=docker/dockerfile:1
FROM python:3.12-slim

# System dependencies: git is needed for pip installs from VCS sources.
# update + install in one layer (DL3009), no recommended extras (DL3015),
# clean the apt lists in the same layer so they never land in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      git \
    && rm -rf /var/lib/apt/lists/*

# Install the CPU-only PyTorch wheel FIRST so vLLM reuses it instead of
# dragging in the (multi-GB) CUDA build. vLLM is pinned to a release whose
# dependency set matches torch 2.4.0 -- bump both together (DL3013).
RUN pip install --no-cache-dir torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir vllm==0.6.2

# Run as a non-root user with a stable UID (Kubernetes runAsNonRoot-friendly).
RUN useradd --system --create-home --uid 10001 appuser
USER appuser
WORKDIR /home/appuser

# Documentation only -- the port must still be published at `docker run -p`.
EXPOSE 7860

# Runtime configuration for the serve command below.
ENV VLLM_HOST=0.0.0.0 \
    VLLM_PORT=7860

# SECURITY: do NOT bake a Hugging Face token into the image via ENV/ARG --
# it is readable by anyone with the image (`docker history`, layer inspect).
# Supply it at runtime instead:
#   docker run -e HUGGING_FACE_HUB_TOKEN=hf_xxx ...

# `vllm serve` takes the model as a POSITIONAL argument (the `--model` flag
# belongs to the legacy api_server entrypoint and errors here). The sh -c
# wrapper is required for $VLLM_HOST/$VLLM_PORT expansion; `exec` replaces
# the shell so vLLM is PID 1 and receives SIGTERM from `docker stop`.
# NOTE(review): unsloth/llama-2-7b-bnb-4bit is a bitsandbytes 4-bit
# checkpoint; bitsandbytes quantization generally requires a GPU -- confirm
# this model actually loads with --device cpu before shipping.
CMD ["sh", "-c", "exec vllm serve unsloth/llama-2-7b-bnb-4bit --device cpu --host \"$VLLM_HOST\" --port \"$VLLM_PORT\""]