Spaces:
Paused
Paused
File size: 1,505 Bytes
2781894 947c6a5 2781894 947c6a5 2781894 947c6a5 0f12a3c 2781894 947c6a5 2781894 ee83eff 947c6a5 ee83eff 2781894 9e6d168 2781894 9e6d168 2781894 9e6d168 2781894 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# Step 1 - Base image with Python.
# The Debian release is pinned (bookworm) so the set of apt packages requested
# below, including gcc-12/g++-12, does not change underneath the build when the
# floating "slim" tag is repointed to a newer Debian release.
FROM python:3.10-slim-bookworm

# Step 2 - Install system dependencies (compiler toolchain and build helpers).
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is listed explicitly because git needs it for the HTTPS
# clone and it is no longer pulled in as a recommended package.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        g++ \
        g++-12 \
        gcc-12 \
        git \
        libnuma-dev \
        ninja-build \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Register gcc-12 as the provider of /usr/bin/gcc, with g++-12 following it as
# the slave link for /usr/bin/g++.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
# Step 3 - Environment for CPU build.
# Declared with ENV, so the value is visible to every later build step and is
# also present in the running container.
ENV VLLM_TARGET_DEVICE=cpu

# Step 4 - Clone vLLM repo.
# VLLM_REF selects the branch or tag to check out. The default ("main") keeps
# the previous behavior of building the tip of the default branch; pass
# `--build-arg VLLM_REF=<tag>` to get a reproducible build of a fixed release.
ARG VLLM_REF=main
WORKDIR /workspace
RUN git clone --branch "${VLLM_REF}" https://github.com/vllm-project/vllm.git
# Step 5 - Install Python build tools.
# Every version specifier is quoted: in a shell-form RUN an unquoted `>=` is an
# output redirection, so a bare `cmake>=3.26` installs an unconstrained cmake
# and sends pip's output to a file named "=3.26".
# pip is upgraded first, then all build tools are installed/upgraded in one
# invocation; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade \
        "cmake>=3.26" \
        ninja \
        numpy \
        packaging \
        setuptools \
        "setuptools-scm>=8" \
        wheel
# Step 6 - Install PyTorch CPU before building vLLM.
# The PyTorch CPU wheel index is consulted in addition to PyPI.
# --no-cache-dir keeps the downloaded wheels out of the image layer.
RUN pip install --no-cache-dir torch --extra-index-url https://download.pytorch.org/whl/cpu

# Step 7 - Install vLLM requirements for CPU.
# The working directory moves into the cloned repository so the relative
# requirements path resolves.
WORKDIR /workspace/vllm
RUN pip install --no-cache-dir -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
# Step 8 - Build and install vLLM from source.
# Installs through pip instead of the deprecated `python setup.py install`.
# Build isolation is disabled so the build runs against the torch and build
# tools already installed in this environment rather than a fresh temporary one.
# Runs in the repository root (the current WORKDIR).
RUN pip install --no-cache-dir --no-build-isolation .
# Run the server as an unprivileged user with a real, writable home directory
# so model downloads and caches have somewhere to go.
# NOTE(review): UID 1000 is chosen to match the user Hugging Face Spaces is
# documented to run containers as - confirm against the Spaces Docker guide.
RUN useradd --create-home --uid 1000 user
USER user
ENV HOME=/home/user

# Start outside /workspace/vllm: `python -m` puts the current directory first
# on sys.path, so launching from the repository root would import the source
# checkout (which may lack the compiled extension modules) instead of the
# installed package.
WORKDIR /home/user

# Step 9 - Expose Hugging Face-friendly port.
# EXPOSE is documentation only; the server binds the port via --port below.
EXPOSE 7860

# Step 10 - Start API server with a model from HF Hub.
# Exec (JSON) form, so the Python process runs directly without a shell wrapper.
# NOTE(review): the model name suggests a bitsandbytes 4-bit checkpoint -
# confirm the CPU build can load it.
# NOTE(review): --trust-remote-code lets the model repository run its own
# Python code at load time - confirm this is intended for this model.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/Llama-3.2-3B-bnb-4bit", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--trust-remote-code"]
|