# (Scraped page residue — original Hugging Face Space status header)
# Spaces: Paused
# Step 1 — Base image with Python
FROM python:3.10-slim

# Step 2 — Install system dependencies needed to build vLLM from source.
# (`g++` dropped: build-essential already provides it, and gcc-12/g++-12
# are made the defaults below anyway.)
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    ninja-build \
    cmake \
    python3-dev \
    gcc-12 \
    g++-12 \
    libnuma-dev \
    && rm -rf /var/lib/apt/lists/*

# Make gcc-12/g++-12 the default compiler pair for the build
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12

# Step 3 — Target vLLM's CPU backend during the source build
ENV VLLM_TARGET_DEVICE=cpu

# Step 4 — Clone vLLM repo (full history kept: setuptools-scm derives
# the package version from git tags, so no --depth 1 here)
WORKDIR /workspace
RUN git clone https://github.com/vllm-project/vllm.git

# Step 5 — Install Python build tools.
# NOTE(fix): version specifiers MUST be quoted — an unquoted `cmake>=3.26`
# is parsed by the shell as `cmake` plus an output redirection to a file
# named `=3.26`, silently dropping the constraint. The two former pip
# lines are also consolidated (wheel/packaging/setuptools-scm were
# installed twice).
RUN pip install --upgrade pip setuptools wheel packaging ninja numpy \
    "cmake>=3.26" "setuptools-scm>=8"

# Step 6 — Install PyTorch CPU before building vLLM
RUN pip install torch --extra-index-url https://download.pytorch.org/whl/cpu

# Step 7 — Install vLLM requirements for CPU
WORKDIR /workspace/vllm
RUN pip install -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu

# Step 8 — Build and install vLLM from source.
# `pip install .` replaces the deprecated `python setup.py install`;
# --no-build-isolation keeps the original behavior of building against
# the already-installed CPU torch instead of re-resolving build deps.
RUN pip install --no-build-isolation .

# Step 9 — Expose Hugging Face-friendly port
EXPOSE 7860

# Step 10 — Start the OpenAI-compatible API server with a model from the HF Hub
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/Llama-3.2-3B-bnb-4bit", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--trust-remote-code"]