# Hugging Face Space (UI status captured at scrape time: Paused)
# CPU-only vLLM base image (explicitly pinned tag — never :latest).
FROM harshmanvar/vllm-cpu-only:v1
# Writable Hugging Face cache location, grouped into a single ENV layer.
# HF_HOME is the current variable; TRANSFORMERS_CACHE is deprecated in
# newer transformers releases but kept for compatibility with older ones.
ENV HF_HOME=/workspace/hf_cache \
    TRANSFORMERS_CACHE=/workspace/hf_cache
# Create the cache and model directories in one layer (the original used
# two). World-writable (777) because Hugging Face Spaces may run the
# container under an arbitrary non-root UID.
# NOTE(review): prefer a dedicated USER plus targeted chown/chmod if the
# runtime UID is known — confirm against the Space's runtime settings.
RUN mkdir -p $HF_HOME /workspace/models && \
    chmod -R 777 $HF_HOME /workspace/models
# Install git + git-lfs (needed to clone model repos whose weights are
# stored in LFS). update+install combined in one layer so the apt index
# is never stale; --no-install-recommends and list cleanup keep the
# image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        git-lfs && \
    git lfs install && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Build-time sanity check: fail fast if vLLM is not importable, and print
# its version into the build log. Replaces three separate debug layers
# (`pip show vllm`, `pip list`, `vllm -v`) — `-v` is not a documented
# vLLM CLI flag (the CLI uses --version) and could fail the build.
RUN python -c "import vllm; print('vllm', vllm.__version__)"

# Alternative: serve a model straight from the HF Hub instead of a local path:
# CMD ["python", "-m", "vllm.entrypoints.openai.api_server", "--model", "unsloth/Llama-3.2-3B-bnb-4bit", "--host", "0.0.0.0","--port", "7860"]
# Bake the model into the image so container startup does not depend on
# the network. `git lfs install` was already run system-wide in the apt
# layer above, so it is not repeated here. --depth 1 skips the repo's git
# history (the LFS weight files are fetched either way).
# NOTE(review): this is a bitsandbytes 4-bit checkpoint — confirm the CPU
# build of vLLM can actually load bnb-quantized weights.
RUN git clone --depth 1 https://huggingface.co/unsloth/Llama-3.2-3B-bnb-4bit /workspace/models
# Document the service port (EXPOSE is informational only; it does not
# publish the port).
EXPOSE 7860

# Launch the OpenAI-compatible API server against the pre-downloaded
# model. Exec (JSON-array) form so the server runs as PID 1 and receives
# SIGTERM on `docker stop`.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "/workspace/models", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--trust-remote-code", \
     "--device", "cpu"]

# Alternative using the `vllm serve` CLI pulling straight from the Hub:
# CMD ["vllm", "serve", "unsloth/Llama-3.2-3B-bnb-4bit", "--host", "0.0.0.0", "--port", "7860", "--trust-remote-code", "--device", "cpu"]