# Hugging Face Spaces status: Paused
# Serves an OpenAI-compatible vLLM API on port 7860 from a prebuilt image.
# NOTE(review): third-party base image pinned only by a mutable tag; consider
# pinning by digest (@sha256:...) for reproducible builds.
FROM harshmanvar/vllm-cpu-only:v1

# Point the Hugging Face caches at a directory the runtime user can write to.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favour of HF_HOME; it is kept here because the library versions
# shipped in the base image are not visible from this file.
ENV HF_HOME=/workspace/hf_cache \
    TRANSFORMERS_CACHE=/workspace/hf_cache

# World-writable so the cache works whichever UID the container is started
# with (presumably needed because the hosting platform does not run the
# container as root -- confirm, and prefer a targeted chown if the UID is known).
RUN mkdir -p /workspace/hf_cache && chmod -R 777 /workspace/hf_cache

# Build-time diagnostics: fail early if vllm is missing from the base image
# and record the installed package set in the build log.
RUN pip show vllm && pip list

# Documentation only: the API server below listens on this port.
EXPOSE 7860

# NOTE(review): no USER is set, so the image runs as whatever user the base
# image ends on (possibly root); confirm and drop privileges if feasible.

# Start the OpenAI-compatible API server with a model pulled from the HF Hub.
# NOTE(review): the model is a bitsandbytes 4-bit checkpoint; confirm that the
# CPU-only vLLM build in the base image can actually load it.
# NOTE(review): --trust-remote-code lets the model repository execute Python
# code at load time; drop the flag if this model does not require it.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/Llama-3.2-3B-bnb-4bit", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--trust-remote-code"]