# Earlier variant of this Dockerfile, kept commented out for reference:
# # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
# # Target the CPU backend (VLLM_TARGET_DEVICE selects the device vLLM runs on)
# ENV VLLM_TARGET_DEVICE=cpu
# ENV PYTHONUNBUFFERED=1
# # Set working directory
# WORKDIR /workspace
# # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
# # Use the official PyTorch CPU wheel index for better performance on CPU.
# # RUN pip3 install --upgrade pip \
# #     && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
# #     && pip3 install transformers accelerate unsloth
# # (Optional) Install unsloth_zoo or other utilities if needed:
# # RUN pip3 install unsloth-zoo
# # Copy an example inference script into the container
# # (This script should load the model and do a sample generation; see the sketch below.)
# # COPY inference.py /workspace/inference.py
# # Default command: run the inference script to verify setup
# CMD ["python3", "/workspace/inference.py"]

# Use the openeuler/vllm-cpu base image (Python, pip, and vLLM preinstalled).
# Pinning a tag is more reproducible than :latest:
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
FROM openeuler/vllm-cpu:latest

# Run vLLM on the CPU backend; unbuffered stdout makes container logs immediate.
ENV VLLM_TARGET_DEVICE=cpu
ENV PYTHONUNBUFFERED=1

WORKDIR /workspace
# Install system packages (compiler and build tools for native extensions)
RUN yum install -y \
    gcc \
    gcc-c++ \
    cmake \
    python3-pip \
    python3-devel \
    ninja-build \
    numactl-devel \
    git \
    && yum clean all

# Install Python packages
RUN pip3 install --upgrade pip \
    && pip3 install numpy

# Build-time diagnostics: confirm the preinstalled vLLM version and package set
RUN pip show vllm
RUN pip list
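
# (Optional) A container healthcheck could poll the server's /health endpoint
# once it is up; hypothetical sketch, assuming curl is available in the image:
# HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
#   CMD curl -fs http://localhost:7860/health || exit 1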

# Start the vLLM OpenAI-compatible API server serving the Unsloth Llama 3.2 model
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]