# syntax=docker/dockerfile:1

# CPU-only vLLM serving image for unsloth/Llama-3.2-3B-Instruct.
# The base image bundles Python, pip, and a CPU build of vLLM.
# Pin the tag (never :latest) so rebuilds are reproducible.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# vLLM expects VLLM_TARGET_DEVICE for CPU inference;
# PYTHONUNBUFFERED streams logs immediately to the container log.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

WORKDIR /workspace

# Build toolchain for source-built Python extensions
# (ninja + numactl headers are used by vLLM's CPU backend).
# Package names sorted alphabetically; cache cleaned in the same layer.
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all

# Upgrade pip and install numpy; --no-cache-dir keeps the layer small.
# `pip show` / `pip list` log the installed vLLM version in the build
# output for traceability without adding extra debug layers.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy \
    && pip show vllm \
    && pip list

# Documentation only (does not publish the port): the API server
# listens on 7860 — must match the --port argument in CMD below.
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth
# Llama 3.2 model. Exec form keeps the server as PID 1 so it
# receives SIGTERM from `docker stop`.
# --enforce-eager and --dtype float32 suit CPU-only inference;
# --max-model-len caps the context window at 4096 tokens.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]