# --- scraped file-viewer residue below, kept as comments so `docker build` can parse this file ---
# Spaces:
# Paused
# Paused
# File size: 1,951 Bytes
# e140fec 3ac3871 e140fec 3ac3871 e140fec 3ac3871 e140fec f61524f e140fec 3ac3871 e140fec 3ac3871 e140fec f61524f e140fec 3664d44 e140fec |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
# # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
# ENV VLLM_TARGET_DEVICE=cpu
# ENV PYTHONUNBUFFERED=1
# # Set working directory
# WORKDIR /workspace
# # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
# # Use the official PyTorch CPU wheel index for performance on CPU.
# #RUN pip3 install --upgrade pip \
# # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
# # && pip3 install transformers accelerate unsloth
# # (Optional) Install unsloth_zoo or other utilities if needed:
# # RUN pip3 install unsloth-zoo
# # RUN pip3 install --upgrade pip \
# # && pip3 install transformers accelerate unsloth
# # Copy an example inference script into the container
# # (This script should load the model and do a sample generation.)
# # COPY inference.py /workspace/inference.py
# # Default command: run the inference script to verify setup
# CMD ["python3", "/workspace/inference.py"]
# Base: openEuler image that ships Python, pip, and a CPU build of vLLM.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# CPU inference target for vLLM; unbuffered stdout/stderr so container logs
# appear immediately.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

WORKDIR /workspace
# System toolchain needed to compile native Python extensions (cmake/ninja,
# gcc, numactl headers).
# Fixes vs. the previous version:
#  - the explicit ".aarch64" arch suffixes on ninja-build/numactl-devel broke
#    builds on any other architecture; bare names resolve to the native arch.
#  - openEuler 24.03 names the pip package "python3-pip", not "python-pip"
#    (verify against the repo if the base image changes).
# Packages sorted alphabetically; `yum clean all` in the same layer keeps the
# metadata cache out of the image (hadolint DL3040).
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all
# Python dependencies. --no-cache-dir keeps pip's wheel cache out of the
# image layer (hadolint DL3042).
# TODO(review): pin numpy to a known-good version for reproducible builds.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy
# Document the API port (EXPOSE does not publish it; run with -p 7860:7860).
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth Llama 3.2 model.
# Exec (JSON) form so the server is PID 1 and receives SIGTERM on `docker stop`.
# Uses python3 explicitly, matching the pip3 calls above and the earlier
# version of this file.
#   --enforce-eager   : skip graph capture, appropriate for CPU inference
#   --dtype float32   : CPU backends generally lack bf16/fp16 kernels
#   --max-model-len   : cap context at 4096 tokens to bound memory use
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]