# NOTE: the lines below were metadata from a web file view (Hugging Face
# Spaces page: status "Paused", file size 2,206 bytes, commit hashes and a
# rendered line-number gutter). They are kept as comments so the Dockerfile
# parses; they are not build instructions.
# e140fec 3ac3871 f61524f 3df17c1 3664d44 ecce21f e9d4b65 4a405ba
# # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
# # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
# ENV VLLM_TARGET_DEVICE=cpu
# ENV PYTHONUNBUFFERED=1
# # Set working directory
# WORKDIR /workspace
# # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
# # Use the official PyTorch CPU wheel index for performance on CPU.
# #RUN pip3 install --upgrade pip \
# # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
# # && pip3 install transformers accelerate unsloth
# # (Optional) Install unsloth_zoo or other utilities if needed:
# # RUN pip3 install unsloth-zoo
# # RUN pip3 install --upgrade pip \
# # && pip3 install transformers accelerate unsloth
# # Copy an example inference script into the container
# # (This script should load the model and do a sample generation.)
# # COPY inference.py /workspace/inference.py
# # Default command: run the inference script to verify setup
# CMD ["python3", "/workspace/inference.py"]
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
# Base: openEuler vLLM CPU image (ships Python, pip, and vLLM pre-installed).
# Pin an explicit tag instead of ":latest" so builds are reproducible
# (hadolint DL3007); this is the tag the earlier revision of this file used.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# VLLM_TARGET_DEVICE=cpu — vLLM selects its device backend from this variable;
#   force CPU inference (no GPU available in this image).
# PYTHONUNBUFFERED=1 — stream Python stdout/stderr immediately so container
#   logs appear in real time.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

# All following build steps and the default CMD run from /workspace.
WORKDIR /workspace
# # Install system packages
# RUN yum install -y \
# gcc \
# gcc-c++ \
# cmake \
# python-pip \
# python3-devel \
# ninja-build.aarch64 \
# numactl-devel.aarch64 \
# git \
# && yum clean all
# System build dependencies: a C/C++ toolchain plus CMake/Ninja and the
# libnuma headers needed to compile vLLM's CPU extensions, and git for
# fetching sources. Packages are listed one per line, sorted alphabetically,
# and the metadata cache is cleaned in the same layer so it never reaches
# the final image.
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all
# Install Python packages. --no-cache-dir keeps pip's download cache out of
# the image layer (hadolint DL3042).
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy

# Build-time sanity check, merged into a single layer since these are
# diagnostics rather than separately cacheable steps:
#   - `pip show vllm` exits non-zero if vLLM is missing from the base image,
#     failing the build early;
#   - `pip list` records the full package set in the build log for debugging.
RUN pip show vllm && pip list
# Document the port the API server listens on. EXPOSE is metadata only — it
# does not publish the port; run with `docker run -p 7860:7860 …`.
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth Llama 3.2 3B
# Instruct model. Exec (JSON-array) form keeps the server as PID 1 so it
# receives SIGTERM from `docker stop`. Uses python3 explicitly, consistent
# with the pip3/python3 usage elsewhere in this file.
#   --trust-remote-code : allow the model repo's custom code to load
#   --enforce-eager     : skip graph capture (no GPU in this CPU-only image)
#   --dtype float32     : full precision on CPU (half-precision CPU kernels
#                         are not assumed available — TODO confirm for bf16)
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]