# # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# # Target the CPU backend (vLLM reads VLLM_TARGET_DEVICE to select its device)
# ENV VLLM_TARGET_DEVICE=cpu
# ENV PYTHONUNBUFFERED=1

# # Set working directory
# WORKDIR /workspace

# # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
# # Use the official PyTorch CPU wheel index for performance on CPU.

# # RUN pip3 install --upgrade pip \
# #  && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
# #  && pip3 install transformers accelerate unsloth

# # (Optional) Install unsloth_zoo or other utilities if needed:
# # RUN pip3 install unsloth-zoo

# # RUN pip3 install --upgrade pip \
# #  && pip3 install transformers accelerate unsloth

# # Copy an example inference script into the container
# # (This script should load the model and do a sample generation.)
# # COPY inference.py /workspace/inference.py
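# # A minimal sketch of what inference.py could contain (hypothetical script,
# # assuming the Transformers pipeline API; not part of this image build):
# #
# #   from transformers import pipeline
# #   pipe = pipeline("text-generation", model="unsloth/Llama-3.2-3B-Instruct")
# #   print(pipe("Hello!", max_new_tokens=32)[0]["generated_text"])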

# # Default command: run the inference script to verify setup
# CMD ["python3", "/workspace/inference.py"]
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
FROM openeuler/vllm-cpu:latest


# Target the CPU backend; keep Python output unbuffered so logs stream live
ENV VLLM_TARGET_DEVICE=cpu
ENV PYTHONUNBUFFERED=1

WORKDIR /workspace

# # Earlier attempt to install system packages (aarch64-suffixed names):
# RUN yum install -y \
#     gcc \
#     gcc-c++ \
#     cmake \
#     python-pip \
#     python3-devel \
#     ninja-build.aarch64 \
#     numactl-devel.aarch64 \
#     git \
#  && yum clean all

# Install system packages: compilers, CMake, Ninja, Python headers, and NUMA
# development headers (needed when Python packages compile native extensions)
RUN yum install -y \
    gcc \
    gcc-c++ \
    cmake \
    python3-pip \
    python3-devel \
    ninja-build \
    numactl-devel \
    git \
 && yum clean all
 
# Install Python packages
RUN pip3 install --upgrade pip \
  && pip3 install numpy
  
# Build-time sanity checks: confirm vLLM is present and log the package set
RUN pip show vllm

RUN pip list

# Start the vLLM OpenAI-compatible API server for the Unsloth Llama 3.2 model.
# --enforce-eager forces eager-mode execution and --dtype float32 keeps full
# precision; both are conservative choices for the CPU backend.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]