# syntax=docker/dockerfile:1

# CPU-only vLLM serving image for unsloth/Llama-3.2-3B-Instruct.
# The base image bundles Python, pip, and a CPU build of vLLM.
# Pin the tag (never :latest) so rebuilds are reproducible.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# vLLM expects VLLM_TARGET_DEVICE for CPU inference;
# PYTHONUNBUFFERED streams logs immediately to the container log.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

WORKDIR /workspace

# Build toolchain for source-built Python extensions
# (ninja + numactl headers are used by vLLM's CPU backend).
# Package names sorted alphabetically; cache cleaned in the same layer.
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all

# Upgrade pip and install numpy; --no-cache-dir keeps the layer small.
# `pip show` / `pip list` log the installed vLLM version in the build
# output for traceability without adding extra debug layers.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy \
    && pip show vllm \
    && pip list

# Documentation only (does not publish the port): the API server
# listens on 7860 — must match the --port argument in CMD below.
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth
# Llama 3.2 model. Exec form keeps the server as PID 1 so it
# receives SIGTERM from `docker stop`.
# --enforce-eager and --dtype float32 suit CPU-only inference;
# --max-model-len caps the context window at 4096 tokens.
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]