binary1ne committed on
Commit
e140fec
·
verified ·
1 Parent(s): f61524f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +54 -21
Dockerfile CHANGED
@@ -1,29 +1,62 @@
1
- # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
2
- FROM openeuler/vllm-cpu:0.8.5-oe2403lts
3
 
4
- # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
5
- ENV VLLM_TARGET_DEVICE=cpu
6
- ENV PYTHONUNBUFFERED=1
7
 
8
- # Set working directory
9
- WORKDIR /workspace
10
 
11
- # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
12
- # Use the official PyTorch CPU wheel index for performance on CPU.
13
 
14
- #RUN pip3 install --upgrade pip \
15
- # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
16
- # && pip3 install transformers accelerate unsloth
17
 
18
- # (Optional) Install unsloth_zoo or other utilities if needed:
19
- # RUN pip3 install unsloth-zoo
20
 
21
- RUN pip3 install --upgrade pip \
22
- && pip3 install transformers accelerate unsloth
 
 
 
 
23
 
24
- # Copy an example inference script into the container
25
- # (This script should load the model and do a sample generation.)
26
- COPY inference.py /workspace/inference.py
27
 
28
- # Default command: run the inference script to verify setup
29
- CMD ["python3", "/workspace/inference.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
2
+ # FROM openeuler/vllm-cpu:0.8.5-oe2403lts
3
 
4
+ # # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
5
+ # ENV VLLM_TARGET_DEVICE=cpu
6
+ # ENV PYTHONUNBUFFERED=1
7
 
8
+ # # Set working directory
9
+ # WORKDIR /workspace
10
 
11
+ # # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
12
+ # # Use the official PyTorch CPU wheel index for performance on CPU.
13
 
14
+ # #RUN pip3 install --upgrade pip \
15
+ # # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
16
+ # # && pip3 install transformers accelerate unsloth
17
 
18
+ # # (Optional) Install unsloth_zoo or other utilities if needed:
19
+ # # RUN pip3 install unsloth-zoo
20
 
21
+ # # RUN pip3 install --upgrade pip \
22
+ # # && pip3 install transformers accelerate unsloth
23
+
24
+ # # Copy an example inference script into the container
25
+ # # (This script should load the model and do a sample generation.)
26
+ # # COPY inference.py /workspace/inference.py
27
 
28
+ # # Default command: run the inference script to verify setup
29
+ # CMD ["python3", "/workspace/inference.py"]
30
# CPU-only vLLM serving image for unsloth/Llama-3.2-3B-Instruct.
# The base image ships Python, pip, and vLLM pre-built for CPU inference.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# VLLM_TARGET_DEVICE selects the CPU backend; PYTHONUNBUFFERED makes
# server logs stream immediately instead of being block-buffered.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

WORKDIR /workspace

# System build/runtime packages. The ".aarch64" suffixes are dropped so the
# image also builds on x86_64 hosts — yum resolves the host architecture
# itself. "python3-pip" is the openEuler package name (the original
# "python-pip" does not exist on openEuler 24.03 — confirm against the repo).
# Package list is sorted alphabetically; cleanup happens in the same layer.
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all

# Python dependencies; --no-cache-dir keeps the pip cache out of the layer.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy

# Documentation only (does not publish the port): the API server below
# listens on 7860.
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth Llama 3.2
# model. Exec-form CMD so the server is PID 1 and receives SIGTERM directly.
# "python3" matches the pip3/python3 toolchain used above.
# --enforce-eager and --dtype float32 favor stability over speed on CPU.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]