binary1ne committed on
Commit
e140fec
·
verified ·
1 Parent(s): f61524f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +54 -21
Dockerfile CHANGED
@@ -1,29 +1,62 @@
1
- # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
2
- FROM openeuler/vllm-cpu:0.8.5-oe2403lts
3
 
4
- # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
5
- ENV VLLM_TARGET_DEVICE=cpu
6
- ENV PYTHONUNBUFFERED=1
7
 
8
- # Set working directory
9
- WORKDIR /workspace
10
 
11
- # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
12
- # Use the official PyTorch CPU wheel index for performance on CPU.
13
 
14
- #RUN pip3 install --upgrade pip \
15
- # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
16
- # && pip3 install transformers accelerate unsloth
17
 
18
- # (Optional) Install unsloth_zoo or other utilities if needed:
19
- # RUN pip3 install unsloth-zoo
20
 
21
- RUN pip3 install --upgrade pip \
22
- && pip3 install transformers accelerate unsloth
 
 
 
 
23
 
24
- # Copy an example inference script into the container
25
- # (This script should load the model and do a sample generation.)
26
- COPY inference.py /workspace/inference.py
27
 
28
- # Default command: run the inference script to verify setup
29
- CMD ["python3", "/workspace/inference.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
2
+ # FROM openeuler/vllm-cpu:0.8.5-oe2403lts
3
 
4
+ # # Ensure Python uses the CPU device (vLLM expects VLLM_TARGET_DEVICE for inference)
5
+ # ENV VLLM_TARGET_DEVICE=cpu
6
+ # ENV PYTHONUNBUFFERED=1
7
 
8
+ # # Set working directory
9
+ # WORKDIR /workspace
10
 
11
+ # # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
12
+ # # Use the official PyTorch CPU wheel index for performance on CPU.
13
 
14
+ # #RUN pip3 install --upgrade pip \
15
+ # # && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
16
+ # # && pip3 install transformers accelerate unsloth
17
 
18
+ # # (Optional) Install unsloth_zoo or other utilities if needed:
19
+ # # RUN pip3 install unsloth-zoo
20
 
21
+ # # RUN pip3 install --upgrade pip \
22
+ # # && pip3 install transformers accelerate unsloth
23
+
24
+ # # Copy an example inference script into the container
25
+ # # (This script should load the model and do a sample generation.)
26
+ # # COPY inference.py /workspace/inference.py
27
 
28
+ # # Default command: run the inference script to verify setup
29
+ # CMD ["python3", "/workspace/inference.py"]
30
# CPU-only vLLM serving image for unsloth/Llama-3.2-3B-Instruct.
# The base image ships Python, pip, and vLLM pre-built for CPU inference.
FROM openeuler/vllm-cpu:0.8.5-oe2403lts

# VLLM_TARGET_DEVICE selects the CPU backend; PYTHONUNBUFFERED makes
# server logs stream immediately instead of being block-buffered.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

WORKDIR /workspace

# System build/runtime packages. The ".aarch64" suffixes are dropped so the
# image also builds on x86_64 hosts — yum resolves the host architecture
# itself. "python3-pip" is the openEuler package name (the original
# "python-pip" does not exist on openEuler 24.03 — confirm against the repo).
# Package list is sorted alphabetically; cleanup happens in the same layer.
RUN yum install -y \
        cmake \
        gcc \
        gcc-c++ \
        git \
        ninja-build \
        numactl-devel \
        python3-devel \
        python3-pip \
    && yum clean all

# Python dependencies; --no-cache-dir keeps the pip cache out of the layer.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir numpy

# Documentation only (does not publish the port): the API server below
# listens on 7860.
EXPOSE 7860

# Start the vLLM OpenAI-compatible API server for the Unsloth Llama 3.2
# model. Exec-form CMD so the server is PID 1 and receives SIGTERM directly.
# "python3" matches the pip3/python3 toolchain used above.
# --enforce-eager and --dtype float32 favor stability over speed on CPU.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]