Spaces:
Paused
Paused
Update Dockerfile
Browse files
- Dockerfile +6 -6
Dockerfile
CHANGED
@@ -7,18 +7,18 @@ ENV OMP_NUM_THREADS=1
 ENV VLLM_USE_TRITON_FLASH_ATTN=0
 ENV VLLM_ATTENTION_BACKEND=XFORMERS
 
-RUN mkdir -p /tmp/.cache/huggingface
-
-# e.g. install the `audio` optional dependencies
-# NOTE: Make sure the version of vLLM matches the base image!
-RUN uv pip install --system vllm[audio]==0.10.0
-
 # Create a user and group with the specified ID
 RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
 
 # Now, switch to the newly created user
 USER myuser
 
+RUN mkdir -p /tmp/.cache/huggingface
+
+# e.g. install the `audio` optional dependencies
+# NOTE: Make sure the version of vLLM matches the base image!
+RUN uv pip install --system vllm[audio]==0.10.0
+
 ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
 
 # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04