zhiyucheng committed on
Commit
7019896
·
verified ·
1 Parent(s): 0a5fabd

Upload dockerfile with huggingface_hub

Browse files
Files changed (1) hide show
  1. dockerfile +20 -0
dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Extends the SGLang v0.5.9 (CUDA 13.0) image with pinned Hugging Face
# libraries and a wheel built from FlashInfer's flashinfer-jit-cache directory
# with the fp4 quantization fix cherry-picked on top of v0.6.3.
FROM lmsysorg/sglang:v0.5.9-cu130

# NOTE(review): ARCH is not referenced by any instruction visible in this file;
# it is kept so existing `--build-arg ARCH=...` invocations keep working.
ARG ARCH=arm64

# Pin the Hugging Face libraries. --no-deps installs exactly these two packages
# without touching their dependencies; --no-cache-dir keeps pip's download
# cache out of the image layer (this step has no cache mount).
RUN pip install --no-cache-dir --no-deps "transformers==5.2.0" "huggingface-hub==1.4.1"

# Build and install flashinfer from source with fp4 quantization fix.
# Keep this aligned with the v0.5.9 base image's flashinfer_python version.
#
# The checkout lives in a BuildKit cache mount, so it survives across builds
# and is not part of the image. Because of that the steps below:
#   * clone only when no usable repository is cached yet;
#   * abort any cherry-pick left behind by a previously failed build and force
#     the checkout, so every build starts from a clean v0.6.3 tree;
#   * re-sync submodules after the checkout (the clone initialised them at the
#     default branch's pins, not v0.6.3's);
#   * add the fork remote only if it is missing, in its own group, so that a
#     failure of any earlier step is never swallowed;
#   * clear dist/ so only the wheel produced by this build gets installed.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/sgl-workspace/flashinfer-build \
    if [ ! -d /sgl-workspace/flashinfer-build/flashinfer/.git ]; then \
        rm -rf /sgl-workspace/flashinfer-build/flashinfer && \
        git clone --recursive https://github.com/flashinfer-ai/flashinfer.git /sgl-workspace/flashinfer-build/flashinfer; \
    fi && \
    cd /sgl-workspace/flashinfer-build/flashinfer && \
    { git cherry-pick --abort >/dev/null 2>&1 || true; } && \
    git checkout --force v0.6.3 && \
    git submodule update --init --recursive && \
    git config user.email "build@example.com" && \
    git config user.name "Build" && \
    { git remote get-url nvjullin >/dev/null 2>&1 || git remote add nvjullin https://github.com/nvjullin/flashinfer; } && \
    git fetch nvjullin fix-fp4-quant-padding && \
    git cherry-pick ce48d4fb 10307340 && \
    cd flashinfer-jit-cache && \
    rm -rf dist && \
    MAX_JOBS=32 FLASHINFER_NVCC_THREADS=2 FLASHINFER_CUDA_ARCH_LIST="10.0a 10.3a" python -m build --no-isolation --skip-dependency-check --wheel && \
    python -m pip install dist/*.whl