Upload dockerfile with huggingface_hub
Browse files- dockerfile +20 -0
dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: lmsysorg/sglang pinned to the v0.5.9-cu130 tag.
# NOTE(review): a tag can be re-pushed; consider pinning by digest
# (image:tag@sha256:...) if fully reproducible builds are required.
FROM lmsysorg/sglang:v0.5.9-cu130
|
| 2 |
+
# Build argument ARCH, defaulting to arm64 (override with --build-arg ARCH=...).
# NOTE(review): no instruction visible in this file references ARCH - confirm
# it is still needed.
ARG ARCH=arm64
|
| 3 |
+
|
| 4 |
+
# Pin transformers and huggingface-hub to exact versions. --no-deps installs
# only these two packages and does not install or upgrade their dependencies.
# pip's download cache is kept in a BuildKit cache mount (the same target the
# flashinfer build step uses) so it is not baked into the image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --no-deps "transformers==5.2.0" "huggingface-hub==1.4.1"
|
| 5 |
+
|
| 6 |
+
# Build and install flashinfer from source with fp4 quantization fix.
# Keep this aligned with the v0.5.9 base image's flashinfer_python version.
#
# Steps: clone flashinfer (once) -> check out v0.6.3 -> cherry-pick the two fix
# commits from the nvjullin fork's fix-fp4-quant-padding branch -> build the
# flashinfer-jit-cache wheel for CUDA archs 10.0a/10.3a -> pip install it.
#
# The checkout lives in a BuildKit cache mount, so it persists between builds
# and every step has to be safe to re-run against an existing checkout:
#   * `git remote add` fails when the remote already exists; that failure is
#     ignored. The command is grouped in a subshell so that `|| true` applies
#     to it alone. Ungrouped, `a && b || true && c` parses as
#     `((a && b) || true) && c`, which would also swallow a failed clone, cd,
#     checkout or git config and carry on.
#   * `dist/` is removed before building so that wheels left behind by an
#     earlier build (e.g. after a version bump) are not matched by
#     `dist/*.whl` and installed alongside the fresh one.
# The git identity is set because cherry-pick creates commits.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/sgl-workspace/flashinfer-build \
    bash -c 'if [ ! -d /sgl-workspace/flashinfer-build/flashinfer ]; then git clone https://github.com/flashinfer-ai/flashinfer.git --recursive /sgl-workspace/flashinfer-build/flashinfer; fi' && \
    cd /sgl-workspace/flashinfer-build/flashinfer && \
    git checkout v0.6.3 && \
    git config user.email "build@example.com" && \
    git config user.name "Build" && \
    (git remote add nvjullin https://github.com/nvjullin/flashinfer 2>/dev/null || true) && \
    git fetch nvjullin fix-fp4-quant-padding && \
    git cherry-pick ce48d4fb 10307340 && \
    cd flashinfer-jit-cache && \
    rm -rf dist && \
    MAX_JOBS=32 FLASHINFER_NVCC_THREADS=2 FLASHINFER_CUDA_ARCH_LIST="10.0a 10.3a" python -m build --no-isolation --skip-dependency-check --wheel && \
    python -m pip install dist/*.whl
|