Update README.md
Browse files
README.md
CHANGED
|
@@ -41,6 +41,25 @@ output_text = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1] :])
|
|
| 41 |
print(output_text)
|
| 42 |
```
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
## Generate the Model
|
| 45 |
```bash
|
| 46 |
git clone -b ds-v32 https://github.com/intel/auto-round.git
|
|
|
|
| 41 |
print(output_text)
|
| 42 |
```
|
| 43 |
|
| 44 |
+
### vLLM Usage
|
| 45 |
+
```bash
|
| 46 |
+
# Prepare the environment (see the vLLM DeepSeek-V3.2 recipe linked below)
|
| 47 |
+
# https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-V3_2.html#launching-deepseek-v32
|
| 48 |
+
pip install git+https://github.com/deepseek-ai/DeepGEMM.git@v2.1.1.post3 --no-build-isolation
|
| 49 |
+
git clone https://github.com/vllm-project/vllm.git
|
| 50 |
+
cd vllm && git checkout 773d7073a
|
| 51 |
+
VLLM_PRECOMPILED_WHEEL_COMMIT=7f42dc20bb2800d09faa72b26f25d54e26f1b694 VLLM_USE_PRECOMPILED=1 pip install --editable .
|
| 52 |
+
|
| 53 |
+
# Start the vLLM server with 4-way tensor parallelism
|
| 54 |
+
VLLM_ALLREDUCE_USE_SYMM_MEM=0 NCCL_NVLS_ENABLE=0 VLLM_USE_FUSED_MOE_GROUPED_TOPK=0 \
|
| 55 |
+
vllm serve Intel/DeepSeek-V3.2-int4-AutoRound \
|
| 56 |
+
--tensor-parallel-size 4 \
|
| 57 |
+
--tokenizer-mode deepseek_v32 \
|
| 58 |
+
--tool-call-parser deepseek_v32 \
|
| 59 |
+
--enable-auto-tool-choice \
|
| 60 |
+
--reasoning-parser deepseek_v3
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
## Generate the Model
|
| 64 |
```bash
|
| 65 |
git clone -b ds-v32 https://github.com/intel/auto-round.git
|