Upload folder using huggingface_hub
Browse files
- README.md +4 -2
- model.safetensors +1 -1
README.md
CHANGED
|
@@ -33,7 +33,9 @@ vllm serve $model_id \
|
|
| 33 |
```bash
|
| 34 |
# Multi-token prediction is supported
|
| 35 |
model_id=tiny-random/qwen3.5-moe
|
| 36 |
-
python3 -m sglang.launch_server
|
|
|
|
|
|
|
| 37 |
--tool-call-parser qwen3_coder \
|
| 38 |
--reasoning-parser qwen3 \
|
| 39 |
--speculative-algo NEXTN \
|
|
@@ -84,7 +86,7 @@ inputs = processor.apply_chat_template(
|
|
| 84 |
).to(model.device)
|
| 85 |
|
| 86 |
generated_ids = model.generate(**inputs, max_new_tokens=32)
|
| 87 |
-
output_text = processor.batch_decode(generated_ids[0])
|
| 88 |
print(output_text)
|
| 89 |
```
|
| 90 |
|
|
|
|
| 33 |
```bash
|
| 34 |
# Multi-token prediction is supported
|
| 35 |
model_id=tiny-random/qwen3.5-moe
|
| 36 |
+
python3 -m sglang.launch_server \
|
| 37 |
+
--model-path $model_id \
|
| 38 |
+
--tp-size 2 \
|
| 39 |
--tool-call-parser qwen3_coder \
|
| 40 |
--reasoning-parser qwen3 \
|
| 41 |
--speculative-algo NEXTN \
|
|
|
|
| 86 |
).to(model.device)
|
| 87 |
|
| 88 |
generated_ids = model.generate(**inputs, max_new_tokens=32)
|
| 89 |
+
output_text = processor.batch_decode(generated_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
|
| 90 |
print(output_text)
|
| 91 |
```
|
| 92 |
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10057952
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a68280ca252dadbb9875aa7455a9def12207fc29aa7bf34f10f578a4e4102cb
|
| 3 |
size 10057952
|