Add SGLang serving recipe to README

#4
by Jiminator - opened
Files changed (3) hide show
  1. README.md +1 -2
  2. generation_config.json +2 -7
  3. tokenizer_config.json +3 -2
README.md CHANGED
@@ -152,7 +152,6 @@ cd sglang
152
  pip install -e "python[all]"
153
 
154
  sglang serve \
155
- --trust-remote-code \
156
  --model-path poolside/Laguna-M.1 \
157
  --tool-call-parser poolside_v1 \
158
  --reasoning-parser poolside_v1 \
@@ -160,7 +159,7 @@ sglang serve \
160
  --host 0.0.0.0
161
  ```
162
 
163
- Quantized Laguna M.1 checkpoints are also available as [Laguna-M.1-FP8](https://huggingface.co/poolside/Laguna-M.1-FP8) and [Laguna-M.1-NVFP4](https://huggingface.co/poolside/Laguna-M.1-NVFP4). SGLang reads the checkpoint `quantization_config`, so you can use the same launch command after replacing the model ID. For more SGLang-specific deployment details, see the [SGLang Cookbook](https://docs.sglang.io/cookbook/autoregressive/Poolside/Laguna-M.1).
164
 
165
  #### Transformers
166
 
 
152
  pip install -e "python[all]"
153
 
154
  sglang serve \
 
155
  --model-path poolside/Laguna-M.1 \
156
  --tool-call-parser poolside_v1 \
157
  --reasoning-parser poolside_v1 \
 
159
  --host 0.0.0.0
160
  ```
161
 
162
+ Quantized Laguna M.1 checkpoints are also available as [Laguna-M.1-FP8](https://huggingface.co/poolside/Laguna-M.1-FP8) and [Laguna-M.1-NVFP4](https://huggingface.co/poolside/Laguna-M.1-NVFP4). SGLang reads the checkpoint `quantization_config`, so you can use the same launch command after replacing the model ID. For more SGLang-specific deployment details, see the [SGLang Cookbook](https://docs.sglang.io/cookbook/autoregressive/Poolside/Laguna-XS.2) entry for Laguna-XS.2, which uses the same Laguna implementation path.
163
 
164
  #### Transformers
165
 
generation_config.json CHANGED
@@ -9,10 +9,5 @@
9
  "pad_token_id": 9,
10
  "temperature": 1.0,
11
  "top_p": 1.0,
12
- "min_p": 0.0,
13
- "tool_call_parser": "poolside_v1",
14
- "reasoning_parser": "poolside_v1",
15
- "default_chat_template_kwargs": {
16
- "enable_thinking": true
17
- }
18
- }
 
9
  "pad_token_id": 9,
10
  "temperature": 1.0,
11
  "top_p": 1.0,
12
+ "min_p": 0.0
13
+ }
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -571,5 +571,6 @@
571
  "pad_token": "〈|PAD|〉",
572
  "sep_token": "〈|SEP|〉",
573
  "tokenizer_class": "PreTrainedTokenizerFast",
574
- "unk_token": "〈|UNK|〉"
575
- }
 
 
571
  "pad_token": "〈|PAD|〉",
572
  "sep_token": "〈|SEP|〉",
573
  "tokenizer_class": "PreTrainedTokenizerFast",
574
+ "unk_token": "〈|UNK|〉",
575
+ "chat_template": "{% include 'chat_template.jinja' %}"
576
+ }