Upload folder using huggingface_hub

Files changed:
- .DS_Store +0 -0
- README.md +4 -2
- ReasonableQwen3-4B-BF16.gguf +2 -2
- ReasonableQwen3-4B-Q3_K.gguf +2 -2
- ReasonableQwen3-4B-Q4_K.gguf +2 -2
- ReasonableQwen3-4B-Q8_0.gguf +2 -2
- adapter_config.json +66 -0
- metadata.json +17 -0
- model.safetensors +2 -2
- tokenizer.json +2 -2
- training_state.json +756 -689
- xspecial_tokens_map.json +31 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
README.md
CHANGED
@@ -1,7 +1,9 @@
 ---
-language: en
-pipeline_tag: text-generation
 library_name: mlx
+license: apache-2.0
+license_link: https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507/blob/main/LICENSE
+pipeline_tag: text-generation
+base_model: Qwen/Qwen3-4B-Thinking-2507
 tags:
 - mlx
 ---
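The card's new front matter pins the Apache-2.0 license and the base model (Qwen/Qwen3-4B-Thinking-2507) and declares `library_name: mlx`. As a minimal usage sketch, the uploaded MLX weights can be loaded with `mlx_lm`; the repo id below is a hypothetical placeholder, since the commit itself does not name the repository.

```python
# Sketch only: load this repo's MLX weights and generate once.
# The repo id is a hypothetical placeholder, not taken from the commit.
from mlx_lm import load, generate

model, tokenizer = load("adeelahmad/ReasonableQwen3-4B")  # hypothetical id

messages = [{"role": "user", "content": "Explain GRPO in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

print(generate(model, tokenizer, prompt=prompt, max_tokens=256))
```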
ReasonableQwen3-4B-BF16.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9f95bc6d806e7c98baa9308c5d0f4055d2b917bbc721bd0201d36f14bd412444
+size 8051285184
ReasonableQwen3-4B-Q3_K.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f183fb995f35a3e13305a3e787bdb988f6643ee56b5c7a964d1a7b4e1d7ad4cd
+size 2075617984
ReasonableQwen3-4B-Q4_K.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1e162f04a69f148beaadc70cccc515d260a1973b14efde5cd13d124118a35ac2
+size 2497280704
ReasonableQwen3-4B-Q8_0.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:99e1cf3d4afe75e385d5ba30ba548492378b33d84b228ede020ea537fba80fa0
+size 4280405184
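These four `.gguf` entries (and `model.safetensors` / `tokenizer.json` below) are Git LFS pointer files: three lines giving the LFS spec version, the SHA-256 of the real blob, and its byte size. A small sketch, assuming a locally downloaded file, that checks an artifact against the `oid` recorded in its pointer; the expected hash below is the Q4_K value from this commit.

```python
# Sketch: verify a downloaded GGUF against its Git LFS pointer (spec v1).
# The local path is an assumption; the oid is the Q4_K value above.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "1e162f04a69f148beaadc70cccc515d260a1973b14efde5cd13d124118a35ac2"
actual = sha256_of("ReasonableQwen3-4B-Q4_K.gguf")
assert actual == expected, f"checksum mismatch: {actual}"
```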
adapter_config.json
ADDED
@@ -0,0 +1,66 @@
+{
+    "adapter_path": "adapters/turn80",
+    "alpha": [
+        1e-05
+    ],
+    "batch_size": 1,
+    "beta": 0.02,
+    "config": null,
+    "data": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/strat",
+    "delta": 50.0,
+    "dpo_cpo_loss_type": "sigmoid",
+    "epochs": null,
+    "epsilon": 0.0001,
+    "epsilon_high": 0.02,
+    "fuse": true,
+    "grad_checkpoint": false,
+    "gradient_accumulation_steps": 2,
+    "group_size": 2,
+    "grpo_loss_type": "dr_grpo",
+    "importance_sampling_level": "token",
+    "iters": 1000,
+    "judge": "mlx-community/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2-4-bit",
+    "judge_config": {},
+    "learning_rate": 1e-06,
+    "list_reward_functions": false,
+    "load_in_4bits": true,
+    "load_in_6bits": false,
+    "load_in_8bits": false,
+    "lora_parameters": {
+        "rank": 64,
+        "alpha": 128,
+        "dropout": 0.0,
+        "scale": 2.0
+    },
+    "lr_schedule": null,
+    "mask_prompt": false,
+    "max_completion_length": 512,
+    "max_seq_length": 2048,
+    "model": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
+    "num_layers": -1,
+    "optimizer": "adamw",
+    "optimizer_config": {
+        "adam": {},
+        "adamw": {},
+        "muon": {},
+        "qhadam": {}
+    },
+    "reference_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
+    "resume_adapter_file": "adapters/turn80/adapters.safetensors",
+    "reward_functions": "r1_semantic_similarity_reward,r1_conditional_content_reward,r1_velocity_to_correct_thinking_reward,r1_format_reward,r1_tag_structure_reward,r1_thinking_quality_reward",
+    "reward_functions_file": null,
+    "reward_scaling": 1.0,
+    "reward_weights": "[0.25, 0.25, 0.20, 0.10, 0.10, 0.10]",
+    "save_every": 6,
+    "seed": 360,
+    "steps_per_eval": 50,
+    "steps_per_report": 1,
+    "temperature": 0.8,
+    "test": false,
+    "test_batches": 500,
+    "train": true,
+    "train_mode": "grpo",
+    "train_type": "lora",
+    "val_batches": 1,
+    "wandb": "mlx-lm-grpo-v3.16"
+}
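In `lora_parameters`, the stored `scale` of 2.0 is consistent with the common LoRA convention scale = alpha / rank = 128 / 64. A sketch that reloads the config and checks that relationship; the alpha/rank convention is an assumption (some trainers divide by sqrt(rank) instead).

```python
# Sketch: read adapter_config.json and sanity-check the LoRA scaling.
# Assumes the common scale = alpha / rank convention.
import json

with open("adapter_config.json") as f:
    cfg = json.load(f)

lora = cfg["lora_parameters"]
derived = lora["alpha"] / lora["rank"]          # 128 / 64 = 2.0
assert abs(derived - lora["scale"]) < 1e-9, (derived, lora["scale"])
print(f"rank={lora['rank']} alpha={lora['alpha']} scale={lora['scale']}")
```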
metadata.json
ADDED
@@ -0,0 +1,17 @@
+{
+    "step": 32,
+    "epoch": 13,
+    "best_loss": -0.00902040209621191,
+    "best_loss_step": 11,
+    "reason": "regular",
+    "total_tokens": 3840,
+    "save_optimizer_state": true,
+    "training_config": {
+        "learning_rate": 3e-06,
+        "batch_size": 1,
+        "grad_accum_steps": 1
+    },
+    "current_metric": -0.00902040209621191,
+    "timestamp": "20251229_000031",
+    "save_duration_s": 53.69711899757385
+}
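`metadata.json` is checkpoint bookkeeping: 3840 total tokens over 32 optimizer steps works out to 120 tokens per step at batch size 1. A sketch deriving that kind of summary, assuming these field names stay stable across checkpoints.

```python
# Sketch: derive simple throughput numbers from metadata.json.
# Field names are copied from this commit; treat them as this repo's
# convention, not a general format.
import json

meta = json.load(open("metadata.json"))
tokens_per_step = meta["total_tokens"] / meta["step"]   # 3840 / 32 = 120.0
print(f"step={meta['step']} epoch={meta['epoch']} "
      f"tokens/step={tokens_per_step:.1f} "
      f"best_loss={meta['best_loss']:.6f} at step {meta['best_loss_step']}")
```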
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5f2591a76dd02582af815350ec1d35ad1612ca576f1238fbeb74a1d293bcc752
+size 8044982021
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:05d47c87966b4db779200053de490f89936ed529f8ab889244e271630715fcfe
+size 11422638
training_state.json
CHANGED
@@ -1,659 +1,659 @@
 {
-    "global_step":
-    "num_updates":
+    "global_step": 20,
+    "num_updates": 20,
     "use_lora": false,
     "rng_state": {
         "python": [
             3,
             [
-                …
+                3216301271,
+                1638589946,
+                2537792956,
+                … (620 more MT19937 state words) …
+                3128551111,
+                100
             ],
             null
         ],
         "numpy": [
             "MT19937",
-            "[
+            "[ 22934 3945700911 2671637342 1915830095 … 2607790270 2048163994 187085462]",
             624,
             0,
             0.0
         ],
-        "mlx":
+        "mlx": 1766497666,
         "mlx_key": [
             0,
-
+            1766461675
         ]
     },
     "training_args_snapshot": {
-        "output_dir": "
-        "max_kv_size":
-        "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507",
-        "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507",
+        "output_dir": "outy1266_align_last32",
+        "max_kv_size": 1024,
+        "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
+        "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
         "draft_model_path": null,
-        "punish_reopen_think": -
+        "punish_reopen_think": -10.0,
         "punish_reopen_answer": -9.0,
         "low_band": [
             0,
@@ -661,18 +661,23 @@
         ],
         "mid_band": [
             16,
-
+            23
         ],
         "top_band": [
-
+            24,
             35
         ],
-        "low_mul": 0.
-        "mid_mul": 0.
-        "top_mul": 1.
-        "head_mul": 1.
-        "train_layer_start":
+        "low_mul": 0.1,
+        "mid_mul": 0.95,
+        "top_mul": 1.5,
+        "head_mul": 1.2,
+        "train_layer_start": 22,
         "train_layer_end": 35,
+        "think_length_target_min": 8,
+        "think_length_target_max": 64,
+        "think_length_penalty_strength": 0.8,
+        "think_length_penalty_type": "exponential",
+        "enable_think_length_penalty": true,
         "run_server": false,
         "use_paged_kv_cache": true,
         "kv_cache_block_size": 16,
@@ -683,18 +688,18 @@
         "benchmark_split": "test",
         "benchmark_prompt_key": "question",
        "benchmark_answer_key": "answer",
-        "benchmark_samples":
+        "benchmark_samples": 10,
         "benchmark_max_new_tokens": 196,
         "benchmark_temperature": 0.0,
         "benchmark_top_p": 1.0,
         "benchmark_top_k": 0,
         "benchmark_use_chat_template": true,
         "benchmark_stop_on_error": false,
-        "min_think_tokens":
+        "min_think_tokens": 16,
         "think_end_early_bias": -12.0,
         "bias_answer_start_after_min_think": true,
-        "train_dataset_path": "/
-        "val_dataset_path": "/
+        "train_dataset_path": "strat/train.jsonl",
+        "val_dataset_path": "strat/valid.jsonl",
         "dataset_name": null,
         "dataset_config": null,
         "dataset_train_split": "train",
@@ -703,25 +708,26 @@
         "dataset_answer_key": "completion",
         "dataset_filter_keywords": [
             "http://",
+            "**other**",
             "https://",
             "png",
             "jpg",
-            "
-            "
+            "Another way",
+            "Adeel"
         ],
         "max_prompt_len": 350,
-        "max_gen_len":
+        "max_gen_len": 384,
         "system_prompt": null,
         "think_start_tag": "<think>",
         "think_end_tag": "</think>",
         "answer_start_tag": "<answer>",
         "answer_end_tag": "</answer>",
-        "think_boost_tokens":
-        "think_temperature": 0.
-        "answer_temperature": 0.
-        "sampling_top_p": 0.
-        "sampling_min_p": 0.
-        "sampling_top_k":
+        "think_boost_tokens": 1,
+        "think_temperature": 0.35,
+        "answer_temperature": 0.2,
+        "sampling_top_p": 0.6,
+        "sampling_min_p": 0.0,
+        "sampling_top_k": 60,
         "repetition_penalty": 1.1,
         "repetition_context_size": 20,
         "hard_mask_mcq_first_token": true,
@@ -735,34 +741,95 @@
         "bias_answer_start": 6.0,
         "punish_extra_think_end": -12.0,
         "bias_eos_after_answer": 3.0,
-        "allow_tool_calls":
-        "tool_call_penalty":
-        "reward_content_type": "
+        "allow_tool_calls": true,
+        "tool_call_penalty": 0.0,
+        "reward_content_type": "steps",
         "reward_format_weight": 0.05,
         "reward_content_weight": 0.7,
         "think_reward_weight": 0.25,
-        "think_len_min":
-        "think_len_max":
+        "think_len_min": 16,
+        "think_len_max": 64,
         "non_ascii_penalty": 1.0,
         "off_topic_jaccard_threshold": 0.05,
         "off_topic_penalty": 1.0,
-        "ban_keywords": [
-            …
+        "ban_keywords": [],
+        "ban_penalty": 3.0,
+        "ban_phrases_for_bias": [
+            "I think the answer",
+            "I believe that",
+            "In my view",
+            "From what I can tell",
+            "It seems to me",
+            "It appears that",
+            "My understanding is",
+            "As far as I know",
+            "Let me start by",
+            "Let me first",
+            "I should probably",
+            "I need to figure out",
+            "I'm trying to",
+            "I'm going to try",
+            "I'll attempt to",
+            "Confused",
+            "stuck",
             "frustrated",
-            "
-            "
+            "frustrating",
+            "Alternatively",
+            "Actually",
+            "Probably not sure",
+            "Uncertain about",
+            "Unclear whether",
+            "I'm guessing that",
+            "maybe this is",
+            "Could be that",
+            "Might be because",
+            "I'm not 100% sure",
+            "I'm not sure if",
+            "I'm not certain",
+            "Hard to say",
+            "Difficult to tell",
+            "Circular reasoning detected",
+            "In some way or another",
+            "Magically works",
+            "For some unknown reason",
+            "Too complicated",
+            "It just somehow",
+            "Something seems off",
+            "False assumption",
+            "Insufficient information to",
+            "Wait, what if",
+            "Wait, actually no",
+            "Wait, on second thought",
+            "Hold on, maybe",
+            "Hmm, perhaps",
+            "Or wait, could",
+            "Looking at this more closely",
+            "Upon further reflection",
+            "Taking a step back",
+            "Thinking about it more",
+            "Now that I consider",
+            "When I really think",
+            "If I had to guess",
+            "To be completely honest",
+            "In all honesty",
+            "You know what",
+            "The thing is",
+            "What I mean is",
+            "In other words",
+            "Put simply",
+            "Basically what happens",
+            "Long story short",
+            "At the end of the day"
         ],
-        "
+        "encourage_phrases_for_bias": [],
+        "encourage_think_bias": 4.5,
+        "ban_think_bias": -3.0,
+        "symbolic_bonus_per_token": 0.07,
+        "max_words_per_think_line": 12,
+        "verbosity_penalty_per_word": 0.01,
+        "min_unique_token_ratio": 0.75,
+        "low_diversity_penalty": 0.5,
+        "telegram_style_bonus": 0.25,
         "use_lora": false,
         "lora_rank": 8,
         "lora_alpha": 16.0,
@@ -777,30 +844,30 @@
             "up_proj",
             "down_proj"
         ],
-        "num_rollout_samples":
+        "num_rollout_samples": 2,
         "ppo_batch_size": 1,
-        "grpo_beta": 0.
-        "learning_rate":
+        "grpo_beta": 0.005,
+        "learning_rate": 3e-05,
         "optimizer_beta1": 0.9,
         "optimizer_beta2": 0.95,
         "optimizer_weight_decay": 0.05,
-        "grad_clip_norm": 0.
+        "grad_clip_norm": 0.35,
         "save_optimizer_state": false,
         "lr_schedule_config": {
             "name": "cosine_decay",
             "arguments": [
-
+                3e-05,
                 60000,
-
+                5e-08
             ],
             "warmup": 4000,
             "warmup_init": 1e-08
         },
-        "grad_accum_steps":
+        "grad_accum_steps": 1,
         "num_training_steps": 45869,
-        "save_every":
-        "eval_every":
-        "seed":
+        "save_every": 5,
+        "eval_every": 10,
+        "seed": 22934,
         "shuffle_data": true,
         "use_grad_checkpointing": false,
         "grad_checkpoint_layers": 0,
@@ -808,7 +875,7 @@
         "early_stopping_threshold": 0.005,
         "min_trainable_layers": 4,
         "use_custom_batch_builder": true,
-        "invalid_sample_layers": "34,35",
+        "invalid_sample_layers": "33,34,35",
         "invalid_sample_frequency": 2,
         "log_samples_every": 1,
         "max_logged_samples": 50,
@@ -819,15 +886,15 @@
         "quantized_kv_start": 10,
         "verbose": true,
         "use_wandb": true,
-        "wandb_project": "reasonable-qwen3-4b-mlx-
+        "wandb_project": "reasonable-qwen3-4b-mlx-two",
         "wandb_entity": null,
         "wandb_run_name": null,
-        "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/
+        "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last32/checkpoint_20251223_144746_periodic_update_20",
         "allow_cross_arch_ref": false,
         "align_bridge_path": null,
         "align_bridge_weight": 1.0,
         "align_pool": "mean",
         "align_after_tag": "</think>",
-        "effective_batch_size":
+        "effective_batch_size": 2
     }
 }
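`rng_state` snapshots the Python, NumPy, and MLX generators so a resumed run can replay the same sampling: Python's `random` state is a `(version, 625-word tuple, gauss_next)` triple, NumPy's MT19937 key was flattened into one whitespace-separated string, and MLX keeps a seed-like value plus an explicit key pair. A hedged sketch of restoring such a blob; the trainer's actual resume path is not shown in this commit.

```python
# Sketch only: restore the RNG snapshot stored in training_state.json.
# Mirrors the JSON layout above; the real resume code may differ.
import json
import random

import mlx.core as mx
import numpy as np

rng = json.load(open("training_state.json"))["rng_state"]

# Python: (version, 625-word state tuple, gauss_next).
version, words, gauss_next = rng["python"]
random.setstate((version, tuple(words), gauss_next))

# NumPy MT19937: the 624-word key was serialized as one bracketed,
# whitespace-separated string, so parse it back to uint32 first.
name, key_str, pos, has_gauss, cached = rng["numpy"]
key = np.array(key_str.strip("[]").split(), dtype=np.uint32)
np.random.set_state((name, key, pos, has_gauss, cached))

# MLX: reseeding the global generator from the stored value is the
# simplest restore; the "mlx_key" pair holds the explicit key words.
mx.random.seed(rng["mlx"])
```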
xspecial_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
+{
+    "additional_special_tokens": [
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|object_ref_start|>",
+        "<|object_ref_end|>",
+        "<|box_start|>",
+        "<|box_end|>",
+        "<|quad_start|>",
+        "<|quad_end|>",
+        "<|vision_start|>",
+        "<|vision_end|>",
+        "<|vision_pad|>",
+        "<|image_pad|>",
+        "<|video_pad|>"
+    ],
+    "eos_token": {
+        "content": "<|im_end|>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    },
+    "pad_token": {
+        "content": "<|endoftext|>",
+        "lstrip": false,
+        "normalized": false,
+        "rstrip": false,
+        "single_word": false
+    }
+}
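The contents match Qwen's usual `special_tokens_map.json`; the `x` prefix appears to park the file under a non-standard name so tokenizer loaders ignore it. If one wanted to apply the map anyway, a sketch via `transformers`, assuming the Qwen base tokenizer:

```python
# Sketch only: apply xspecial_tokens_map.json to a tokenizer, assuming it
# is a normal special_tokens_map.json parked under a different name.
import json

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B-Thinking-2507")
spec = json.load(open("xspecial_tokens_map.json"))

tok.add_special_tokens({
    "additional_special_tokens": spec["additional_special_tokens"],
    "eos_token": spec["eos_token"]["content"],
    "pad_token": spec["pad_token"]["content"],
})
print(tok.eos_token, tok.pad_token)
```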