adeelahmad commited on
Commit
75edb1a
·
verified ·
1 Parent(s): ce43786

Upload folder using huggingface_hub

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -1,7 +1,9 @@
1
  ---
2
- language: en
3
- pipeline_tag: text-generation
4
  library_name: mlx
 
 
 
 
5
  tags:
6
  - mlx
7
  ---
 
1
  ---
 
 
2
  library_name: mlx
3
+ license: apache-2.0
4
+ license_link: https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507/blob/main/LICENSE
5
+ pipeline_tag: text-generation
6
+ base_model: Qwen/Qwen3-4B-Thinking-2507
7
  tags:
8
  - mlx
9
  ---
ReasonableQwen3-4B-BF16.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54559b51f3d661c587daf366600f8f71b313ac8d19680e63255f4ca645ec1df3
3
- size 8051285280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f95bc6d806e7c98baa9308c5d0f4055d2b917bbc721bd0201d36f14bd412444
3
+ size 8051285184
ReasonableQwen3-4B-Q3_K.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad4776c9da37d2cb15590b56f9c2fe9d7c2b29fbcfb80fd346a6fc8b50a7b84e
3
- size 2075618080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f183fb995f35a3e13305a3e787bdb988f6643ee56b5c7a964d1a7b4e1d7ad4cd
3
+ size 2075617984
ReasonableQwen3-4B-Q4_K.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac988da105d7df38d78b73554134b7f4a23bb5143eed239f0bba2fa12ba012c5
3
- size 2497280800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e162f04a69f148beaadc70cccc515d260a1973b14efde5cd13d124118a35ac2
3
+ size 2497280704
ReasonableQwen3-4B-Q8_0.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f973234d65784567507061d628e5b3f3b17c511193d14f4d09572869694fcf5f
3
- size 4280405280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e1cf3d4afe75e385d5ba30ba548492378b33d84b228ede020ea537fba80fa0
3
+ size 4280405184
adapter_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adapter_path": "adapters/turn80",
3
+ "alpha": [
4
+ 1e-05
5
+ ],
6
+ "batch_size": 1,
7
+ "beta": 0.02,
8
+ "config": null,
9
+ "data": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/strat",
10
+ "delta": 50.0,
11
+ "dpo_cpo_loss_type": "sigmoid",
12
+ "epochs": null,
13
+ "epsilon": 0.0001,
14
+ "epsilon_high": 0.02,
15
+ "fuse": true,
16
+ "grad_checkpoint": false,
17
+ "gradient_accumulation_steps": 2,
18
+ "group_size": 2,
19
+ "grpo_loss_type": "dr_grpo",
20
+ "importance_sampling_level": "token",
21
+ "iters": 1000,
22
+ "judge": "mlx-community/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2-4-bit",
23
+ "judge_config": {},
24
+ "learning_rate": 1e-06,
25
+ "list_reward_functions": false,
26
+ "load_in_4bits": true,
27
+ "load_in_6bits": false,
28
+ "load_in_8bits": false,
29
+ "lora_parameters": {
30
+ "rank": 64,
31
+ "alpha": 128,
32
+ "dropout": 0.0,
33
+ "scale": 2.0
34
+ },
35
+ "lr_schedule": null,
36
+ "mask_prompt": false,
37
+ "max_completion_length": 512,
38
+ "max_seq_length": 2048,
39
+ "model": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
40
+ "num_layers": -1,
41
+ "optimizer": "adamw",
42
+ "optimizer_config": {
43
+ "adam": {},
44
+ "adamw": {},
45
+ "muon": {},
46
+ "qhadam": {}
47
+ },
48
+ "reference_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
49
+ "resume_adapter_file": "adapters/turn80/adapters.safetensors",
50
+ "reward_functions": "r1_semantic_similarity_reward,r1_conditional_content_reward,r1_velocity_to_correct_thinking_reward,r1_format_reward,r1_tag_structure_reward,r1_thinking_quality_reward",
51
+ "reward_functions_file": null,
52
+ "reward_scaling": 1.0,
53
+ "reward_weights": "[0.25, 0.25, 0.20, 0.10, 0.10, 0.10]",
54
+ "save_every": 6,
55
+ "seed": 360,
56
+ "steps_per_eval": 50,
57
+ "steps_per_report": 1,
58
+ "temperature": 0.8,
59
+ "test": false,
60
+ "test_batches": 500,
61
+ "train": true,
62
+ "train_mode": "grpo",
63
+ "train_type": "lora",
64
+ "val_batches": 1,
65
+ "wandb": "mlx-lm-grpo-v3.16"
66
+ }
metadata.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "step": 32,
3
+ "epoch": 13,
4
+ "best_loss": -0.00902040209621191,
5
+ "best_loss_step": 11,
6
+ "reason": "regular",
7
+ "total_tokens": 3840,
8
+ "save_optimizer_state": true,
9
+ "training_config": {
10
+ "learning_rate": 3e-06,
11
+ "batch_size": 1,
12
+ "grad_accum_steps": 1
13
+ },
14
+ "current_metric": -0.00902040209621191,
15
+ "timestamp": "20251229_000031",
16
+ "save_duration_s": 53.69711899757385
17
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45910c6f9bd4d83dbd6ba94ee446cc59e8089fe944eef091bd0eaec06b00449f
3
- size 8044982009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2591a76dd02582af815350ec1d35ad1612ca576f1238fbeb74a1d293bcc752
3
+ size 8044982021
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d47c87966b4db779200053de490f89936ed529f8ab889244e271630715fcfe
3
+ size 11422638
training_state.json CHANGED
@@ -1,659 +1,659 @@
1
  {
2
- "global_step": 640,
3
- "num_updates": 640,
4
  "use_lora": false,
5
  "rng_state": {
6
  "python": [
7
  3,
8
  [
9
- 3100233166,
10
- 3205251947,
11
- 2114003297,
12
- 4083397361,
13
- 3944152784,
14
- 1030840702,
15
- 2037797290,
16
- 2255135174,
17
- 3606282055,
18
- 1805506540,
19
- 1511271408,
20
- 1355721171,
21
- 3219242983,
22
- 2147281139,
23
- 1663895881,
24
- 2744272328,
25
- 586124618,
26
- 2904233013,
27
- 1951649306,
28
- 568843218,
29
- 804729164,
30
- 1031240685,
31
- 632815444,
32
- 1583468126,
33
- 3569217402,
34
- 1474578133,
35
- 1620621069,
36
- 4081348163,
37
- 660493223,
38
- 548758843,
39
- 3288411447,
40
- 2761565335,
41
- 651306001,
42
- 3364357190,
43
- 574611538,
44
- 3080366693,
45
- 648102770,
46
- 662716411,
47
- 65202676,
48
- 1627080032,
49
- 2475536053,
50
- 939008240,
51
- 1513946789,
52
- 2550074718,
53
- 3473906356,
54
- 836348979,
55
- 396042796,
56
- 2909821569,
57
- 2791572300,
58
- 3313263004,
59
- 434385456,
60
- 2799243929,
61
- 1110673623,
62
- 10708347,
63
- 1193947720,
64
- 4154063907,
65
- 3769310438,
66
- 1825986973,
67
- 1865856087,
68
- 2550327576,
69
- 635176254,
70
- 343483848,
71
- 627331621,
72
- 4032575651,
73
- 912584999,
74
- 2184863449,
75
- 2003242393,
76
- 191158593,
77
- 4027956150,
78
- 135767775,
79
- 165725602,
80
- 2940340686,
81
- 1735550527,
82
- 2166976964,
83
- 2935306830,
84
- 1440736616,
85
- 2136544928,
86
- 1238006856,
87
- 904424546,
88
- 4116526595,
89
- 4005584629,
90
- 2601760449,
91
- 271648674,
92
- 139094063,
93
- 3602969447,
94
- 414504323,
95
- 1053832962,
96
- 4134761489,
97
- 3249289190,
98
- 1358383408,
99
- 195832611,
100
- 1843593352,
101
- 2969447193,
102
- 3158396779,
103
- 3569807150,
104
- 4160044175,
105
- 2081938636,
106
- 1429144436,
107
- 385540495,
108
- 4178096289,
109
- 1917190049,
110
- 2474710463,
111
- 3123380742,
112
- 2902614650,
113
- 845733750,
114
- 728346900,
115
- 1111266260,
116
- 1230890723,
117
- 3960864409,
118
- 1864677806,
119
- 2465725907,
120
- 1846265707,
121
- 2510053619,
122
- 4283547983,
123
- 2002390956,
124
- 3488389929,
125
- 391769952,
126
- 3822866039,
127
- 1599671852,
128
- 501962155,
129
- 774437297,
130
- 542827104,
131
- 36072073,
132
- 443060080,
133
- 689178575,
134
- 86655597,
135
- 825092402,
136
- 625421810,
137
- 1794213612,
138
- 1871893587,
139
- 3436665270,
140
- 631289196,
141
- 3067538162,
142
- 552776492,
143
- 627440404,
144
- 2105634472,
145
- 548648577,
146
- 636698996,
147
- 947106975,
148
- 3431407759,
149
- 2240487779,
150
- 3128108668,
151
- 521416466,
152
- 2871250081,
153
- 1062946757,
154
- 1608040378,
155
- 2590414279,
156
- 3358166288,
157
- 1253809383,
158
- 3726438328,
159
- 281309528,
160
- 1669515180,
161
- 3273041510,
162
- 2260421329,
163
- 996425797,
164
- 292515881,
165
- 2296689583,
166
- 2805798357,
167
- 2032275171,
168
- 1538312337,
169
- 643565956,
170
- 3359622205,
171
- 3745536289,
172
- 2003084236,
173
- 3869804701,
174
- 2724155627,
175
- 2996948495,
176
- 4184607962,
177
- 4215169557,
178
- 430613231,
179
- 1647692967,
180
- 1793331314,
181
- 3874676231,
182
- 2730461343,
183
- 3955139318,
184
- 3832613492,
185
- 539666038,
186
- 285792739,
187
- 3198830397,
188
- 4055078950,
189
- 3569499391,
190
- 781279195,
191
- 2220425539,
192
- 2039802440,
193
- 1385766748,
194
- 506267236,
195
- 2271648199,
196
- 1568145524,
197
- 2554254342,
198
- 758016416,
199
- 3621980520,
200
- 160767585,
201
- 2188462111,
202
- 1450007015,
203
- 1105079920,
204
- 3276242179,
205
- 3640324766,
206
- 2860750163,
207
- 2143715191,
208
- 1831279561,
209
- 2293429158,
210
- 739306744,
211
- 2043528556,
212
- 3882672526,
213
- 546842616,
214
- 2714277920,
215
- 3887463926,
216
- 1831957765,
217
- 2229915849,
218
- 2864792220,
219
- 1303437412,
220
- 1091254688,
221
- 826344852,
222
- 3120288320,
223
- 556529779,
224
- 811546242,
225
- 1476206000,
226
- 2630584339,
227
- 2863085756,
228
- 3547631685,
229
- 787693537,
230
- 4066961252,
231
- 308882144,
232
- 1599204914,
233
- 582842064,
234
- 2754105741,
235
- 2202249626,
236
- 3301080360,
237
- 3074376018,
238
- 1129640401,
239
- 3543504945,
240
- 1135643549,
241
- 571814866,
242
- 2951061707,
243
- 2139761207,
244
- 4279741136,
245
- 676772801,
246
- 3652048859,
247
- 410817428,
248
- 896263120,
249
- 358049826,
250
- 418456999,
251
- 3572284105,
252
- 2484345908,
253
- 3844208687,
254
- 3067647389,
255
- 1302900203,
256
- 931143928,
257
- 1519365080,
258
- 712789513,
259
- 2595580658,
260
- 1784776098,
261
- 2848139510,
262
- 3505469053,
263
- 1582655988,
264
- 4158436496,
265
- 3330262652,
266
- 2338444089,
267
- 1002216810,
268
- 484453692,
269
- 3833856314,
270
- 2002391132,
271
- 519925212,
272
- 2981935072,
273
- 3077297433,
274
- 239222133,
275
- 1056108488,
276
- 1645014848,
277
- 2673367049,
278
- 51217098,
279
- 1778100088,
280
- 1899905425,
281
- 3987547773,
282
- 1169518869,
283
- 578030579,
284
- 3367779903,
285
- 956133751,
286
- 1760174970,
287
- 1650239768,
288
- 2802504529,
289
- 1103701391,
290
- 121844885,
291
- 3488652418,
292
- 4134091454,
293
- 3048567685,
294
- 2857939785,
295
- 2220271914,
296
- 4024370836,
297
- 931259945,
298
- 2540064234,
299
- 65277599,
300
- 3847535535,
301
- 990871212,
302
- 1272682701,
303
- 2391502819,
304
- 3737351256,
305
- 1335981297,
306
- 2386770941,
307
- 32918208,
308
- 781820036,
309
- 1166374003,
310
- 2556455899,
311
- 510298691,
312
- 159187659,
313
- 2597521830,
314
- 291154034,
315
- 3494951397,
316
- 2944107260,
317
- 3378737570,
318
- 2839441569,
319
- 770671866,
320
- 4283771012,
321
- 165153002,
322
- 4069832089,
323
- 2175511762,
324
- 222931749,
325
- 154419633,
326
- 2538945322,
327
- 3320641862,
328
- 1807339955,
329
- 3706204773,
330
- 2765491230,
331
- 1260202910,
332
- 1924333259,
333
- 4184170411,
334
- 563038778,
335
- 2507175419,
336
- 1076853275,
337
- 143035900,
338
- 3902786410,
339
- 1559868554,
340
- 511327562,
341
- 3462826369,
342
- 3748998442,
343
- 3845225973,
344
- 2080353851,
345
- 1489047293,
346
- 1924679652,
347
- 3795590602,
348
- 3866562819,
349
- 2198666709,
350
- 1386047356,
351
- 518409235,
352
- 3428184188,
353
- 1732776878,
354
- 273783238,
355
- 3723574330,
356
- 1683692783,
357
- 1523451822,
358
- 3699825569,
359
- 4127346852,
360
- 3111140829,
361
- 1605311581,
362
- 4208002359,
363
- 3118728618,
364
- 3463176645,
365
- 1575015318,
366
- 3747413705,
367
- 2206390158,
368
- 2583654895,
369
- 1354557377,
370
- 2183517438,
371
- 3254493976,
372
- 3575291627,
373
- 4028981055,
374
- 3968296862,
375
- 2729291170,
376
- 247729344,
377
- 650485877,
378
- 1318877280,
379
- 285023715,
380
- 2202961239,
381
- 4219685020,
382
- 3779707729,
383
- 917974987,
384
- 4089069944,
385
- 344146767,
386
- 359806484,
387
- 4275723208,
388
- 703513891,
389
- 4224074000,
390
- 2009124160,
391
- 604809477,
392
- 4008576440,
393
- 2427561320,
394
- 2309325311,
395
- 2362683196,
396
- 235168397,
397
- 1600648522,
398
- 4215967893,
399
- 1675054314,
400
- 2013381593,
401
- 1903651062,
402
- 3601280500,
403
- 496338735,
404
- 2169133753,
405
- 2364495966,
406
- 1056246743,
407
- 1421447090,
408
- 1995033995,
409
- 1869864803,
410
- 698922377,
411
- 2409323964,
412
- 1510516618,
413
- 641888605,
414
- 1077564019,
415
- 29567647,
416
- 2785537387,
417
- 1925896177,
418
- 1227126769,
419
- 2748886622,
420
- 1332019659,
421
- 638361144,
422
- 540471184,
423
- 2887245217,
424
- 4182719198,
425
- 884751481,
426
- 3445591454,
427
- 675466383,
428
- 2087852801,
429
- 2083882276,
430
- 414620424,
431
- 3713986203,
432
- 2425060387,
433
- 607871428,
434
- 487946518,
435
- 718902558,
436
- 3786110198,
437
- 1653762457,
438
- 3787648973,
439
- 431397113,
440
- 4108771890,
441
- 402186349,
442
- 2471796344,
443
- 4273010052,
444
- 4248683,
445
- 1754846221,
446
- 2340586638,
447
- 412924166,
448
- 2306146182,
449
- 2417039245,
450
- 1511347613,
451
- 1319385385,
452
- 671950230,
453
- 4188344613,
454
- 1223621563,
455
- 3353660436,
456
- 1926932037,
457
- 3945195786,
458
- 61444401,
459
- 3450186348,
460
- 3982842641,
461
- 2088756605,
462
- 1824246909,
463
- 2656102781,
464
- 3750651125,
465
- 3189574856,
466
- 3078132247,
467
- 588048676,
468
- 1141387549,
469
- 1187369381,
470
- 750263661,
471
- 1681491629,
472
- 2025242095,
473
- 3733996179,
474
- 2457640372,
475
- 956774679,
476
- 1782785055,
477
- 2881116689,
478
- 1311179739,
479
- 66826352,
480
- 2111027409,
481
- 271078370,
482
- 99525850,
483
- 3874156593,
484
- 730907368,
485
- 2126769770,
486
- 2524302228,
487
- 2128545293,
488
- 1371594345,
489
- 2849348044,
490
- 2996788033,
491
- 2591750469,
492
- 3642360292,
493
- 3727095772,
494
- 655210820,
495
- 1162591913,
496
- 1003845048,
497
- 439189944,
498
- 3637207234,
499
- 14122112,
500
- 967296467,
501
- 2664681396,
502
- 2685037860,
503
- 684065770,
504
- 3082029956,
505
- 2341710744,
506
- 187950156,
507
- 2219843456,
508
- 3886109287,
509
- 503801047,
510
- 2410462727,
511
- 2929691138,
512
- 1032873297,
513
- 1688752406,
514
- 2759671101,
515
- 3287456852,
516
- 3986113072,
517
- 1579528064,
518
- 947167093,
519
- 1051079192,
520
- 1049610650,
521
- 2231280468,
522
- 726914012,
523
- 1534264355,
524
- 2904096012,
525
- 4062734554,
526
- 531616723,
527
- 3561831970,
528
- 737998148,
529
- 3230395316,
530
- 3022021563,
531
- 251528766,
532
- 3999173370,
533
- 210218179,
534
- 3088594306,
535
- 1439401799,
536
- 1041859435,
537
- 2695195830,
538
- 3465077144,
539
- 2823089326,
540
- 2556938793,
541
- 1458448626,
542
- 1308700694,
543
- 444948412,
544
- 2304470611,
545
- 3654027317,
546
- 1034742794,
547
- 3810003506,
548
- 1772353215,
549
- 3041681477,
550
- 3394149945,
551
- 440869916,
552
- 3892023657,
553
- 556989640,
554
- 2481299321,
555
- 3529362628,
556
- 3054249201,
557
- 1737493058,
558
- 1235777870,
559
- 1334111971,
560
- 2971707615,
561
- 3873667589,
562
- 1175895135,
563
- 1940930396,
564
- 3443474468,
565
- 2788127120,
566
- 3319749384,
567
- 3328773738,
568
- 1966987235,
569
- 1065977210,
570
- 2939975890,
571
- 3943360814,
572
- 3458298409,
573
- 2200312840,
574
- 2897384251,
575
- 3453182268,
576
- 875764420,
577
- 2531405399,
578
- 912921748,
579
- 4283104359,
580
- 2928545310,
581
- 1224323513,
582
- 722455736,
583
- 3977634571,
584
- 3582693887,
585
- 1125205481,
586
- 1576562126,
587
- 3743288371,
588
- 3499633816,
589
- 735307326,
590
- 281573363,
591
- 1724151404,
592
- 2589762545,
593
- 2902873521,
594
- 3471234108,
595
- 2219383722,
596
- 2061024598,
597
- 814769644,
598
- 3401335543,
599
- 4190237183,
600
- 650711606,
601
- 4101800219,
602
- 2172610273,
603
- 3777607756,
604
- 4042849157,
605
- 2817574694,
606
- 2095924374,
607
- 1909727874,
608
- 2477919892,
609
- 2834537640,
610
- 3637236485,
611
- 1190839625,
612
- 1838707007,
613
- 3247768062,
614
- 2294183597,
615
- 1445875960,
616
- 2333419384,
617
- 335327851,
618
- 3000828940,
619
- 1726549769,
620
- 4275198256,
621
- 3227919305,
622
- 3533288452,
623
- 173416253,
624
- 2262899688,
625
- 3349430595,
626
- 396380904,
627
- 1372995131,
628
- 3355176830,
629
- 4107929401,
630
- 3693751586,
631
- 772959075,
632
- 3498819257,
633
- 370
634
  ],
635
  null
636
  ],
637
  "numpy": [
638
  "MT19937",
639
- "[ 8303 3362861772 1619186349 1762601439 1567969946 1320913452\n 808097991 3993093514 203964437 4072637778 354349567 380629158\n 3082602378 1492811957 2661358354 1249590623 2886239270 990864965\n 1924220491 1117017413 2869102568 114579815 997221561 1716370964\n 100154209 3193787998 982404710 1860404313 1073630932 4015033793\n 1056096168 3689842023 1077486740 3341954026 4020896783 3063022815\n 2319528021 1962385784 560690659 2930834870 2206559276 2815052879\n 130043787 3393045762 2815640209 452914988 784787594 2058459297\n 522114896 1804911297 1630115314 877409298 1852583246 102115168\n 3580881174 1417032320 1630533149 2372212037 1781883965 2270734311\n 186043029 4168788486 1870971191 457187469 1905478881 1029780641\n 2565532871 2731005692 869777530 2775988839 3249018655 3446872659\n 531509720 894321281 654198319 891934678 2363338170 2507473381\n 377446257 1382752484 1528924073 2070637977 1311806794 2451376618\n 1427771612 945417350 1522009908 3572053312 2253905607 977621266\n 3845758580 3469358158 3051280317 2090423736 2607666011 1809711996\n 853558705 2879273014 3672899814 1306126268 1713869557 1877839529\n 279523502 3666825997 459293422 2370684495 1189704139 1015349021\n 1120545757 2386561593 2421857461 4003382178 1622890485 3282519989\n 1952035136 738572312 2523822572 911767899 2136380509 1896989891\n 1469870850 3009084328 221856908 3441096631 270470784 1619212541\n 740841450 576514513 3632273653 1855946639 1695699336 3030088848\n 1996507166 646843328 2358534982 1183223387 2981778442 2176020401\n 3296124201 3815160861 2131117666 702136476 524319258 612243153\n 2880166405 2628502612 1647344768 2202609784 2245804726 1921295769\n 3525029262 4020694584 1240895711 3219125807 1163212123 1637955357\n 484227752 2106690789 2031000210 3514869662 1388324497 1214400369\n 1527848914 4203641314 1519812457 2741431469 3641575601 2047783137\n 1519626504 754352694 6964728 2468828035 3468360593 426009415\n 1756179121 1660553247 3150922374 2230969029 1482946613 414236983\n 3517164135 612220457 796850147 989153350 140311126 788581031\n 4132721565 1096190993 2894713612 2176107331 2878722659 2354637316\n 3799747358 2180737074 3963734962 2848855704 3734852998 2038033214\n 783779745 1197780044 1023946793 320860662 164699352 2983721475\n 17247281 533834018 3106467128 3903824561 1446671114 367871784\n 1942584218 1189132282 620423643 2803988284 1136015948 1345533496\n 2479351381 1216573484 1988507 303531010 754591654 3144907611\n 2902543355 318389020 3558821612 1847659564 1942545315 3246490061\n 456226090 509391991 3127370457 3143677006 2880949988 3448223655\n 3981326494 1148410844 2092401693 3604880377 3731870864 1860602350\n 3125695291 3187627886 2407156366 653809199 1969367167 450324651\n 3735867245 1764865373 3761162820 4171681276 2483404181 1541053582\n 3606137959 902867057 747499155 313576446 3101518390 1151122309\n 3268124694 261383756 1818167552 1213667178 3989930301 2726571133\n 2775483939 21599502 3727576464 2462926602 389566260 1711674513\n 3512151518 3153813056 460030746 3897140307 1277994146 908931170\n 963021246 1901922059 3955106312 1418220142 1659825635 3200240963\n 4014654655 485243719 1742562079 3526699507 3355114702 2632610560\n 1561948906 2717326040 3620881188 4126498438 184973469 1482988566\n 2950358329 2930115694 1859226564 3403387618 2053266671 3742107921\n 1209546566 3725840176 1712851789 2224824875 453012573 330532578\n 3778095708 1688965038 1230854207 2009005995 718461256 208618143\n 1491427571 2449392307 4002056719 383575031 2867208367 1963096702\n 1795247193 533084151 1491616947 1497652603 3562351716 86942950\n 2347771394 3289710405 466562532 1233657915 898220586 3748140507\n 2427008898 636905163 1254503011 1238782967 3895969628 4283203786\n 4221133213 2886931111 922986859 1879645066 2015511083 1108514279\n 618085140 3306423355 883925872 2144496777 1641126658 1153156234\n 2651844403 430020786 3696656792 683284102 3441596990 1725726834\n 2632006081 1159341650 1377340195 2886866895 3027966535 3917695904\n 968627127 734115740 3287543286 3074949140 537776154 2995193263\n 3574191023 2832961611 3705012029 3367494375 4248078694 1627669324\n 3788358613 3604335331 940239830 2942210789 1161729435 3740692795\n 2455348370 4133420619 1569119204 2972012502 2050815010 1235750478\n 3072257451 2550281774 856747230 1451377433 3438563836 1849610784\n 3357774219 1503802415 2962862 592596271 3943668757 2756336697\n 105838547 2324476620 3235171028 878843746 618966330 595747699\n 97437937 515195304 3376271580 1145290896 2696909771 3358253284\n 2241289147 1121727126 1309918765 1578024183 4054463658 720648522\n 1846097360 4230354452 729417651 791205440 1367656674 2065284658\n 257316547 3839072148 1555799865 4173515455 1048353236 4198966861\n 3316898928 3423897098 4128420921 2104108687 1063597492 2492958131\n 170555781 745349162 2255105092 2541678417 1114429555 2011457711\n 3454299228 2209782322 652117672 4201843201 2110523012 446611764\n 2926776128 2841467079 1337488503 559094349 3539441441 897438763\n 1650244025 570474331 1882207147 810573271 1456346777 1200505791\n 1603496382 284750116 3007277310 3591850295 3720918864 4052795532\n 1561428281 1050080487 2700034035 4080219366 1757769003 2977271141\n 1037953655 3360174536 2813217261 1188893730 2515840679 2105685490\n 820204985 1453315288 333847673 3528785562 4193735995 4021786871\n 4076203556 1854592580 3780409371 2202024795 1401490689 2819039973\n 2228184585 3518768958 1055269369 4073237030 1866198915 1516245557\n 2165376112 1043711207 669470737 3834818980 4270028755 3417085697\n 2968889276 98688489 2403155937 3448746372 3462613817 386185689\n 1027700117 1171464898 3241939945 2797737805 2687859495 3456328854\n 3023924679 408796600 2542490520 2599118787 1538079271 2122336513\n 2393755396 2139546979 514323376 3816576631 2541629900 2450712143\n 2261717867 3572816760 4067191955 1657792221 1586877146 2800772470\n 961486036 745526709 2507157371 1459191248 1903853193 683849149\n 4231977639 1380583627 2339345354 596858881 3074507647 3460859756\n 3139491815 3938565238 3481073991 1049607667 1359085055 547890519\n 2545110133 417979414 1202035922 856214628 2660641178 816366367\n 3973682275 1893657609 2826660178 624713403 2057336051 2323752871\n 962687815 3843324978 1759656581 3157113925 3518908725 1572522625\n 2056283828 3437388958 4185176359 572346731 2215666799 2603677242\n 3382517330 2691518768 468897014 1496614219 705901168 2158024815\n 3124935233 2379635120 930064252 2215801391 362554117 645562430\n 1722271932 2211076568 3051429706 1988551089 1573797050 3992652562\n 3060693249 2876070780 3498936580 3109867794 1156582816 4117203926\n 3934399067 1986776843 1268452166 270890072 3094479118 3739080723\n 1318506664 3083453958 2544846318 503702647 1926747215 2246476579\n 2986698595 3759912100 2737612611 1155975174 474590245 3878926844\n 2414533375 801426742 2398150324 724649525 2960527953 1386784808\n 3282520983 964626383 888462871 3054956672 2303752120 4209494225]",
640
  624,
641
  0,
642
  0.0
643
  ],
644
- "mlx": 1758816679,
645
  "mlx_key": [
646
  0,
647
- 1758814100
648
  ]
649
  },
650
  "training_args_snapshot": {
651
- "output_dir": "outy1266_align_last29",
652
- "max_kv_size": 1536,
653
- "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507",
654
- "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507",
655
  "draft_model_path": null,
656
- "punish_reopen_think": -8.0,
657
  "punish_reopen_answer": -9.0,
658
  "low_band": [
659
  0,
@@ -661,18 +661,23 @@
661
  ],
662
  "mid_band": [
663
  16,
664
- 27
665
  ],
666
  "top_band": [
667
- 28,
668
  35
669
  ],
670
- "low_mul": 0.25,
671
- "mid_mul": 0.75,
672
- "top_mul": 1.0,
673
- "head_mul": 1.0,
674
- "train_layer_start": 20,
675
  "train_layer_end": 35,
 
 
 
 
 
676
  "run_server": false,
677
  "use_paged_kv_cache": true,
678
  "kv_cache_block_size": 16,
@@ -683,18 +688,18 @@
683
  "benchmark_split": "test",
684
  "benchmark_prompt_key": "question",
685
  "benchmark_answer_key": "answer",
686
- "benchmark_samples": 999999910,
687
  "benchmark_max_new_tokens": 196,
688
  "benchmark_temperature": 0.0,
689
  "benchmark_top_p": 1.0,
690
  "benchmark_top_k": 0,
691
  "benchmark_use_chat_template": true,
692
  "benchmark_stop_on_error": false,
693
- "min_think_tokens": 64,
694
  "think_end_early_bias": -12.0,
695
  "bias_answer_start_after_min_think": true,
696
- "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
697
- "val_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/valid.jsonl",
698
  "dataset_name": null,
699
  "dataset_config": null,
700
  "dataset_train_split": "train",
@@ -703,25 +708,26 @@
703
  "dataset_answer_key": "completion",
704
  "dataset_filter_keywords": [
705
  "http://",
 
706
  "https://",
707
  "png",
708
  "jpg",
709
- "json",
710
- "JSON"
711
  ],
712
  "max_prompt_len": 350,
713
- "max_gen_len": 128,
714
  "system_prompt": null,
715
  "think_start_tag": "<think>",
716
  "think_end_tag": "</think>",
717
  "answer_start_tag": "<answer>",
718
  "answer_end_tag": "</answer>",
719
- "think_boost_tokens": 128,
720
- "think_temperature": 0.15,
721
- "answer_temperature": 0.08,
722
- "sampling_top_p": 0.8,
723
- "sampling_min_p": 0.05,
724
- "sampling_top_k": 50,
725
  "repetition_penalty": 1.1,
726
  "repetition_context_size": 20,
727
  "hard_mask_mcq_first_token": true,
@@ -735,34 +741,95 @@
735
  "bias_answer_start": 6.0,
736
  "punish_extra_think_end": -12.0,
737
  "bias_eos_after_answer": 3.0,
738
- "allow_tool_calls": false,
739
- "tool_call_penalty": 1.0,
740
- "reward_content_type": "smart",
741
  "reward_format_weight": 0.05,
742
  "reward_content_weight": 0.7,
743
  "think_reward_weight": 0.25,
744
- "think_len_min": 64,
745
- "think_len_max": 1024,
746
  "non_ascii_penalty": 1.0,
747
  "off_topic_jaccard_threshold": 0.05,
748
  "off_topic_penalty": 1.0,
749
- "ban_keywords": [
750
- "http://",
751
- "**other**",
752
- "https://",
753
- "qwen",
754
- "png",
755
- "jpg",
756
- "Another way",
757
- "json",
758
- "JSON",
759
- "Wait",
760
- "Online",
 
 
 
 
 
 
 
 
761
  "frustrated",
762
- "confused",
763
- "must have an answer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  ],
765
- "ban_penalty": 1.0,
 
 
 
 
 
 
 
 
766
  "use_lora": false,
767
  "lora_rank": 8,
768
  "lora_alpha": 16.0,
@@ -777,30 +844,30 @@
777
  "up_proj",
778
  "down_proj"
779
  ],
780
- "num_rollout_samples": 3,
781
  "ppo_batch_size": 1,
782
- "grpo_beta": 0.085,
783
- "learning_rate": 1e-07,
784
  "optimizer_beta1": 0.9,
785
  "optimizer_beta2": 0.95,
786
  "optimizer_weight_decay": 0.05,
787
- "grad_clip_norm": 0.25,
788
  "save_optimizer_state": false,
789
  "lr_schedule_config": {
790
  "name": "cosine_decay",
791
  "arguments": [
792
- 1e-07,
793
  60000,
794
- 1e-08
795
  ],
796
  "warmup": 4000,
797
  "warmup_init": 1e-08
798
  },
799
- "grad_accum_steps": 2,
800
  "num_training_steps": 45869,
801
- "save_every": 10,
802
- "eval_every": 9999999999999999990,
803
- "seed": 8303,
804
  "shuffle_data": true,
805
  "use_grad_checkpointing": false,
806
  "grad_checkpoint_layers": 0,
@@ -808,7 +875,7 @@
808
  "early_stopping_threshold": 0.005,
809
  "min_trainable_layers": 4,
810
  "use_custom_batch_builder": true,
811
- "invalid_sample_layers": "34,35",
812
  "invalid_sample_frequency": 2,
813
  "log_samples_every": 1,
814
  "max_logged_samples": 50,
@@ -819,15 +886,15 @@
819
  "quantized_kv_start": 10,
820
  "verbose": true,
821
  "use_wandb": true,
822
- "wandb_project": "reasonable-qwen3-4b-mlx-int",
823
  "wandb_entity": null,
824
  "wandb_run_name": null,
825
- "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last29/checkpoint_20250926_012807_shutdown_signal_update_620",
826
  "allow_cross_arch_ref": false,
827
  "align_bridge_path": null,
828
  "align_bridge_weight": 1.0,
829
  "align_pool": "mean",
830
  "align_after_tag": "</think>",
831
- "effective_batch_size": 6
832
  }
833
  }
 
1
  {
2
+ "global_step": 20,
3
+ "num_updates": 20,
4
  "use_lora": false,
5
  "rng_state": {
6
  "python": [
7
  3,
8
  [
9
+ 3216301271,
10
+ 1638589946,
11
+ 2537792956,
12
+ 1791281964,
13
+ 3760261316,
14
+ 922243971,
15
+ 1321160261,
16
+ 1084778220,
17
+ 586720584,
18
+ 1270082830,
19
+ 1488723237,
20
+ 2882031569,
21
+ 472302726,
22
+ 2704091108,
23
+ 2056781727,
24
+ 2970816838,
25
+ 2322412756,
26
+ 1632365467,
27
+ 1300809479,
28
+ 3361280950,
29
+ 3731722602,
30
+ 2265652514,
31
+ 66405514,
32
+ 1998964901,
33
+ 1328256060,
34
+ 3229765999,
35
+ 3472921105,
36
+ 2861524040,
37
+ 3758860811,
38
+ 1614718258,
39
+ 753983115,
40
+ 2957522965,
41
+ 1530571560,
42
+ 4031589591,
43
+ 2433316409,
44
+ 3368174648,
45
+ 2993816745,
46
+ 4197347992,
47
+ 1573952526,
48
+ 344497567,
49
+ 3200698952,
50
+ 3702339252,
51
+ 1865014020,
52
+ 3674977437,
53
+ 1009076463,
54
+ 3380727117,
55
+ 1376568460,
56
+ 1093055879,
57
+ 2664677007,
58
+ 3831752380,
59
+ 3416303751,
60
+ 2668336320,
61
+ 2536986437,
62
+ 2176404736,
63
+ 2459553842,
64
+ 3064478178,
65
+ 134356449,
66
+ 1016307678,
67
+ 2863417742,
68
+ 4204930386,
69
+ 4005432546,
70
+ 3941864302,
71
+ 1389063615,
72
+ 299902141,
73
+ 1534902877,
74
+ 1090782544,
75
+ 1342837280,
76
+ 1509926241,
77
+ 692845405,
78
+ 3324579114,
79
+ 1823107754,
80
+ 421705736,
81
+ 3293085396,
82
+ 1111043475,
83
+ 1268323190,
84
+ 676931135,
85
+ 204668262,
86
+ 1893728502,
87
+ 4037067929,
88
+ 1849257882,
89
+ 93068610,
90
+ 4059570703,
91
+ 3045179051,
92
+ 3539616529,
93
+ 957175217,
94
+ 3886104598,
95
+ 780945561,
96
+ 643091166,
97
+ 1423576215,
98
+ 981472631,
99
+ 3487959523,
100
+ 2218847375,
101
+ 1763709917,
102
+ 3646797235,
103
+ 1339613935,
104
+ 4010160562,
105
+ 2587755169,
106
+ 2435965615,
107
+ 2653610932,
108
+ 1547879087,
109
+ 4227811445,
110
+ 2292992450,
111
+ 1438139136,
112
+ 605911038,
113
+ 2540438098,
114
+ 2258228213,
115
+ 3803308885,
116
+ 1167308659,
117
+ 336102497,
118
+ 80858507,
119
+ 1067538030,
120
+ 3633040700,
121
+ 4117903847,
122
+ 2873585794,
123
+ 2524408933,
124
+ 3246498222,
125
+ 1283032620,
126
+ 3918636776,
127
+ 3851263884,
128
+ 798301987,
129
+ 1112440379,
130
+ 4277952394,
131
+ 629717336,
132
+ 3920106767,
133
+ 2725791487,
134
+ 863162233,
135
+ 2484669209,
136
+ 524662217,
137
+ 594795402,
138
+ 646610193,
139
+ 2016114842,
140
+ 3543138824,
141
+ 1744105850,
142
+ 1471458831,
143
+ 16055520,
144
+ 3451238897,
145
+ 2649894194,
146
+ 28079149,
147
+ 1717400745,
148
+ 3843861055,
149
+ 1729621288,
150
+ 2382674851,
151
+ 1482280295,
152
+ 2484330875,
153
+ 2652905708,
154
+ 1852970052,
155
+ 922832256,
156
+ 981750678,
157
+ 3603955815,
158
+ 48872518,
159
+ 4120226148,
160
+ 3468729345,
161
+ 1359680933,
162
+ 2035523113,
163
+ 1636218722,
164
+ 1135300069,
165
+ 843565866,
166
+ 350364929,
167
+ 610970006,
168
+ 1333520763,
169
+ 40543674,
170
+ 4086935670,
171
+ 3608689159,
172
+ 127008503,
173
+ 33165965,
174
+ 1487923785,
175
+ 557670011,
176
+ 2547038282,
177
+ 2692324307,
178
+ 896487775,
179
+ 3009940986,
180
+ 884172805,
181
+ 3134586463,
182
+ 1896640741,
183
+ 1951622258,
184
+ 3818751434,
185
+ 3774872157,
186
+ 4265178116,
187
+ 22018610,
188
+ 2771183949,
189
+ 1092356279,
190
+ 3626563666,
191
+ 3056832186,
192
+ 466823777,
193
+ 26528483,
194
+ 3306111213,
195
+ 2787064354,
196
+ 1301381161,
197
+ 1357275651,
198
+ 3729056079,
199
+ 3637929366,
200
+ 118695465,
201
+ 3561014385,
202
+ 3485944733,
203
+ 1880147723,
204
+ 2851148920,
205
+ 566721308,
206
+ 1585692209,
207
+ 2174728738,
208
+ 3332856867,
209
+ 3012044400,
210
+ 594669592,
211
+ 1085256034,
212
+ 1911387417,
213
+ 1808424278,
214
+ 49725467,
215
+ 3671361089,
216
+ 3018318529,
217
+ 1321357173,
218
+ 1264096094,
219
+ 2609276892,
220
+ 3780658123,
221
+ 3493487793,
222
+ 1968502013,
223
+ 1590268860,
224
+ 1027860715,
225
+ 3864311766,
226
+ 3552593723,
227
+ 3210731206,
228
+ 2822056447,
229
+ 2307565781,
230
+ 3348916517,
231
+ 4141557819,
232
+ 1845648049,
233
+ 3846513828,
234
+ 4062979700,
235
+ 1920414511,
236
+ 419748265,
237
+ 2517978192,
238
+ 2209214935,
239
+ 4000522468,
240
+ 3350224734,
241
+ 1274601663,
242
+ 238404923,
243
+ 2695642219,
244
+ 3189244541,
245
+ 1576264231,
246
+ 584860670,
247
+ 1162133805,
248
+ 1888475678,
249
+ 1657591314,
250
+ 1973127602,
251
+ 4181341606,
252
+ 328594839,
253
+ 2972579801,
254
+ 64884668,
255
+ 57352394,
256
+ 3366585502,
257
+ 1332667901,
258
+ 259198561,
259
+ 2770398609,
260
+ 2623481028,
261
+ 4151229829,
262
+ 4187964972,
263
+ 1479448580,
264
+ 3089063047,
265
+ 3738968187,
266
+ 2316267302,
267
+ 1628900130,
268
+ 57574938,
269
+ 3860463424,
270
+ 2550525000,
271
+ 1378666729,
272
+ 440952975,
273
+ 3063789057,
274
+ 2162250394,
275
+ 392699033,
276
+ 1437494348,
277
+ 3730138982,
278
+ 843908175,
279
+ 1654216796,
280
+ 1149264807,
281
+ 3269464133,
282
+ 1643181417,
283
+ 223165507,
284
+ 1833610851,
285
+ 2905543205,
286
+ 1581267380,
287
+ 552340655,
288
+ 141518224,
289
+ 1375368123,
290
+ 1797843945,
291
+ 105854190,
292
+ 3107423202,
293
+ 1729432723,
294
+ 1453922408,
295
+ 421285811,
296
+ 4260773214,
297
+ 1663467292,
298
+ 3010424040,
299
+ 998308271,
300
+ 1531244161,
301
+ 441858737,
302
+ 1026159130,
303
+ 3807950868,
304
+ 2102254140,
305
+ 651977202,
306
+ 2742723439,
307
+ 3026135341,
308
+ 3429613629,
309
+ 3093870412,
310
+ 3975773274,
311
+ 1843002010,
312
+ 218264968,
313
+ 213172249,
314
+ 878259451,
315
+ 3304491193,
316
+ 592609509,
317
+ 1227930501,
318
+ 2939504209,
319
+ 1028100527,
320
+ 910420457,
321
+ 4249171053,
322
+ 2436625031,
323
+ 280632582,
324
+ 3824207624,
325
+ 3808044241,
326
+ 1540222143,
327
+ 130394784,
328
+ 3235395054,
329
+ 1855681800,
330
+ 285255351,
331
+ 2006409572,
332
+ 1456272851,
333
+ 1035666983,
334
+ 1012020147,
335
+ 1728317729,
336
+ 2980706887,
337
+ 3022227688,
338
+ 3391984528,
339
+ 379619077,
340
+ 2645906665,
341
+ 1698735140,
342
+ 821721005,
343
+ 920711771,
344
+ 722909650,
345
+ 18039109,
346
+ 2815813575,
347
+ 1976471014,
348
+ 354541559,
349
+ 1674510917,
350
+ 678839048,
351
+ 3832607702,
352
+ 4010081893,
353
+ 1057864545,
354
+ 317734621,
355
+ 3197537114,
356
+ 372364370,
357
+ 2227253439,
358
+ 36215654,
359
+ 477947141,
360
+ 3408831109,
361
+ 913622025,
362
+ 2005920223,
363
+ 1594738861,
364
+ 2510486329,
365
+ 1289342545,
366
+ 2657087163,
367
+ 209514413,
368
+ 2695143954,
369
+ 1593251690,
370
+ 1168296992,
371
+ 3825393452,
372
+ 3829339427,
373
+ 3450734470,
374
+ 3726943420,
375
+ 2873788605,
376
+ 4076445439,
377
+ 1642259085,
378
+ 2820605589,
379
+ 1349230573,
380
+ 2914266145,
381
+ 193008665,
382
+ 2400069287,
383
+ 739496051,
384
+ 3978596673,
385
+ 1734592292,
386
+ 3830180644,
387
+ 687951849,
388
+ 2385518411,
389
+ 3994677982,
390
+ 3353206377,
391
+ 3164975800,
392
+ 2853766442,
393
+ 3048732068,
394
+ 1325296937,
395
+ 2088553967,
396
+ 2507747299,
397
+ 2274111218,
398
+ 2817243984,
399
+ 72033310,
400
+ 2914210511,
401
+ 1525724516,
402
+ 1394521015,
403
+ 186442305,
404
+ 816713634,
405
+ 2091595891,
406
+ 2122879665,
407
+ 3540000281,
408
+ 124073308,
409
+ 1552560750,
410
+ 4215113149,
411
+ 1312110692,
412
+ 1262184467,
413
+ 76709321,
414
+ 3169759337,
415
+ 1789679024,
416
+ 1259840820,
417
+ 1011505219,
418
+ 1791532447,
419
+ 3407943243,
420
+ 772528328,
421
+ 924508991,
422
+ 2418186248,
423
+ 3862012009,
424
+ 4079302195,
425
+ 3496350600,
426
+ 4262944162,
427
+ 861726770,
428
+ 470271259,
429
+ 2830298055,
430
+ 4068995925,
431
+ 1295520851,
432
+ 1695434781,
433
+ 4069470402,
434
+ 1142976013,
435
+ 492294381,
436
+ 3873594410,
437
+ 2525043471,
438
+ 3995502078,
439
+ 2809340295,
440
+ 941315540,
441
+ 2373864457,
442
+ 1434156134,
443
+ 1234185530,
444
+ 2787227707,
445
+ 4036418762,
446
+ 2529809824,
447
+ 4134811789,
448
+ 1906789833,
449
+ 4001687990,
450
+ 1520529512,
451
+ 272516287,
452
+ 1662764451,
453
+ 1228753286,
454
+ 1246993659,
455
+ 877037883,
456
+ 64379529,
457
+ 1609700841,
458
+ 2449854583,
459
+ 2937445549,
460
+ 182121114,
461
+ 2415096642,
462
+ 1326797853,
463
+ 994909645,
464
+ 1728401166,
465
+ 2042093922,
466
+ 2083138154,
467
+ 630862173,
468
+ 1839644782,
469
+ 1106108870,
470
+ 3114161313,
471
+ 1090438833,
472
+ 4163538017,
473
+ 3067452757,
474
+ 3473448680,
475
+ 2712244648,
476
+ 3433442429,
477
+ 3758650831,
478
+ 1920842611,
479
+ 2784630272,
480
+ 3809793223,
481
+ 485700721,
482
+ 2394860714,
483
+ 1191485807,
484
+ 1463231400,
485
+ 1786607131,
486
+ 1627757854,
487
+ 3264969659,
488
+ 3230336784,
489
+ 3221534551,
490
+ 3582581842,
491
+ 2120261546,
492
+ 311188027,
493
+ 384727679,
494
+ 2995099829,
495
+ 2745968316,
496
+ 4257849614,
497
+ 3614085722,
498
+ 319327385,
499
+ 3526297819,
500
+ 1357165933,
501
+ 4161273241,
502
+ 509451631,
503
+ 684166916,
504
+ 1094549025,
505
+ 2340900655,
506
+ 878685951,
507
+ 1079285739,
508
+ 2862658053,
509
+ 2668448333,
510
+ 380528928,
511
+ 4217208245,
512
+ 2802311413,
513
+ 2248154821,
514
+ 3231776626,
515
+ 211925274,
516
+ 2385310660,
517
+ 4135810322,
518
+ 2183561119,
519
+ 372329701,
520
+ 1873336116,
521
+ 3345280608,
522
+ 2404818403,
523
+ 83489739,
524
+ 773594056,
525
+ 3199418238,
526
+ 1593518424,
527
+ 2753303056,
528
+ 3562772212,
529
+ 935855294,
530
+ 1627465530,
531
+ 2356582227,
532
+ 2412204406,
533
+ 3612892106,
534
+ 3582378062,
535
+ 1915113233,
536
+ 3492335241,
537
+ 493539992,
538
+ 4111854657,
539
+ 1577508821,
540
+ 3037703576,
541
+ 2617037207,
542
+ 2337542484,
543
+ 1249034540,
544
+ 682590757,
545
+ 1569972856,
546
+ 3915350413,
547
+ 1925553231,
548
+ 301159787,
549
+ 2156957783,
550
+ 3167039132,
551
+ 1266518600,
552
+ 103091609,
553
+ 6603238,
554
+ 3398481065,
555
+ 1496671537,
556
+ 1237235668,
557
+ 2074950188,
558
+ 459863858,
559
+ 619747929,
560
+ 3239256589,
561
+ 196803419,
562
+ 3252960935,
563
+ 1459794254,
564
+ 3967706347,
565
+ 3765913008,
566
+ 2459110550,
567
+ 309720372,
568
+ 4150471162,
569
+ 3536834001,
570
+ 3611312688,
571
+ 2757603619,
572
+ 4233589760,
573
+ 3138192018,
574
+ 781794088,
575
+ 4267631241,
576
+ 1678966961,
577
+ 1957364830,
578
+ 3614555434,
579
+ 4047113479,
580
+ 2143747495,
581
+ 315790501,
582
+ 3400984168,
583
+ 3772021651,
584
+ 2072010687,
585
+ 2165280713,
586
+ 198382056,
587
+ 3607795865,
588
+ 2619689202,
589
+ 469410151,
590
+ 3678263974,
591
+ 190658400,
592
+ 3876953403,
593
+ 2375580782,
594
+ 3824379237,
595
+ 419148754,
596
+ 225777110,
597
+ 1767601301,
598
+ 1715000632,
599
+ 2103758356,
600
+ 796280675,
601
+ 3861354653,
602
+ 2535720117,
603
+ 282490129,
604
+ 4280453973,
605
+ 1572850125,
606
+ 3926912307,
607
+ 2704865264,
608
+ 685706099,
609
+ 3652913870,
610
+ 4129365562,
611
+ 2144336147,
612
+ 4199977075,
613
+ 3698970464,
614
+ 1850525854,
615
+ 1206240743,
616
+ 3518376768,
617
+ 480369488,
618
+ 5189568,
619
+ 2270418116,
620
+ 351197706,
621
+ 1404922079,
622
+ 1359970593,
623
+ 25133159,
624
+ 2778255563,
625
+ 1905723803,
626
+ 3873039996,
627
+ 2347023852,
628
+ 4121935682,
629
+ 1561583794,
630
+ 232195495,
631
+ 1099088568,
632
+ 3128551111,
633
+ 100
634
  ],
635
  null
636
  ],
637
  "numpy": [
638
  "MT19937",
639
+ "[ 22934 3945700911 2671637342 1915830095 191936458 341417143\n 2033828665 3016424735 3092603257 4181149840 3903540489 3407199997\n 599541826 2915161111 4035635543 251134515 879438127 4221200028\n 3341718477 1964740185 4151591116 4046805184 1742823429 3899196843\n 3977126752 3543724328 2327062801 1184128922 1089514307 4050117415\n 1015472466 3507079033 2700213570 3972016737 3207408332 2891719017\n 1546154587 429737895 2952176649 2321914750 2326770708 4081832663\n 3297354190 444500748 162154472 3355543221 816548860 628889243\n 877321815 2061977476 1395156139 3849630021 1523738322 1104051316\n 977581919 3671535538 2034403853 1232397045 2906529406 3174458151\n 327609045 800695110 730325212 3656741643 3718097512 2622783352\n 3655636324 3572764902 3013097373 3928688896 3991021941 3433333205\n 2604449718 4017813069 3983862544 2993875402 2867479604 1919405979\n 1447318032 55998980 2659600356 1723757583 2309048280 2332754773\n 2340610215 3579702894 4087506519 1143034747 1999124858 4294869216\n 2090292457 1451010019 3291754118 3072231382 3163032322 2089768799\n 3856713334 3306547594 2858316911 4002235492 170400775 822332968\n 3807100462 3741523240 2533590367 1481466650 1515097361 1173043131\n 1958053582 879490840 1467765990 3553500818 3768887461 1673617391\n 3743627096 3372321370 1964258961 1075615557 1796674122 3169934094\n 1111906356 2422745698 1617303642 3117240162 3642405212 3725382136\n 424356741 2174048504 4083148066 2254114694 3654730646 360245068\n 2408508800 1997293263 1633562060 2370160488 1076193114 2324593264\n 1524322692 2378848004 579898346 2303552991 3630341567 3312834491\n 2113377320 2316723646 2406203326 3443671487 2356513728 4235785759\n 3794933154 2394731036 3533067374 3547899034 182309367 4037150222\n 1266964669 1540857545 1468181894 672970210 2178397386 4216413065\n 1776023572 538910188 3032357568 2661310767 226651239 2336810058\n 2481232144 1989740227 3058400564 2252623097 2231481011 4045617538\n 4214405523 323044991 150194123 413562568 1857414554 2967505370\n 3611491052 1983899648 2657489435 681805204 2718689308 1320985231\n 1565270208 3721868768 4008951371 1257675557 4199381490 946476500\n 274010468 2711249205 1013959285 2426830316 76942762 875126487\n 66353817 1262436388 3863682145 746628467 1903219241 3239203475\n 4233733020 356951432 439162742 3784468573 3677322214 3796660010\n 2244201727 3856549284 4287158967 4176780761 1777463768 1875938420\n 3953461249 1500724131 4006492612 481905502 906103538 3410482263\n 3796089346 88730692 3413547956 891379476 3684186054 3142151836\n 3732747066 979220322 2311769232 343595137 702248141 2729560778\n 1092749778 846515242 3061647230 1824084185 1732804134 1915594066\n 46179759 2322925052 2454866984 2455799685 290306103 2831162792\n 3947998984 4214485838 198062937 4114867222 829110851 3176192106\n 3213691396 3677104987 1795633846 2868638258 693255664 1109909681\n 3586774642 3850048664 1540560171 2093012119 2180130356 2926967381\n 2212844635 2651646246 3554164798 3428646428 1804065351 2689766571\n 3468826811 3201710503 518065193 1063960126 2157854088 1411588741\n 2700165928 709294759 4172628985 1958386873 3565571504 1551266744\n 347646487 1518496558 1125740967 979742107 2537714245 1347425826\n 3932583919 672046397 2828316467 2327926904 435630918 2011279811\n 3363435696 2432063174 352002172 3738840597 962065880 2334955363\n 3577848689 3548086055 817852002 172425177 2547847885 284308444\n 2287153918 67107487 2138437871 2722403611 1485321491 3718353745\n 2278818450 3723760649 4098573100 1391372742 158694335 2429291160\n 2489489920 1566561801 1519499880 488619438 298448104 2870331595\n 1904920977 1418696469 3322695722 920687220 3324370188 3483778868\n 3538706685 2249995137 2907074299 586943114 819594176 205863951\n 3705518651 2208985449 1725825929 3727896059 4019379244 4128179936\n 315765733 4059498416 1668929015 340529511 2770599421 4028214774\n 2515320837 224166688 3716930814 140285488 876016208 1636461297\n 3144065554 178815411 1908996099 286321711 324753137 2435270524\n 1714660382 2249021604 2084491496 2767461976 2231641326 1155063433\n 2925434134 3970205267 3573141248 2741782688 3238353756 2560353518\n 1970773648 3724814762 1099883043 3679546081 1568677538 241586888\n 337883746 1161417765 1062467504 4051560941 1985981028 716261464\n 3524305976 705781704 3818181482 2193201904 1002677502 4067297211\n 1569883422 199564738 4137652498 3574421054 1226091675 2512159821\n 3898047415 3450020753 6564904 3627996503 910172084 2251282325\n 610328357 3886405676 3798496031 781412257 67374363 2153357374\n 1270895940 1097241298 3117406681 3818137602 2531480321 3090242764\n 2277891300 2257665629 2588374043 3556847742 1456867315 474981917\n 2451378197 1471457720 4180277411 268220615 2399692587 195339734\n 2240738584 3729235693 3335590802 149705442 2579867352 4181426097\n 3249382634 622969502 2982828040 238557093 3796607693 496597755\n 289069501 4209577544 3198535759 3853794586 1283292567 215077865\n 1888439721 414223621 809652663 4159057394 3925678805 3356630575\n 75461918 3620226201 653229190 1976162467 3254725552 3455322726\n 3214616737 2171290904 1392717580 969574124 1871677672 3729045178\n 2180258251 1738129180 2629430593 371750976 2953949970 2503948067\n 609774297 536824434 1605198544 1761662796 1067984953 1080910166\n 1369438253 689993527 270301583 1060273736 3363331142 583030296\n 513593688 3645813145 2890759588 3931794017 424176782 2609697771\n 3545648595 1902183671 2443746550 3996156461 3859807248 985243754\n 2633696702 3175014681 3691201685 3038271261 4010997803 576803257\n 3979362543 3174820623 2731062037 984021256 2925365790 1888779011\n 360094658 192169859 4156183465 1961819661 177383608 3845065365\n 710112556 550881371 990757095 1660533796 1933183387 3225368005\n 2904633122 3660560805 3132626564 1415092965 504701436 1802126965\n 500031950 78861905 1546217729 1686597389 1026571722 3718216897\n 184421530 2603574483 2220728711 3674466444 803752319 3430086704\n 2106865973 1585670043 1769950170 902701696 2085232282 3990620226\n 1040256193 3414446914 3465126851 2670681311 3966560849 2426164859\n 558096607 2906132766 2685058608 795069919 3500511777 1131731857\n 3884626424 1423215920 3217158015 3105383292 4098863330 3409982450\n 2746710339 2215485908 433480094 1059328903 3026187637 1853458726\n 3563715991 3851501977 295718392 2143321359 3175077566 54416997\n 2627151123 3656301296 1575014171 4198277503 3871020074 4273465196\n 292665355 1772650904 4093969567 3853930447 1882565056 223036522\n 489994776 4094444991 307044724 1501645069 2406301702 2105042911\n 1864944354 4292905180 4008597833 2060965505 3597804240 2809113232\n 324300780 1873376879 1665046970 3222584348 431214993 1175406988\n 1363115001 187409201 3840122287 4242686775 2712099296 592341639\n 3265480865 2159565897 2620231415 1143834122 2432616377 3884245546\n 4019465617 2832670463 1365221687 2021294773 2603870060 3193319119\n 4205260363 4289842387 1634803836 2607790270 2048163994 187085462]",
640
  624,
641
  0,
642
  0.0
643
  ],
644
+ "mlx": 1766497666,
645
  "mlx_key": [
646
  0,
647
+ 1766461675
648
  ]
649
  },
650
  "training_args_snapshot": {
651
+ "output_dir": "outy1266_align_last32",
652
+ "max_kv_size": 1024,
653
+ "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
654
+ "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
655
  "draft_model_path": null,
656
+ "punish_reopen_think": -10.0,
657
  "punish_reopen_answer": -9.0,
658
  "low_band": [
659
  0,
 
661
  ],
662
  "mid_band": [
663
  16,
664
+ 23
665
  ],
666
  "top_band": [
667
+ 24,
668
  35
669
  ],
670
+ "low_mul": 0.1,
671
+ "mid_mul": 0.95,
672
+ "top_mul": 1.5,
673
+ "head_mul": 1.2,
674
+ "train_layer_start": 22,
675
  "train_layer_end": 35,
676
+ "think_length_target_min": 8,
677
+ "think_length_target_max": 64,
678
+ "think_length_penalty_strength": 0.8,
679
+ "think_length_penalty_type": "exponential",
680
+ "enable_think_length_penalty": true,
681
  "run_server": false,
682
  "use_paged_kv_cache": true,
683
  "kv_cache_block_size": 16,
 
688
  "benchmark_split": "test",
689
  "benchmark_prompt_key": "question",
690
  "benchmark_answer_key": "answer",
691
+ "benchmark_samples": 10,
692
  "benchmark_max_new_tokens": 196,
693
  "benchmark_temperature": 0.0,
694
  "benchmark_top_p": 1.0,
695
  "benchmark_top_k": 0,
696
  "benchmark_use_chat_template": true,
697
  "benchmark_stop_on_error": false,
698
+ "min_think_tokens": 16,
699
  "think_end_early_bias": -12.0,
700
  "bias_answer_start_after_min_think": true,
701
+ "train_dataset_path": "strat/train.jsonl",
702
+ "val_dataset_path": "strat/valid.jsonl",
703
  "dataset_name": null,
704
  "dataset_config": null,
705
  "dataset_train_split": "train",
 
708
  "dataset_answer_key": "completion",
709
  "dataset_filter_keywords": [
710
  "http://",
711
+ "**other**",
712
  "https://",
713
  "png",
714
  "jpg",
715
+ "Another way",
716
+ "Adeel"
717
  ],
718
  "max_prompt_len": 350,
719
+ "max_gen_len": 384,
720
  "system_prompt": null,
721
  "think_start_tag": "<think>",
722
  "think_end_tag": "</think>",
723
  "answer_start_tag": "<answer>",
724
  "answer_end_tag": "</answer>",
725
+ "think_boost_tokens": 1,
726
+ "think_temperature": 0.35,
727
+ "answer_temperature": 0.2,
728
+ "sampling_top_p": 0.6,
729
+ "sampling_min_p": 0.0,
730
+ "sampling_top_k": 60,
731
  "repetition_penalty": 1.1,
732
  "repetition_context_size": 20,
733
  "hard_mask_mcq_first_token": true,
 
741
  "bias_answer_start": 6.0,
742
  "punish_extra_think_end": -12.0,
743
  "bias_eos_after_answer": 3.0,
744
+ "allow_tool_calls": true,
745
+ "tool_call_penalty": 0.0,
746
+ "reward_content_type": "steps",
747
  "reward_format_weight": 0.05,
748
  "reward_content_weight": 0.7,
749
  "think_reward_weight": 0.25,
750
+ "think_len_min": 16,
751
+ "think_len_max": 64,
752
  "non_ascii_penalty": 1.0,
753
  "off_topic_jaccard_threshold": 0.05,
754
  "off_topic_penalty": 1.0,
755
+ "ban_keywords": [],
756
+ "ban_penalty": 3.0,
757
+ "ban_phrases_for_bias": [
758
+ "I think the answer",
759
+ "I believe that",
760
+ "In my view",
761
+ "From what I can tell",
762
+ "It seems to me",
763
+ "It appears that",
764
+ "My understanding is",
765
+ "As far as I know",
766
+ "Let me start by",
767
+ "Let me first",
768
+ "I should probably",
769
+ "I need to figure out",
770
+ "I'm trying to",
771
+ "I'm going to try",
772
+ "I'll attempt to",
773
+ "Confused",
774
+ "stuck",
775
  "frustrated",
776
+ "frustrating",
777
+ "Alternatively",
778
+ "Actually",
779
+ "Probably not sure",
780
+ "Uncertain about",
781
+ "Unclear whether",
782
+ "I'm guessing that",
783
+ "maybe this is",
784
+ "Could be that",
785
+ "Might be because",
786
+ "I'm not 100% sure",
787
+ "I'm not sure if",
788
+ "I'm not certain",
789
+ "Hard to say",
790
+ "Difficult to tell",
791
+ "Circular reasoning detected",
792
+ "In some way or another",
793
+ "Magically works",
794
+ "For some unknown reason",
795
+ "Too complicated",
796
+ "It just somehow",
797
+ "Something seems off",
798
+ "False assumption",
799
+ "Insufficient information to",
800
+ "Wait, what if",
801
+ "Wait, actually no",
802
+ "Wait, on second thought",
803
+ "Hold on, maybe",
804
+ "Hmm, perhaps",
805
+ "Or wait, could",
806
+ "Looking at this more closely",
807
+ "Upon further reflection",
808
+ "Taking a step back",
809
+ "Thinking about it more",
810
+ "Now that I consider",
811
+ "When I really think",
812
+ "If I had to guess",
813
+ "To be completely honest",
814
+ "In all honesty",
815
+ "You know what",
816
+ "The thing is",
817
+ "What I mean is",
818
+ "In other words",
819
+ "Put simply",
820
+ "Basically what happens",
821
+ "Long story short",
822
+ "At the end of the day"
823
  ],
824
+ "encourage_phrases_for_bias": [],
825
+ "encourage_think_bias": 4.5,
826
+ "ban_think_bias": -3.0,
827
+ "symbolic_bonus_per_token": 0.07,
828
+ "max_words_per_think_line": 12,
829
+ "verbosity_penalty_per_word": 0.01,
830
+ "min_unique_token_ratio": 0.75,
831
+ "low_diversity_penalty": 0.5,
832
+ "telegram_style_bonus": 0.25,
833
  "use_lora": false,
834
  "lora_rank": 8,
835
  "lora_alpha": 16.0,
 
844
  "up_proj",
845
  "down_proj"
846
  ],
847
+ "num_rollout_samples": 2,
848
  "ppo_batch_size": 1,
849
+ "grpo_beta": 0.005,
850
+ "learning_rate": 3e-05,
851
  "optimizer_beta1": 0.9,
852
  "optimizer_beta2": 0.95,
853
  "optimizer_weight_decay": 0.05,
854
+ "grad_clip_norm": 0.35,
855
  "save_optimizer_state": false,
856
  "lr_schedule_config": {
857
  "name": "cosine_decay",
858
  "arguments": [
859
+ 3e-05,
860
  60000,
861
+ 5e-08
862
  ],
863
  "warmup": 4000,
864
  "warmup_init": 1e-08
865
  },
866
+ "grad_accum_steps": 1,
867
  "num_training_steps": 45869,
868
+ "save_every": 5,
869
+ "eval_every": 10,
870
+ "seed": 22934,
871
  "shuffle_data": true,
872
  "use_grad_checkpointing": false,
873
  "grad_checkpoint_layers": 0,
 
875
  "early_stopping_threshold": 0.005,
876
  "min_trainable_layers": 4,
877
  "use_custom_batch_builder": true,
878
+ "invalid_sample_layers": "33,34,35",
879
  "invalid_sample_frequency": 2,
880
  "log_samples_every": 1,
881
  "max_logged_samples": 50,
 
886
  "quantized_kv_start": 10,
887
  "verbose": true,
888
  "use_wandb": true,
889
+ "wandb_project": "reasonable-qwen3-4b-mlx-two",
890
  "wandb_entity": null,
891
  "wandb_run_name": null,
892
+ "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last32/checkpoint_20251223_144746_periodic_update_20",
893
  "allow_cross_arch_ref": false,
894
  "align_bridge_path": null,
895
  "align_bridge_weight": 1.0,
896
  "align_pool": "mean",
897
  "align_after_tag": "</think>",
898
+ "effective_batch_size": 2
899
  }
900
  }
xspecial_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }