brthor commited on
Commit
2dec6c5
·
verified ·
1 Parent(s): ef21f4b

Upload full trainer checkpoint step=1521 (profile=llama3_2_1b_base_grpo, run_type=grpo)

Browse files
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/adapter_config.json CHANGED
@@ -33,13 +33,13 @@
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
 
 
 
36
  "down_proj",
37
  "o_proj",
38
- "q_proj",
39
  "gate_proj",
40
- "k_proj",
41
- "up_proj",
42
- "v_proj"
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
 
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "up_proj",
37
+ "k_proj",
38
+ "v_proj",
39
  "down_proj",
40
  "o_proj",
 
41
  "gate_proj",
42
+ "q_proj"
 
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b64520dac120360a681172ee6c20f6a8a359f022b00e3736a03fa8f661bc82d
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525397e0ef3aa15a7fcebf66df0a5900bdbf57328545211f70ccbe51da3755fe
3
  size 45118424
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0830b03c2dcc83bcc0067bae18a13b427eafd77e198cb48e8c6b6ad8abeb1c0c
3
  size 23162187
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7bebb0a43f2b28a2d033fc7b2e0996db510e45e86b7c962f42f3273e1b516e
3
  size 23162187
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:226d4b57f915ee5226f4e1a0ab194b7b91546f46d15d7a9bc73d173fa0e7e5eb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00f874dd9d7ae4223b7072142300bf2ea2e985a109e5fc02ebebfca0a03f44ac
3
  size 14645
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bc9636f2dbc22ed3ccdb05391916eed679ce751978eea151bc52b8cfe3954b4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe0516817ef50b8c17ec21995e3135256f60beca23aec092a3ecc2038dc33d6
3
  size 1465
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0027015601909741956,
6
  "eval_steps": 500,
7
  "global_step": 1521,
8
  "is_hyper_param_search": false,
@@ -15,25 +15,25 @@
15
  "clip_ratio/low_mean": 0.0,
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
- "completion_length": 65.67,
19
  "completions/clipped_ratio": 0.0,
20
- "completions/max_length": 93.75,
21
- "completions/max_terminated_length": 93.75,
22
- "completions/mean_length": 63.3203125,
23
- "completions/mean_terminated_length": 63.3203125,
24
- "completions/min_length": 38.25,
25
- "completions/min_terminated_length": 38.25,
26
- "epoch": 8.88086847789019e-05,
27
- "frac_reward_zero_std": 0.5625,
28
- "grad_norm": 0.007002627942711115,
29
  "kl": 0.0,
30
- "learning_rate": 2.7197442330321267e-08,
31
- "loss": -0.0006554330885410309,
32
- "num_tokens": 57618.0,
33
- "reward": -6.51925802230835e-09,
34
- "reward_std": 0.48731958121061325,
35
- "rewards/TranscriptCorrectionGrpoReward/mean": -8.381903171539307e-09,
36
- "rewards/TranscriptCorrectionGrpoReward/std": 1.007905274629593,
37
  "step": 50
38
  },
39
  {
@@ -42,52 +42,52 @@
42
  "clip_ratio/low_mean": 0.0,
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
- "completion_length": 52.34,
46
  "completions/clipped_ratio": 0.0,
47
- "completions/max_length": 84.33333333333333,
48
- "completions/max_terminated_length": 84.33333333333333,
49
- "completions/mean_length": 51.208333333333336,
50
- "completions/mean_terminated_length": 51.208333333333336,
51
- "completions/min_length": 26.0,
52
- "completions/min_terminated_length": 26.0,
53
- "epoch": 0.0001776173695578038,
54
- "frac_reward_zero_std": 0.6666666666666666,
55
  "grad_norm": 0.0,
56
  "kl": 0.0,
57
- "learning_rate": 5.494993450411847e-08,
58
- "loss": 0.0013675823807716369,
59
- "num_tokens": 94074.0,
60
- "reward": 1.4901161193847656e-08,
61
- "reward_std": 0.44680649042129517,
62
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.179675261179606e-08,
63
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
64
  "step": 100
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
68
  "clip_ratio/high_mean": 0.0,
69
- "clip_ratio/low_mean": 0.00017899675294756888,
70
- "clip_ratio/low_min": 0.00017899675294756888,
71
- "clip_ratio/region_mean": 0.00017899675294756888,
72
- "completion_length": 64.945,
73
  "completions/clipped_ratio": 0.0,
74
- "completions/max_length": 119.33333333333333,
75
- "completions/max_terminated_length": 119.33333333333333,
76
- "completions/mean_length": 66.65625,
77
- "completions/mean_terminated_length": 66.65625,
78
- "completions/min_length": 39.0,
79
- "completions/min_terminated_length": 39.0,
80
- "epoch": 0.0002664260543367057,
81
- "frac_reward_zero_std": 0.7083333333333334,
82
- "grad_norm": 0.015731461346149445,
83
  "kl": 0.0,
84
- "learning_rate": 8.270242667791569e-08,
85
- "loss": 0.0017325745522975922,
86
- "num_tokens": 139152.0,
87
- "reward": 2.483526865641276e-09,
88
- "reward_std": 0.2613494098186493,
89
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
90
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
91
  "step": 150
92
  },
93
  {
@@ -96,52 +96,52 @@
96
  "clip_ratio/low_mean": 0.0,
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
- "completion_length": 60.15,
100
  "completions/clipped_ratio": 0.0,
101
- "completions/max_length": 99.0,
102
- "completions/max_terminated_length": 99.0,
103
- "completions/mean_length": 63.015625,
104
- "completions/mean_terminated_length": 63.015625,
105
- "completions/min_length": 32.666666666666664,
106
- "completions/min_terminated_length": 32.666666666666664,
107
- "epoch": 0.0003552347391156076,
108
- "frac_reward_zero_std": 0.4583333333333333,
109
- "grad_norm": 0.039579447358846664,
110
  "kl": 0.0,
111
- "learning_rate": 1.1045491885171288e-07,
112
- "loss": -0.0044269835948944096,
113
- "num_tokens": 182803.0,
114
- "reward": -4.967053731282552e-09,
115
- "reward_std": 0.5628267228603363,
116
- "rewards/TranscriptCorrectionGrpoReward/mean": -2.483526865641276e-09,
117
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
118
  "step": 200
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
122
  "clip_ratio/high_mean": 0.0,
123
- "clip_ratio/low_mean": 0.00016559829469770193,
124
- "clip_ratio/low_min": 0.00016559829469770193,
125
- "clip_ratio/region_mean": 0.00016559829469770193,
126
- "completion_length": 64.905,
127
  "completions/clipped_ratio": 0.0,
128
- "completions/max_length": 98.66666666666667,
129
- "completions/max_terminated_length": 98.66666666666667,
130
- "completions/mean_length": 64.203125,
131
- "completions/mean_terminated_length": 64.203125,
132
- "completions/min_length": 35.666666666666664,
133
- "completions/min_terminated_length": 35.666666666666664,
134
- "epoch": 0.0004440434238945095,
135
- "frac_reward_zero_std": 0.4166666666666667,
136
- "grad_norm": 0.011699045076966286,
137
  "kl": 0.0,
138
- "learning_rate": 1.382074110255101e-07,
139
- "loss": 0.00223430335521698,
140
- "num_tokens": 227314.0,
141
- "reward": 2.2351741790771484e-08,
142
- "reward_std": 0.4298504690329234,
143
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.241763432820638e-08,
144
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
145
  "step": 250
146
  },
147
  {
@@ -150,106 +150,106 @@
150
  "clip_ratio/low_mean": 0.0,
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
- "completion_length": 70.07,
154
  "completions/clipped_ratio": 0.0,
155
- "completions/max_length": 108.0,
156
- "completions/max_terminated_length": 108.0,
157
- "completions/mean_length": 70.63541666666667,
158
- "completions/mean_terminated_length": 70.63541666666667,
159
- "completions/min_length": 34.0,
160
- "completions/min_terminated_length": 34.0,
161
- "epoch": 0.0005328521086734114,
162
- "frac_reward_zero_std": 0.5833333333333334,
163
- "grad_norm": 0.0456908717751503,
164
  "kl": 0.0,
165
- "learning_rate": 1.659599031993073e-07,
166
- "loss": -0.0011175717413425446,
167
- "num_tokens": 275956.0,
168
- "reward": 5.587935447692871e-09,
169
- "reward_std": 0.4012269576390584,
170
- "rewards/TranscriptCorrectionGrpoReward/mean": 3.104408582051595e-09,
171
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
172
  "step": 300
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
176
  "clip_ratio/high_mean": 0.0,
177
- "clip_ratio/low_mean": 5.8139534667134284e-05,
178
- "clip_ratio/low_min": 5.8139534667134284e-05,
179
- "clip_ratio/region_mean": 5.8139534667134284e-05,
180
- "completion_length": 62.93,
181
  "completions/clipped_ratio": 0.0,
182
- "completions/max_length": 95.66666666666667,
183
- "completions/max_terminated_length": 95.66666666666667,
184
- "completions/mean_length": 61.6875,
185
- "completions/mean_terminated_length": 61.6875,
186
- "completions/min_length": 29.333333333333332,
187
- "completions/min_terminated_length": 29.333333333333332,
188
- "epoch": 0.0006216607934523133,
189
- "frac_reward_zero_std": 0.75,
190
- "grad_norm": 0.0190290417522192,
191
  "kl": 0.0,
192
- "learning_rate": 1.9371239537310452e-07,
193
- "loss": -0.0010719958692789078,
194
- "num_tokens": 319328.0,
195
- "reward": -2.483526865641276e-09,
196
- "reward_std": 0.34829530119895935,
197
- "rewards/TranscriptCorrectionGrpoReward/mean": -4.967053731282552e-09,
198
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
199
  "step": 350
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
203
  "clip_ratio/high_mean": 0.0,
204
- "clip_ratio/low_mean": 7.352941203862428e-05,
205
- "clip_ratio/low_min": 7.352941203862428e-05,
206
- "clip_ratio/region_mean": 7.352941203862428e-05,
207
- "completion_length": 62.725,
208
  "completions/clipped_ratio": 0.0,
209
- "completions/max_length": 105.66666666666667,
210
- "completions/max_terminated_length": 105.66666666666667,
211
- "completions/mean_length": 62.890625,
212
- "completions/mean_terminated_length": 62.890625,
213
- "completions/min_length": 34.666666666666664,
214
- "completions/min_terminated_length": 34.666666666666664,
215
- "epoch": 0.0007104694782312152,
216
- "frac_reward_zero_std": 0.5,
217
- "grad_norm": 0.02625960297882557,
218
  "kl": 0.0,
219
- "learning_rate": 2.2146488754690174e-07,
220
- "loss": 0.0013731226325035095,
221
- "num_tokens": 362563.0,
222
- "reward": 4.967053731282552e-09,
223
- "reward_std": 0.44316299756368,
224
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
225
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
226
  "step": 400
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
230
  "clip_ratio/high_mean": 0.0,
231
- "clip_ratio/low_mean": 8.695651777088643e-05,
232
- "clip_ratio/low_min": 8.695651777088643e-05,
233
- "clip_ratio/region_mean": 8.695651777088643e-05,
234
- "completion_length": 66.03,
235
  "completions/clipped_ratio": 0.0,
236
- "completions/max_length": 112.0,
237
- "completions/max_terminated_length": 112.0,
238
- "completions/mean_length": 70.2890625,
239
- "completions/mean_terminated_length": 70.2890625,
240
- "completions/min_length": 42.75,
241
- "completions/min_terminated_length": 42.75,
242
- "epoch": 0.0007992781630101171,
243
- "frac_reward_zero_std": 0.5625,
244
- "grad_norm": 0.0035960455425083637,
245
  "kl": 0.0,
246
- "learning_rate": 2.492173797206989e-07,
247
- "loss": -0.0004108186066150665,
248
- "num_tokens": 427021.0,
249
- "reward": 1.30385160446167e-08,
250
- "reward_std": 0.383321788161993,
251
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.4901161193847656e-08,
252
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
253
  "step": 450
254
  },
255
  {
@@ -258,115 +258,115 @@
258
  "clip_ratio/low_mean": 0.0,
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
- "completion_length": 63.98,
262
  "completions/clipped_ratio": 0.0,
263
- "completions/max_length": 85.33333333333333,
264
- "completions/max_terminated_length": 85.33333333333333,
265
- "completions/mean_length": 55.046875,
266
- "completions/mean_terminated_length": 55.046875,
267
- "completions/min_length": 29.666666666666668,
268
- "completions/min_terminated_length": 29.666666666666668,
269
- "epoch": 0.000888086847789019,
270
- "frac_reward_zero_std": 0.7916666666666666,
271
- "grad_norm": 0.0006704159895889461,
272
  "kl": 0.0,
273
- "learning_rate": 2.7696987189449615e-07,
274
- "loss": 0.0006217561289668084,
275
- "num_tokens": 464766.0,
276
- "reward": -4.967053731282552e-09,
277
- "reward_std": 0.1608196645975113,
278
- "rewards/TranscriptCorrectionGrpoReward/mean": 4.967053731282552e-09,
279
- "rewards/TranscriptCorrectionGrpoReward/std": 0.6719369093577067,
280
  "step": 500
281
  },
282
  {
283
- "eval_cer_subset": 0.013239532952269286,
284
- "eval_cer_subset_edit_distance": 813,
285
  "eval_cer_subset_groups": 250,
286
  "eval_cer_subset_items": 250,
287
  "eval_cer_subset_ref_chars": 61407,
288
- "eval_loss": 0.03968957871396896,
289
  "step": 507
290
  },
291
  {
292
- "clip_ratio/high_max": 0.00010526316240429879,
293
- "clip_ratio/high_mean": 0.00010526316240429879,
294
- "clip_ratio/low_mean": 5.917159840464592e-05,
295
- "clip_ratio/low_min": 5.917159840464592e-05,
296
- "clip_ratio/region_mean": 0.0001644347608089447,
297
- "completion_length": 65.61,
298
  "completions/clipped_ratio": 0.0,
299
- "completions/max_length": 114.66666666666667,
300
- "completions/max_terminated_length": 114.66666666666667,
301
- "completions/mean_length": 71.984375,
302
- "completions/mean_terminated_length": 71.984375,
303
- "completions/min_length": 37.0,
304
- "completions/min_terminated_length": 37.0,
305
- "epoch": 0.000976895532567921,
306
- "frac_reward_zero_std": 0.7083333333333334,
307
- "grad_norm": 0.008066101931035519,
308
  "kl": 0.0,
309
- "learning_rate": 3.0472236406829335e-07,
310
- "loss": -3.333181142807007e-05,
311
- "num_tokens": 513963.0,
312
- "reward": 0.0,
313
- "reward_std": 0.36842172344525653,
314
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
315
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
316
  "step": 550
317
  },
318
  {
319
  "clip_ratio/high_max": 0.0,
320
  "clip_ratio/high_mean": 0.0,
321
- "clip_ratio/low_mean": 9.615384973585606e-05,
322
- "clip_ratio/low_min": 9.615384973585606e-05,
323
- "clip_ratio/region_mean": 9.615384973585606e-05,
324
- "completion_length": 73.1,
325
  "completions/clipped_ratio": 0.0,
326
- "completions/max_length": 108.33333333333333,
327
- "completions/max_terminated_length": 108.33333333333333,
328
- "completions/mean_length": 71.40104166666667,
329
- "completions/mean_terminated_length": 71.40104166666667,
330
- "completions/min_length": 47.0,
331
- "completions/min_terminated_length": 47.0,
332
- "epoch": 0.0010657042173468228,
333
- "frac_reward_zero_std": 0.4583333333333333,
334
- "grad_norm": 0.00526217557489872,
335
  "kl": 0.0,
336
- "learning_rate": 3.3247485624209054e-07,
337
- "loss": 0.00033511649817228315,
338
- "num_tokens": 562392.0,
339
- "reward": -4.967053731282552e-09,
340
- "reward_std": 0.35033048689365387,
341
- "rewards/TranscriptCorrectionGrpoReward/mean": -2.483526865641276e-09,
342
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
343
  "step": 600
344
  },
345
  {
346
  "clip_ratio/high_max": 0.0,
347
  "clip_ratio/high_mean": 0.0,
348
- "clip_ratio/low_mean": 6.493506487458944e-05,
349
- "clip_ratio/low_min": 6.493506487458944e-05,
350
- "clip_ratio/region_mean": 6.493506487458944e-05,
351
- "completion_length": 71.48,
352
  "completions/clipped_ratio": 0.0,
353
- "completions/max_length": 111.66666666666667,
354
- "completions/max_terminated_length": 111.66666666666667,
355
- "completions/mean_length": 71.27604166666667,
356
- "completions/mean_terminated_length": 71.27604166666667,
357
- "completions/min_length": 39.666666666666664,
358
- "completions/min_terminated_length": 39.666666666666664,
359
- "epoch": 0.0011545129021257247,
360
- "frac_reward_zero_std": 0.5416666666666666,
361
- "grad_norm": 0.02180999144911766,
362
  "kl": 0.0,
363
- "learning_rate": 3.602273484158878e-07,
364
- "loss": -0.0010874558240175248,
365
- "num_tokens": 610477.0,
366
- "reward": -4.967053731282552e-09,
367
- "reward_std": 0.29229696094989777,
368
- "rewards/TranscriptCorrectionGrpoReward/mean": -4.967053731282552e-09,
369
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
370
  "step": 650
371
  },
372
  {
@@ -375,304 +375,304 @@
375
  "clip_ratio/low_mean": 0.0,
376
  "clip_ratio/low_min": 0.0,
377
  "clip_ratio/region_mean": 0.0,
378
- "completion_length": 68.335,
379
  "completions/clipped_ratio": 0.0,
380
- "completions/max_length": 106.66666666666667,
381
- "completions/max_terminated_length": 106.66666666666667,
382
- "completions/mean_length": 67.78645833333333,
383
- "completions/mean_terminated_length": 67.78645833333333,
384
- "completions/min_length": 34.0,
385
- "completions/min_terminated_length": 34.0,
386
- "epoch": 0.0012433215869046266,
387
- "frac_reward_zero_std": 0.5,
388
- "grad_norm": 0.03915110602974892,
389
  "kl": 0.0,
390
- "learning_rate": 3.87979840589685e-07,
391
- "loss": -0.0011869536340236664,
392
- "num_tokens": 657028.0,
393
- "reward": 2.483526865641276e-09,
394
- "reward_std": 0.5572122434775034,
395
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.483526865641276e-09,
396
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
397
  "step": 700
398
  },
399
  {
400
- "clip_ratio/high_max": 0.0,
401
- "clip_ratio/high_mean": 0.0,
402
  "clip_ratio/low_mean": 0.0,
403
  "clip_ratio/low_min": 0.0,
404
- "clip_ratio/region_mean": 0.0,
405
- "completion_length": 67.125,
406
  "completions/clipped_ratio": 0.0,
407
- "completions/max_length": 105.33333333333333,
408
- "completions/max_terminated_length": 105.33333333333333,
409
- "completions/mean_length": 67.28645833333333,
410
- "completions/mean_terminated_length": 67.28645833333333,
411
- "completions/min_length": 35.333333333333336,
412
- "completions/min_terminated_length": 35.333333333333336,
413
- "epoch": 0.0013321302716835285,
414
- "frac_reward_zero_std": 0.6666666666666666,
415
- "grad_norm": 0.048128049820661545,
416
  "kl": 0.0,
417
- "learning_rate": 4.1573233276348217e-07,
418
- "loss": 0.0027270379662513735,
419
- "num_tokens": 702891.0,
420
- "reward": 2.483526865641276e-09,
421
- "reward_std": 0.4348108967145284,
422
- "rewards/TranscriptCorrectionGrpoReward/mean": -6.829698880513509e-09,
423
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
424
  "step": 750
425
  },
426
  {
427
  "clip_ratio/high_max": 0.0,
428
  "clip_ratio/high_mean": 0.0,
429
- "clip_ratio/low_mean": 0.00010526316240429879,
430
- "clip_ratio/low_min": 0.00010526316240429879,
431
- "clip_ratio/region_mean": 0.00010526316240429879,
432
- "completion_length": 67.81,
433
  "completions/clipped_ratio": 0.0,
434
- "completions/max_length": 98.66666666666667,
435
- "completions/max_terminated_length": 98.66666666666667,
436
- "completions/mean_length": 67.61458333333333,
437
- "completions/mean_terminated_length": 67.61458333333333,
438
- "completions/min_length": 34.333333333333336,
439
- "completions/min_terminated_length": 34.333333333333336,
440
- "epoch": 0.0014209389564624304,
441
- "frac_reward_zero_std": 0.5833333333333334,
442
- "grad_norm": 0.028703227639198303,
443
  "kl": 0.0,
444
- "learning_rate": 4.434848249372794e-07,
445
- "loss": -0.0013799819350242615,
446
- "num_tokens": 749081.0,
447
- "reward": -1.4901161193847656e-08,
448
- "reward_std": 0.40635913610458374,
449
- "rewards/TranscriptCorrectionGrpoReward/mean": -1.4901161193847656e-08,
450
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
451
  "step": 800
452
  },
453
  {
454
  "clip_ratio/high_max": 0.0,
455
  "clip_ratio/high_mean": 0.0,
456
- "clip_ratio/low_mean": 8.333333767950535e-05,
457
- "clip_ratio/low_min": 8.333333767950535e-05,
458
- "clip_ratio/region_mean": 8.333333767950535e-05,
459
- "completion_length": 73.535,
460
  "completions/clipped_ratio": 0.0,
461
- "completions/max_length": 113.5,
462
- "completions/max_terminated_length": 113.5,
463
- "completions/mean_length": 75.88671875,
464
- "completions/mean_terminated_length": 75.88671875,
465
- "completions/min_length": 36.5,
466
- "completions/min_terminated_length": 36.5,
467
- "epoch": 0.0015097476412413323,
468
- "frac_reward_zero_std": 0.59375,
469
- "grad_norm": 0.002819702262058854,
470
  "kl": 0.0,
471
- "learning_rate": 4.712373171110766e-07,
472
- "loss": -0.0007266353815793992,
473
- "num_tokens": 818284.0,
474
- "reward": 9.313225746154785e-09,
475
- "reward_std": 0.472036711871624,
476
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.1175870895385742e-08,
477
- "rewards/TranscriptCorrectionGrpoReward/std": 1.007905274629593,
478
  "step": 850
479
  },
480
  {
481
- "clip_ratio/high_max": 4.291845485568046e-05,
482
- "clip_ratio/high_mean": 4.291845485568046e-05,
483
- "clip_ratio/low_mean": 4.32900432497263e-05,
484
- "clip_ratio/low_min": 4.32900432497263e-05,
485
- "clip_ratio/region_mean": 8.620849810540676e-05,
486
- "completion_length": 76.735,
487
  "completions/clipped_ratio": 0.0,
488
- "completions/max_length": 123.0,
489
- "completions/max_terminated_length": 123.0,
490
- "completions/mean_length": 73.05208333333333,
491
- "completions/mean_terminated_length": 73.05208333333333,
492
- "completions/min_length": 38.666666666666664,
493
- "completions/min_terminated_length": 38.666666666666664,
494
- "epoch": 0.0015985563260202341,
495
- "frac_reward_zero_std": 0.5416666666666666,
496
- "grad_norm": 0.0,
497
  "kl": 0.0,
498
- "learning_rate": 4.989898092848738e-07,
499
- "loss": 0.0014878523349761963,
500
- "num_tokens": 868206.0,
501
- "reward": 1.241763432820638e-09,
502
- "reward_std": 0.4125714997450511,
503
- "rewards/TranscriptCorrectionGrpoReward/mean": 4.967053731282552e-09,
504
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052050908406,
505
  "step": 900
506
  },
507
  {
508
  "clip_ratio/high_max": 0.0,
509
  "clip_ratio/high_mean": 0.0,
510
- "clip_ratio/low_mean": 5.917159840464592e-05,
511
- "clip_ratio/low_min": 5.917159840464592e-05,
512
- "clip_ratio/region_mean": 5.917159840464592e-05,
513
- "completion_length": 69.03,
514
  "completions/clipped_ratio": 0.0,
515
- "completions/max_length": 106.33333333333333,
516
- "completions/max_terminated_length": 106.33333333333333,
517
- "completions/mean_length": 66.10416666666667,
518
- "completions/mean_terminated_length": 66.10416666666667,
519
- "completions/min_length": 36.666666666666664,
520
- "completions/min_terminated_length": 36.666666666666664,
521
- "epoch": 0.001687365010799136,
522
- "frac_reward_zero_std": 0.625,
523
- "grad_norm": 0.0075177694670856,
524
  "kl": 0.0,
525
- "learning_rate": 5.26742301458671e-07,
526
- "loss": -0.002387867271900177,
527
- "num_tokens": 913738.0,
528
- "reward": 0.0,
529
- "reward_std": 0.4860446055730184,
530
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.483526865641276e-09,
531
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
532
  "step": 950
533
  },
534
  {
535
  "clip_ratio/high_max": 0.0,
536
  "clip_ratio/high_mean": 0.0,
537
- "clip_ratio/low_mean": 0.0,
538
- "clip_ratio/low_min": 0.0,
539
- "clip_ratio/region_mean": 0.0,
540
- "completion_length": 63.8,
541
  "completions/clipped_ratio": 0.0,
542
- "completions/max_length": 106.33333333333333,
543
- "completions/max_terminated_length": 106.33333333333333,
544
- "completions/mean_length": 65.10416666666667,
545
- "completions/mean_terminated_length": 65.10416666666667,
546
- "completions/min_length": 28.0,
547
- "completions/min_terminated_length": 28.0,
548
- "epoch": 0.001776173695578038,
549
- "frac_reward_zero_std": 0.5416666666666666,
550
- "grad_norm": 0.02761891484260559,
551
  "kl": 0.0,
552
- "learning_rate": 5.544947936324682e-07,
553
- "loss": 0.0017396166920661926,
554
- "num_tokens": 958582.0,
555
- "reward": 0.0,
556
- "reward_std": 0.4594339330991109,
557
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.483526865641276e-09,
558
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079053243001301,
559
  "step": 1000
560
  },
561
  {
562
- "eval_cer_subset": 0.013044115491719185,
563
- "eval_cer_subset_edit_distance": 801,
564
  "eval_cer_subset_groups": 250,
565
  "eval_cer_subset_items": 250,
566
  "eval_cer_subset_ref_chars": 61407,
567
- "eval_loss": 0.03965790307253722,
568
  "step": 1014
569
  },
570
  {
571
  "clip_ratio/high_max": 0.0,
572
  "clip_ratio/high_mean": 0.0,
573
- "clip_ratio/low_mean": 7.633587811142206e-05,
574
- "clip_ratio/low_min": 7.633587811142206e-05,
575
- "clip_ratio/region_mean": 7.633587811142206e-05,
576
- "completion_length": 60.39,
577
  "completions/clipped_ratio": 0.0,
578
- "completions/max_length": 106.33333333333333,
579
- "completions/max_terminated_length": 106.33333333333333,
580
- "completions/mean_length": 60.489583333333336,
581
- "completions/mean_terminated_length": 60.489583333333336,
582
- "completions/min_length": 35.666666666666664,
583
- "completions/min_terminated_length": 35.666666666666664,
584
- "epoch": 0.0018649823803569398,
585
- "frac_reward_zero_std": 0.5833333333333334,
586
- "grad_norm": 0.0038077128119766712,
587
  "kl": 0.0,
588
- "learning_rate": 5.822472858062655e-07,
589
- "loss": -0.00032259501516819,
590
- "num_tokens": 1000028.0,
591
- "reward": 9.934107462565104e-09,
592
- "reward_std": 0.44259731968243915,
593
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
594
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
595
  "step": 1050
596
  },
597
  {
598
  "clip_ratio/high_max": 0.0,
599
  "clip_ratio/high_mean": 0.0,
600
- "clip_ratio/low_mean": 6.211180239915847e-05,
601
- "clip_ratio/low_min": 6.211180239915847e-05,
602
- "clip_ratio/region_mean": 6.211180239915847e-05,
603
- "completion_length": 62.45,
604
  "completions/clipped_ratio": 0.0,
605
- "completions/max_length": 102.66666666666667,
606
- "completions/max_terminated_length": 102.66666666666667,
607
- "completions/mean_length": 62.046875,
608
- "completions/mean_terminated_length": 62.046875,
609
- "completions/min_length": 38.0,
610
- "completions/min_terminated_length": 38.0,
611
- "epoch": 0.001953791065135842,
612
- "frac_reward_zero_std": 0.5416666666666666,
613
- "grad_norm": 0.010413075797259808,
614
  "kl": 0.0,
615
- "learning_rate": 6.099997779800627e-07,
616
- "loss": 0.0010119295865297319,
617
- "num_tokens": 1042829.0,
618
- "reward": 1.241763432820638e-09,
619
- "reward_std": 0.5440023342768351,
620
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.483526865641276e-09,
621
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
622
  "step": 1100
623
  },
624
  {
625
  "clip_ratio/high_max": 0.0,
626
  "clip_ratio/high_mean": 0.0,
627
- "clip_ratio/low_mean": 5.3191487677395346e-05,
628
- "clip_ratio/low_min": 5.3191487677395346e-05,
629
- "clip_ratio/region_mean": 5.3191487677395346e-05,
630
- "completion_length": 67.07,
631
  "completions/clipped_ratio": 0.0,
632
- "completions/max_length": 104.66666666666667,
633
- "completions/max_terminated_length": 104.66666666666667,
634
- "completions/mean_length": 67.171875,
635
- "completions/mean_terminated_length": 67.171875,
636
- "completions/min_length": 35.666666666666664,
637
- "completions/min_terminated_length": 35.666666666666664,
638
- "epoch": 0.0020425997499147436,
639
- "frac_reward_zero_std": 0.4583333333333333,
640
- "grad_norm": 0.0493728443980217,
641
  "kl": 0.0,
642
- "learning_rate": 6.377522701538599e-07,
643
- "loss": -0.00021354854106903076,
644
- "num_tokens": 1088622.0,
645
- "reward": -1.7074247201283772e-08,
646
- "reward_std": 0.5459979971249899,
647
- "rewards/TranscriptCorrectionGrpoReward/mean": -2.0489096641540527e-08,
648
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
649
  "step": 1150
650
  },
651
  {
652
- "clip_ratio/high_max": 6.211180239915847e-05,
653
- "clip_ratio/high_mean": 6.211180239915847e-05,
654
- "clip_ratio/low_mean": 4.999999888241291e-05,
655
- "clip_ratio/low_min": 4.999999888241291e-05,
656
- "clip_ratio/region_mean": 0.00011211180128157139,
657
- "completion_length": 66.76,
658
  "completions/clipped_ratio": 0.0,
659
- "completions/max_length": 100.33333333333333,
660
- "completions/max_terminated_length": 100.33333333333333,
661
- "completions/mean_length": 67.109375,
662
- "completions/mean_terminated_length": 67.109375,
663
- "completions/min_length": 40.666666666666664,
664
- "completions/min_terminated_length": 40.666666666666664,
665
- "epoch": 0.0021314084346936457,
666
- "frac_reward_zero_std": 0.6666666666666666,
667
- "grad_norm": 0.0007921307114884257,
668
  "kl": 0.0,
669
- "learning_rate": 6.655047623276571e-07,
670
- "loss": -0.0006299776583909988,
671
- "num_tokens": 1134523.0,
672
- "reward": 4.967053731282552e-09,
673
- "reward_std": 0.24005796015262604,
674
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.4901161193847656e-08,
675
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
676
  "step": 1200
677
  },
678
  {
@@ -681,175 +681,175 @@
681
  "clip_ratio/low_mean": 0.0,
682
  "clip_ratio/low_min": 0.0,
683
  "clip_ratio/region_mean": 0.0,
684
- "completion_length": 66.65,
685
  "completions/clipped_ratio": 0.0,
686
- "completions/max_length": 108.5,
687
- "completions/max_terminated_length": 108.5,
688
- "completions/mean_length": 64.26953125,
689
- "completions/mean_terminated_length": 64.26953125,
690
- "completions/min_length": 33.5,
691
- "completions/min_terminated_length": 33.5,
692
- "epoch": 0.0022202171194725473,
693
- "frac_reward_zero_std": 0.6875,
694
- "grad_norm": 0.0,
695
  "kl": 0.0,
696
- "learning_rate": 6.932572545014544e-07,
697
- "loss": -0.000320572704076767,
698
- "num_tokens": 1193424.0,
699
- "reward": 3.725290298461914e-09,
700
- "reward_std": 0.4126547574996948,
701
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.7939677238464355e-09,
702
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
703
  "step": 1250
704
  },
705
  {
706
  "clip_ratio/high_max": 0.0,
707
  "clip_ratio/high_mean": 0.0,
708
- "clip_ratio/low_mean": 0.00013079616706818342,
709
- "clip_ratio/low_min": 0.00013079616706818342,
710
- "clip_ratio/region_mean": 0.00013079616706818342,
711
- "completion_length": 65.935,
712
  "completions/clipped_ratio": 0.0,
713
- "completions/max_length": 100.33333333333333,
714
- "completions/max_terminated_length": 100.33333333333333,
715
- "completions/mean_length": 66.515625,
716
- "completions/mean_terminated_length": 66.515625,
717
- "completions/min_length": 35.333333333333336,
718
- "completions/min_terminated_length": 35.333333333333336,
719
- "epoch": 0.0023090258042514494,
720
- "frac_reward_zero_std": 0.625,
721
- "grad_norm": 0.009241115301847458,
722
  "kl": 0.0,
723
- "learning_rate": 7.210097466752515e-07,
724
- "loss": -0.000618676021695137,
725
- "num_tokens": 1239227.0,
726
  "reward": 0.0,
727
- "reward_std": 0.4066670338312785,
728
  "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
729
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
730
  "step": 1300
731
  },
732
  {
733
  "clip_ratio/high_max": 0.0,
734
  "clip_ratio/high_mean": 0.0,
735
- "clip_ratio/low_mean": 5.586592014878988e-05,
736
- "clip_ratio/low_min": 5.586592014878988e-05,
737
- "clip_ratio/region_mean": 5.586592014878988e-05,
738
- "completion_length": 61.28,
739
  "completions/clipped_ratio": 0.0,
740
- "completions/max_length": 106.66666666666667,
741
- "completions/max_terminated_length": 106.66666666666667,
742
- "completions/mean_length": 65.02083333333333,
743
- "completions/mean_terminated_length": 65.02083333333333,
744
- "completions/min_length": 33.666666666666664,
745
- "completions/min_terminated_length": 33.666666666666664,
746
- "epoch": 0.002397834489030351,
747
- "frac_reward_zero_std": 0.6666666666666666,
748
- "grad_norm": 0.010475926101207733,
749
  "kl": 0.0,
750
- "learning_rate": 7.487622388490487e-07,
751
- "loss": 0.0006281441450119019,
752
- "num_tokens": 1283967.0,
753
- "reward": 9.934107462565104e-09,
754
- "reward_std": 0.3140339305003484,
755
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.241763432820638e-09,
756
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
757
  "step": 1350
758
  },
759
  {
760
  "clip_ratio/high_max": 0.0,
761
  "clip_ratio/high_mean": 0.0,
762
- "clip_ratio/low_mean": 0.00010416666977107525,
763
- "clip_ratio/low_min": 0.00010416666977107525,
764
- "clip_ratio/region_mean": 0.00010416666977107525,
765
- "completion_length": 64.255,
766
  "completions/clipped_ratio": 0.0,
767
- "completions/max_length": 96.66666666666667,
768
- "completions/max_terminated_length": 96.66666666666667,
769
- "completions/mean_length": 61.161458333333336,
770
- "completions/mean_terminated_length": 61.161458333333336,
771
- "completions/min_length": 34.333333333333336,
772
- "completions/min_terminated_length": 34.333333333333336,
773
- "epoch": 0.002486643173809253,
774
- "frac_reward_zero_std": 0.7083333333333334,
775
- "grad_norm": 0.02102189138531685,
776
  "kl": 0.0,
777
- "learning_rate": 7.765147310228459e-07,
778
- "loss": -0.0013700899481773375,
779
- "num_tokens": 1326542.0,
780
- "reward": 2.483526865641276e-09,
781
- "reward_std": 0.38102721174558,
782
- "rewards/TranscriptCorrectionGrpoReward/mean": 7.450580596923828e-09,
783
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
784
  "step": 1400
785
  },
786
  {
787
  "clip_ratio/high_max": 0.0,
788
  "clip_ratio/high_mean": 0.0,
789
- "clip_ratio/low_mean": 8.771929889917374e-05,
790
- "clip_ratio/low_min": 8.771929889917374e-05,
791
- "clip_ratio/region_mean": 8.771929889917374e-05,
792
- "completion_length": 65.215,
793
  "completions/clipped_ratio": 0.0,
794
- "completions/max_length": 112.0,
795
- "completions/max_terminated_length": 112.0,
796
- "completions/mean_length": 67.92708333333333,
797
- "completions/mean_terminated_length": 67.92708333333333,
798
- "completions/min_length": 33.666666666666664,
799
- "completions/min_terminated_length": 33.666666666666664,
800
- "epoch": 0.002575451858588155,
801
- "frac_reward_zero_std": 0.8333333333333334,
802
  "grad_norm": 0.0,
803
  "kl": 0.0,
804
- "learning_rate": 8.042672231966431e-07,
805
- "loss": 0.001429552286863327,
806
- "num_tokens": 1373424.0,
807
- "reward": 0.0,
808
- "reward_std": 0.2193582976857821,
809
- "rewards/TranscriptCorrectionGrpoReward/mean": 4.967053731282552e-09,
810
- "rewards/TranscriptCorrectionGrpoReward/std": 0.671936829884847,
811
  "step": 1450
812
  },
813
  {
814
  "clip_ratio/high_max": 0.0,
815
  "clip_ratio/high_mean": 0.0,
816
- "clip_ratio/low_mean": 0.00015233600046485663,
817
- "clip_ratio/low_min": 0.00015233600046485663,
818
- "clip_ratio/region_mean": 0.00015233600046485663,
819
- "completion_length": 69.035,
820
  "completions/clipped_ratio": 0.0,
821
- "completions/max_length": 94.0,
822
- "completions/max_terminated_length": 94.0,
823
- "completions/mean_length": 69.44791666666667,
824
- "completions/mean_terminated_length": 69.44791666666667,
825
- "completions/min_length": 41.0,
826
- "completions/min_terminated_length": 41.0,
827
- "epoch": 0.002664260543367057,
828
- "frac_reward_zero_std": 0.625,
829
- "grad_norm": 0.001688106800429523,
830
  "kl": 0.0,
831
- "learning_rate": 8.320197153704403e-07,
832
- "loss": -0.0005409684777259826,
833
- "num_tokens": 1421518.0,
834
- "reward": 1.4901161193847656e-08,
835
- "reward_std": 0.3605746428171794,
836
- "rewards/TranscriptCorrectionGrpoReward/mean": 2.483526865641276e-08,
837
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
838
  "step": 1500
839
  },
840
  {
841
- "eval_cer_subset": 0.012653280570618985,
842
- "eval_cer_subset_edit_distance": 777,
843
  "eval_cer_subset_groups": 250,
844
  "eval_cer_subset_items": 250,
845
  "eval_cer_subset_ref_chars": 61407,
846
- "eval_loss": 0.03984795692112765,
847
  "step": 1521
848
  }
849
  ],
850
  "logging_steps": 50,
851
- "max_steps": 2252032,
852
- "num_input_tokens_seen": 1452552,
853
  "num_train_epochs": 4,
854
  "save_steps": 507,
855
  "stateful_callbacks": {
@@ -865,7 +865,7 @@
865
  }
866
  },
867
  "total_flos": 0.0,
868
- "train_batch_size": 2,
869
  "trial_name": null,
870
  "trial_params": null
871
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0027015505941279908,
6
  "eval_steps": 500,
7
  "global_step": 1521,
8
  "is_hyper_param_search": false,
 
15
  "clip_ratio/low_mean": 0.0,
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
+ "completion_length": 65.68,
19
  "completions/clipped_ratio": 0.0,
20
+ "completions/max_length": 65.88,
21
+ "completions/max_terminated_length": 65.88,
22
+ "completions/mean_length": 65.68,
23
+ "completions/mean_terminated_length": 65.68,
24
+ "completions/min_length": 65.48,
25
+ "completions/min_terminated_length": 65.48,
26
+ "epoch": 8.88083693007229e-05,
27
+ "frac_reward_zero_std": 0.56,
28
+ "grad_norm": 0.5719226598739624,
29
  "kl": 0.0,
30
+ "learning_rate": 5.439488466064254e-09,
31
+ "loss": -0.000577671229839325,
32
+ "num_tokens": 46552.0,
33
+ "reward": 2.9802322387695314e-09,
34
+ "reward_std": 0.47037978172302247,
35
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.9802322387695314e-09,
36
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
37
  "step": 50
38
  },
39
  {
 
42
  "clip_ratio/low_mean": 0.0,
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
+ "completion_length": 53.46,
46
  "completions/clipped_ratio": 0.0,
47
+ "completions/max_length": 53.6,
48
+ "completions/max_terminated_length": 53.6,
49
+ "completions/mean_length": 53.46,
50
+ "completions/mean_terminated_length": 53.46,
51
+ "completions/min_length": 53.24,
52
+ "completions/min_terminated_length": 53.24,
53
+ "epoch": 0.0001776167386014458,
54
+ "frac_reward_zero_std": 0.64,
55
  "grad_norm": 0.0,
56
  "kl": 0.0,
57
+ "learning_rate": 1.0989986900823695e-08,
58
+ "loss": -0.0009160846471786499,
59
+ "num_tokens": 85556.0,
60
+ "reward": 3.874301910400391e-09,
61
+ "reward_std": 0.38485618114471437,
62
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.874301910400391e-09,
63
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
64
  "step": 100
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
68
  "clip_ratio/high_mean": 0.0,
69
+ "clip_ratio/low_mean": 0.00011904762126505375,
70
+ "clip_ratio/low_min": 0.00011904762126505375,
71
+ "clip_ratio/region_mean": 0.00011904762126505375,
72
+ "completion_length": 62.48,
73
  "completions/clipped_ratio": 0.0,
74
+ "completions/max_length": 62.6,
75
+ "completions/max_terminated_length": 62.6,
76
+ "completions/mean_length": 62.48,
77
+ "completions/mean_terminated_length": 62.48,
78
+ "completions/min_length": 62.24,
79
+ "completions/min_terminated_length": 62.24,
80
+ "epoch": 0.0002664251079021687,
81
+ "frac_reward_zero_std": 0.56,
82
+ "grad_norm": 0.2554231286048889,
83
  "kl": 0.0,
84
+ "learning_rate": 1.6540485335583138e-08,
85
+ "loss": -0.0009111672639846802,
86
+ "num_tokens": 130380.0,
87
+ "reward": -1.1920928955078125e-09,
88
+ "reward_std": 0.4703797769546509,
89
+ "rewards/TranscriptCorrectionGrpoReward/mean": -1.1920928955078125e-09,
90
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.47037978172302247,
91
  "step": 150
92
  },
93
  {
 
96
  "clip_ratio/low_mean": 0.0,
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
+ "completion_length": 62.175,
100
  "completions/clipped_ratio": 0.0,
101
+ "completions/max_length": 62.36,
102
+ "completions/max_terminated_length": 62.36,
103
+ "completions/mean_length": 62.175,
104
+ "completions/mean_terminated_length": 62.175,
105
+ "completions/min_length": 61.88,
106
+ "completions/min_terminated_length": 61.88,
107
+ "epoch": 0.0003552334772028916,
108
+ "frac_reward_zero_std": 0.56,
109
+ "grad_norm": 0.627755343914032,
110
  "kl": 0.0,
111
+ "learning_rate": 2.2090983770342578e-08,
112
+ "loss": -0.0005262196063995361,
113
+ "num_tokens": 174959.0,
114
+ "reward": 1.1920928955078125e-09,
115
+ "reward_std": 0.47037978172302247,
116
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.1920928955078125e-09,
117
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
118
  "step": 200
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
122
  "clip_ratio/high_mean": 0.0,
123
+ "clip_ratio/low_mean": 0.00016129031777381897,
124
+ "clip_ratio/low_min": 0.00016129031777381897,
125
+ "clip_ratio/region_mean": 0.00016129031777381897,
126
+ "completion_length": 65.835,
127
  "completions/clipped_ratio": 0.0,
128
+ "completions/max_length": 66.08,
129
+ "completions/max_terminated_length": 66.08,
130
+ "completions/mean_length": 65.835,
131
+ "completions/mean_terminated_length": 65.835,
132
+ "completions/min_length": 65.6,
133
+ "completions/min_terminated_length": 65.6,
134
+ "epoch": 0.0004440418465036145,
135
+ "frac_reward_zero_std": 0.52,
136
+ "grad_norm": 0.0,
137
  "kl": 0.0,
138
+ "learning_rate": 2.764148220510202e-08,
139
+ "loss": -0.00036669328808784483,
140
+ "num_tokens": 222270.0,
141
+ "reward": 3.874301910400391e-09,
142
+ "reward_std": 0.5131415796279907,
143
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.874301910400391e-09,
144
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5131415748596191,
145
  "step": 250
146
  },
147
  {
 
150
  "clip_ratio/low_mean": 0.0,
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
+ "completion_length": 68.75,
154
  "completions/clipped_ratio": 0.0,
155
+ "completions/max_length": 69.04,
156
+ "completions/max_terminated_length": 69.04,
157
+ "completions/mean_length": 68.75,
158
+ "completions/mean_terminated_length": 68.75,
159
+ "completions/min_length": 68.56,
160
+ "completions/min_terminated_length": 68.56,
161
+ "epoch": 0.0005328502158043374,
162
+ "frac_reward_zero_std": 0.6,
163
+ "grad_norm": 0.0,
164
  "kl": 0.0,
165
+ "learning_rate": 3.3191980639861464e-08,
166
+ "loss": 0.0001394149661064148,
167
+ "num_tokens": 271740.0,
168
+ "reward": 6.258487701416015e-09,
169
+ "reward_std": 0.4276179838180542,
170
+ "rewards/TranscriptCorrectionGrpoReward/mean": 6.258487701416015e-09,
171
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
172
  "step": 300
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
176
  "clip_ratio/high_mean": 0.0,
177
+ "clip_ratio/low_mean": 0.0,
178
+ "clip_ratio/low_min": 0.0,
179
+ "clip_ratio/region_mean": 0.0,
180
+ "completion_length": 61.07,
181
  "completions/clipped_ratio": 0.0,
182
+ "completions/max_length": 61.2,
183
+ "completions/max_terminated_length": 61.2,
184
+ "completions/mean_length": 61.07,
185
+ "completions/mean_terminated_length": 61.07,
186
+ "completions/min_length": 60.96,
187
+ "completions/min_terminated_length": 60.96,
188
+ "epoch": 0.0006216585851050603,
189
+ "frac_reward_zero_std": 0.72,
190
+ "grad_norm": 0.0,
191
  "kl": 0.0,
192
+ "learning_rate": 3.8742479074620904e-08,
193
+ "loss": -5.508854985237121e-05,
194
+ "num_tokens": 316354.0,
195
+ "reward": 2.384185791015625e-09,
196
+ "reward_std": 0.29933258533477786,
197
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.384185791015625e-09,
198
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.2993325901031494,
199
  "step": 350
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
203
  "clip_ratio/high_mean": 0.0,
204
+ "clip_ratio/low_mean": 0.0,
205
+ "clip_ratio/low_min": 0.0,
206
+ "clip_ratio/region_mean": 0.0,
207
+ "completion_length": 64.22,
208
  "completions/clipped_ratio": 0.0,
209
+ "completions/max_length": 64.6,
210
+ "completions/max_terminated_length": 64.6,
211
+ "completions/mean_length": 64.22,
212
+ "completions/mean_terminated_length": 64.22,
213
+ "completions/min_length": 63.92,
214
+ "completions/min_terminated_length": 63.92,
215
+ "epoch": 0.0007104669544057832,
216
+ "frac_reward_zero_std": 0.56,
217
+ "grad_norm": 0.0,
218
  "kl": 0.0,
219
+ "learning_rate": 4.429297750938035e-08,
220
+ "loss": -0.0010024748742580413,
221
+ "num_tokens": 362550.0,
222
+ "reward": 5.960464477539063e-10,
223
+ "reward_std": 0.4703797769546509,
224
+ "rewards/TranscriptCorrectionGrpoReward/mean": 5.960464477539063e-10,
225
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
226
  "step": 400
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
230
  "clip_ratio/high_mean": 0.0,
231
+ "clip_ratio/low_mean": 0.0,
232
+ "clip_ratio/low_min": 0.0,
233
+ "clip_ratio/region_mean": 0.0,
234
+ "completion_length": 65.25,
235
  "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 65.36,
237
+ "completions/max_terminated_length": 65.36,
238
+ "completions/mean_length": 65.25,
239
+ "completions/mean_terminated_length": 65.25,
240
+ "completions/min_length": 65.04,
241
+ "completions/min_terminated_length": 65.04,
242
+ "epoch": 0.000799275323706506,
243
+ "frac_reward_zero_std": 0.6,
244
+ "grad_norm": 0.0,
245
  "kl": 0.0,
246
+ "learning_rate": 4.984347594413978e-08,
247
+ "loss": -0.0002909022569656372,
248
+ "num_tokens": 409416.0,
249
+ "reward": 1.7881393432617187e-09,
250
+ "reward_std": 0.4276179838180542,
251
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.7881393432617187e-09,
252
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4276179838180542,
253
  "step": 450
254
  },
255
  {
 
258
  "clip_ratio/low_mean": 0.0,
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
+ "completion_length": 65.76,
262
  "completions/clipped_ratio": 0.0,
263
+ "completions/max_length": 65.92,
264
+ "completions/max_terminated_length": 65.92,
265
+ "completions/mean_length": 65.76,
266
+ "completions/mean_terminated_length": 65.76,
267
+ "completions/min_length": 65.68,
268
+ "completions/min_terminated_length": 65.68,
269
+ "epoch": 0.000888083693007229,
270
+ "frac_reward_zero_std": 0.84,
271
+ "grad_norm": 0.035873379558324814,
272
  "kl": 0.0,
273
+ "learning_rate": 5.5393974378899236e-08,
274
+ "loss": -0.000893859937787056,
275
+ "num_tokens": 456024.0,
276
+ "reward": 0.0,
277
+ "reward_std": 0.17104718685150147,
278
+ "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
279
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.17104719161987306,
280
  "step": 500
281
  },
282
  {
283
+ "eval_cer_subset": 0.012913837184685785,
284
+ "eval_cer_subset_edit_distance": 793,
285
  "eval_cer_subset_groups": 250,
286
  "eval_cer_subset_items": 250,
287
  "eval_cer_subset_ref_chars": 61407,
288
+ "eval_loss": 0.03959455178967374,
289
  "step": 507
290
  },
291
  {
292
+ "clip_ratio/high_max": 8.333333767950535e-05,
293
+ "clip_ratio/high_mean": 8.333333767950535e-05,
294
+ "clip_ratio/low_mean": 0.00014285714365541935,
295
+ "clip_ratio/low_min": 0.00014285714365541935,
296
+ "clip_ratio/region_mean": 0.0002261904813349247,
297
+ "completion_length": 66.25,
298
  "completions/clipped_ratio": 0.0,
299
+ "completions/max_length": 66.64,
300
+ "completions/max_terminated_length": 66.64,
301
+ "completions/mean_length": 66.25,
302
+ "completions/mean_terminated_length": 66.25,
303
+ "completions/min_length": 66.0,
304
+ "completions/min_terminated_length": 66.0,
305
+ "epoch": 0.0009768920623079518,
306
+ "frac_reward_zero_std": 0.64,
307
+ "grad_norm": 0.0,
308
  "kl": 0.0,
309
+ "learning_rate": 6.094447281365867e-08,
310
+ "loss": 0.000100860595703125,
311
+ "num_tokens": 503522.0,
312
+ "reward": 5.960464477539063e-10,
313
+ "reward_std": 0.38485618114471437,
314
+ "rewards/TranscriptCorrectionGrpoReward/mean": 5.960464477539063e-10,
315
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
316
  "step": 550
317
  },
318
  {
319
  "clip_ratio/high_max": 0.0,
320
  "clip_ratio/high_mean": 0.0,
321
+ "clip_ratio/low_mean": 0.0,
322
+ "clip_ratio/low_min": 0.0,
323
+ "clip_ratio/region_mean": 0.0,
324
+ "completion_length": 70.03,
325
  "completions/clipped_ratio": 0.0,
326
+ "completions/max_length": 70.16,
327
+ "completions/max_terminated_length": 70.16,
328
+ "completions/mean_length": 70.03,
329
+ "completions/mean_terminated_length": 70.03,
330
+ "completions/min_length": 69.8,
331
+ "completions/min_terminated_length": 69.8,
332
+ "epoch": 0.0010657004316086747,
333
+ "frac_reward_zero_std": 0.52,
334
+ "grad_norm": 0.0,
335
  "kl": 0.0,
336
+ "learning_rate": 6.649497124841812e-08,
337
+ "loss": -0.00029084362089633944,
338
+ "num_tokens": 553312.0,
339
+ "reward": 2.682209014892578e-09,
340
+ "reward_std": 0.5131415700912476,
341
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.682209014892578e-09,
342
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5131415748596191,
343
  "step": 600
344
  },
345
  {
346
  "clip_ratio/high_max": 0.0,
347
  "clip_ratio/high_mean": 0.0,
348
+ "clip_ratio/low_mean": 0.0,
349
+ "clip_ratio/low_min": 0.0,
350
+ "clip_ratio/region_mean": 0.0,
351
+ "completion_length": 71.26,
352
  "completions/clipped_ratio": 0.0,
353
+ "completions/max_length": 71.76,
354
+ "completions/max_terminated_length": 71.76,
355
+ "completions/mean_length": 71.26,
356
+ "completions/mean_terminated_length": 71.26,
357
+ "completions/min_length": 70.52,
358
+ "completions/min_terminated_length": 70.52,
359
+ "epoch": 0.0011545088009093977,
360
+ "frac_reward_zero_std": 0.44,
361
+ "grad_norm": 0.0,
362
  "kl": 0.0,
363
+ "learning_rate": 7.204546968317756e-08,
364
+ "loss": -0.0016799652576446533,
365
+ "num_tokens": 603180.0,
366
+ "reward": -1.7881393432617187e-09,
367
+ "reward_std": 0.5986651754379273,
368
+ "rewards/TranscriptCorrectionGrpoReward/mean": -1.7881393432617187e-09,
369
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5986651802062988,
370
  "step": 650
371
  },
372
  {
 
375
  "clip_ratio/low_mean": 0.0,
376
  "clip_ratio/low_min": 0.0,
377
  "clip_ratio/region_mean": 0.0,
378
+ "completion_length": 71.675,
379
  "completions/clipped_ratio": 0.0,
380
+ "completions/max_length": 71.88,
381
+ "completions/max_terminated_length": 71.88,
382
+ "completions/mean_length": 71.675,
383
+ "completions/mean_terminated_length": 71.675,
384
+ "completions/min_length": 71.52,
385
+ "completions/min_terminated_length": 71.52,
386
+ "epoch": 0.0012433171702101206,
387
+ "frac_reward_zero_std": 0.6,
388
+ "grad_norm": 0.18659119307994843,
389
  "kl": 0.0,
390
+ "learning_rate": 7.7595968117937e-08,
391
+ "loss": -0.0002913355827331543,
392
+ "num_tokens": 654187.0,
393
+ "reward": 4.172325134277344e-09,
394
+ "reward_std": 0.4276179838180542,
395
+ "rewards/TranscriptCorrectionGrpoReward/mean": 4.172325134277344e-09,
396
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
397
  "step": 700
398
  },
399
  {
400
+ "clip_ratio/high_max": 3.731343196704984e-05,
401
+ "clip_ratio/high_mean": 3.731343196704984e-05,
402
  "clip_ratio/low_mean": 0.0,
403
  "clip_ratio/low_min": 0.0,
404
+ "clip_ratio/region_mean": 3.731343196704984e-05,
405
+ "completion_length": 65.635,
406
  "completions/clipped_ratio": 0.0,
407
+ "completions/max_length": 65.92,
408
+ "completions/max_terminated_length": 65.92,
409
+ "completions/mean_length": 65.635,
410
+ "completions/mean_terminated_length": 65.635,
411
+ "completions/min_length": 65.4,
412
+ "completions/min_terminated_length": 65.4,
413
+ "epoch": 0.0013321255395108435,
414
+ "frac_reward_zero_std": 0.64,
415
+ "grad_norm": 0.32996097207069397,
416
  "kl": 0.0,
417
+ "learning_rate": 8.314646655269643e-08,
418
+ "loss": 0.0007577691972255707,
419
+ "num_tokens": 700882.0,
420
+ "reward": 2.9802322387695314e-09,
421
+ "reward_std": 0.38485618114471437,
422
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.9802322387695314e-09,
423
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
424
  "step": 750
425
  },
426
  {
427
  "clip_ratio/high_max": 0.0,
428
  "clip_ratio/high_mean": 0.0,
429
+ "clip_ratio/low_mean": 0.0,
430
+ "clip_ratio/low_min": 0.0,
431
+ "clip_ratio/region_mean": 0.0,
432
+ "completion_length": 67.45,
433
  "completions/clipped_ratio": 0.0,
434
+ "completions/max_length": 67.64,
435
+ "completions/max_terminated_length": 67.64,
436
+ "completions/mean_length": 67.45,
437
+ "completions/mean_terminated_length": 67.45,
438
+ "completions/min_length": 67.16,
439
+ "completions/min_terminated_length": 67.16,
440
+ "epoch": 0.0014209339088115665,
441
+ "frac_reward_zero_std": 0.36,
442
+ "grad_norm": 0.2260763943195343,
443
  "kl": 0.0,
444
+ "learning_rate": 8.869696498745589e-08,
445
+ "loss": -2.6343166828155516e-05,
446
+ "num_tokens": 749036.0,
447
+ "reward": 3.2782554626464844e-09,
448
+ "reward_std": 0.6841887712478638,
449
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.2782554626464844e-09,
450
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.6841887760162354,
451
  "step": 800
452
  },
453
  {
454
  "clip_ratio/high_max": 0.0,
455
  "clip_ratio/high_mean": 0.0,
456
+ "clip_ratio/low_mean": 0.0001351351384073496,
457
+ "clip_ratio/low_min": 0.0001351351384073496,
458
+ "clip_ratio/region_mean": 0.0001351351384073496,
459
+ "completion_length": 75.2,
460
  "completions/clipped_ratio": 0.0,
461
+ "completions/max_length": 75.36,
462
+ "completions/max_terminated_length": 75.36,
463
+ "completions/mean_length": 75.2,
464
+ "completions/mean_terminated_length": 75.2,
465
+ "completions/min_length": 75.04,
466
+ "completions/min_terminated_length": 75.04,
467
+ "epoch": 0.0015097422781122894,
468
+ "frac_reward_zero_std": 0.52,
469
+ "grad_norm": 0.1280648559331894,
470
  "kl": 0.0,
471
+ "learning_rate": 9.424746342221532e-08,
472
+ "loss": -0.00025221601128578186,
473
+ "num_tokens": 802604.0,
474
+ "reward": 0.0,
475
+ "reward_std": 0.5131415748596191,
476
+ "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
477
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5131415796279907,
478
  "step": 850
479
  },
480
  {
481
+ "clip_ratio/high_max": 0.0,
482
+ "clip_ratio/high_mean": 0.0,
483
+ "clip_ratio/low_mean": 0.0,
484
+ "clip_ratio/low_min": 0.0,
485
+ "clip_ratio/region_mean": 0.0,
486
+ "completion_length": 74.185,
487
  "completions/clipped_ratio": 0.0,
488
+ "completions/max_length": 74.4,
489
+ "completions/max_terminated_length": 74.4,
490
+ "completions/mean_length": 74.185,
491
+ "completions/mean_terminated_length": 74.185,
492
+ "completions/min_length": 73.92,
493
+ "completions/min_terminated_length": 73.92,
494
+ "epoch": 0.001598550647413012,
495
+ "frac_reward_zero_std": 0.48,
496
+ "grad_norm": 0.4582439661026001,
497
  "kl": 0.0,
498
+ "learning_rate": 9.979796185697477e-08,
499
+ "loss": -8.721232414245606e-05,
500
+ "num_tokens": 855385.0,
501
+ "reward": 3.5762786865234374e-09,
502
+ "reward_std": 0.5559033727645875,
503
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.5762786865234374e-09,
504
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5559033727645875,
505
  "step": 900
506
  },
507
  {
508
  "clip_ratio/high_max": 0.0,
509
  "clip_ratio/high_mean": 0.0,
510
+ "clip_ratio/low_mean": 0.0,
511
+ "clip_ratio/low_min": 0.0,
512
+ "clip_ratio/region_mean": 0.0,
513
+ "completion_length": 69.455,
514
  "completions/clipped_ratio": 0.0,
515
+ "completions/max_length": 69.68,
516
+ "completions/max_terminated_length": 69.68,
517
+ "completions/mean_length": 69.455,
518
+ "completions/mean_terminated_length": 69.455,
519
+ "completions/min_length": 69.04,
520
+ "completions/min_terminated_length": 69.04,
521
+ "epoch": 0.001687359016713735,
522
+ "frac_reward_zero_std": 0.6,
523
+ "grad_norm": 0.03906433284282684,
524
  "kl": 0.0,
525
+ "learning_rate": 1.053484602917342e-07,
526
+ "loss": 0.00033147528767585755,
527
+ "num_tokens": 905012.0,
528
+ "reward": 4.172325134277344e-09,
529
+ "reward_std": 0.42761797428131104,
530
+ "rewards/TranscriptCorrectionGrpoReward/mean": 4.172325134277344e-09,
531
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
532
  "step": 950
533
  },
534
  {
535
  "clip_ratio/high_max": 0.0,
536
  "clip_ratio/high_mean": 0.0,
537
+ "clip_ratio/low_mean": 0.0001351351384073496,
538
+ "clip_ratio/low_min": 0.0001351351384073496,
539
+ "clip_ratio/region_mean": 0.0001351351384073496,
540
+ "completion_length": 61.425,
541
  "completions/clipped_ratio": 0.0,
542
+ "completions/max_length": 61.72,
543
+ "completions/max_terminated_length": 61.72,
544
+ "completions/mean_length": 61.425,
545
+ "completions/mean_terminated_length": 61.425,
546
+ "completions/min_length": 61.12,
547
+ "completions/min_terminated_length": 61.12,
548
+ "epoch": 0.001776167386014458,
549
+ "frac_reward_zero_std": 0.56,
550
+ "grad_norm": 1.1754662990570068,
551
  "kl": 0.0,
552
+ "learning_rate": 1.1089895872649365e-07,
553
+ "loss": -0.0008977200835943222,
554
+ "num_tokens": 949385.0,
555
+ "reward": -5.960464477539063e-10,
556
+ "reward_std": 0.47037978172302247,
557
+ "rewards/TranscriptCorrectionGrpoReward/mean": -5.960464477539063e-10,
558
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
559
  "step": 1000
560
  },
561
  {
562
+ "eval_cer_subset": 0.013076685068477535,
563
+ "eval_cer_subset_edit_distance": 803,
564
  "eval_cer_subset_groups": 250,
565
  "eval_cer_subset_items": 250,
566
  "eval_cer_subset_ref_chars": 61407,
567
+ "eval_loss": 0.03962622743110548,
568
  "step": 1014
569
  },
570
  {
571
  "clip_ratio/high_max": 0.0,
572
  "clip_ratio/high_mean": 0.0,
573
+ "clip_ratio/low_mean": 0.0,
574
+ "clip_ratio/low_min": 0.0,
575
+ "clip_ratio/region_mean": 0.0,
576
+ "completion_length": 61.815,
577
  "completions/clipped_ratio": 0.0,
578
+ "completions/max_length": 62.04,
579
+ "completions/max_terminated_length": 62.04,
580
+ "completions/mean_length": 61.815,
581
+ "completions/mean_terminated_length": 61.815,
582
+ "completions/min_length": 61.56,
583
+ "completions/min_terminated_length": 61.56,
584
+ "epoch": 0.001864975755315181,
585
+ "frac_reward_zero_std": 0.6,
586
+ "grad_norm": 0.0,
587
  "kl": 0.0,
588
+ "learning_rate": 1.1644945716125309e-07,
589
+ "loss": 0.0001289863884449005,
590
+ "num_tokens": 993580.0,
591
+ "reward": 1.4901161193847657e-09,
592
+ "reward_std": 0.4276179885864258,
593
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.4901161193847657e-09,
594
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
595
  "step": 1050
596
  },
597
  {
598
  "clip_ratio/high_max": 0.0,
599
  "clip_ratio/high_mean": 0.0,
600
+ "clip_ratio/low_mean": 0.00011363636702299118,
601
+ "clip_ratio/low_min": 0.00011363636702299118,
602
+ "clip_ratio/region_mean": 0.00011363636702299118,
603
+ "completion_length": 62.35,
604
  "completions/clipped_ratio": 0.0,
605
+ "completions/max_length": 62.56,
606
+ "completions/max_terminated_length": 62.56,
607
+ "completions/mean_length": 62.35,
608
+ "completions/mean_terminated_length": 62.35,
609
+ "completions/min_length": 62.24,
610
+ "completions/min_terminated_length": 62.24,
611
+ "epoch": 0.0019537841246159036,
612
+ "frac_reward_zero_std": 0.64,
613
+ "grad_norm": 0.14012472331523895,
614
  "kl": 0.0,
615
+ "learning_rate": 1.2199995559601254e-07,
616
+ "loss": 0.0005487602949142456,
617
+ "num_tokens": 1038234.0,
618
+ "reward": 3.5762786865234374e-09,
619
+ "reward_std": 0.38485618591308596,
620
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.5762786865234374e-09,
621
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
622
  "step": 1100
623
  },
624
  {
625
  "clip_ratio/high_max": 0.0,
626
  "clip_ratio/high_mean": 0.0,
627
+ "clip_ratio/low_mean": 0.0,
628
+ "clip_ratio/low_min": 0.0,
629
+ "clip_ratio/region_mean": 0.0,
630
+ "completion_length": 67.58,
631
  "completions/clipped_ratio": 0.0,
632
+ "completions/max_length": 67.76,
633
+ "completions/max_terminated_length": 67.76,
634
+ "completions/mean_length": 67.58,
635
+ "completions/mean_terminated_length": 67.58,
636
+ "completions/min_length": 67.32,
637
+ "completions/min_terminated_length": 67.32,
638
+ "epoch": 0.0020425924939166265,
639
+ "frac_reward_zero_std": 0.48,
640
+ "grad_norm": 0.0,
641
  "kl": 0.0,
642
+ "learning_rate": 1.2755045403077197e-07,
643
+ "loss": -0.00019566014409065248,
644
+ "num_tokens": 1086214.0,
645
+ "reward": 2.682209014892578e-09,
646
+ "reward_std": 0.5559033727645875,
647
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.682209014892578e-09,
648
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5559033727645875,
649
  "step": 1150
650
  },
651
  {
652
+ "clip_ratio/high_max": 0.0,
653
+ "clip_ratio/high_mean": 0.0,
654
+ "clip_ratio/low_mean": 6.849315017461777e-05,
655
+ "clip_ratio/low_min": 6.849315017461777e-05,
656
+ "clip_ratio/region_mean": 6.849315017461777e-05,
657
+ "completion_length": 67.835,
658
  "completions/clipped_ratio": 0.0,
659
+ "completions/max_length": 68.12,
660
+ "completions/max_terminated_length": 68.12,
661
+ "completions/mean_length": 67.835,
662
+ "completions/mean_terminated_length": 67.835,
663
+ "completions/min_length": 67.64,
664
+ "completions/min_terminated_length": 67.64,
665
+ "epoch": 0.0021314008632173495,
666
+ "frac_reward_zero_std": 0.64,
667
+ "grad_norm": 0.0,
668
  "kl": 0.0,
669
+ "learning_rate": 1.331009524655314e-07,
670
+ "loss": 0.0007767707109451294,
671
+ "num_tokens": 1134493.0,
672
+ "reward": 1.1920928955078125e-09,
673
+ "reward_std": 0.3848561763763428,
674
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.1920928955078125e-09,
675
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
676
  "step": 1200
677
  },
678
  {
 
681
  "clip_ratio/low_mean": 0.0,
682
  "clip_ratio/low_min": 0.0,
683
  "clip_ratio/region_mean": 0.0,
684
+ "completion_length": 68.39,
685
  "completions/clipped_ratio": 0.0,
686
+ "completions/max_length": 68.68,
687
+ "completions/max_terminated_length": 68.68,
688
+ "completions/mean_length": 68.39,
689
+ "completions/mean_terminated_length": 68.39,
690
+ "completions/min_length": 68.2,
691
+ "completions/min_terminated_length": 68.2,
692
+ "epoch": 0.0022202092325180724,
693
+ "frac_reward_zero_std": 0.6,
694
+ "grad_norm": 0.18301734328269958,
695
  "kl": 0.0,
696
+ "learning_rate": 1.3865145090029086e-07,
697
+ "loss": -0.00031893253326416017,
698
+ "num_tokens": 1183027.0,
699
+ "reward": 2.384185791015625e-09,
700
+ "reward_std": 0.42761797904968263,
701
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.384185791015625e-09,
702
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
703
  "step": 1250
704
  },
705
  {
706
  "clip_ratio/high_max": 0.0,
707
  "clip_ratio/high_mean": 0.0,
708
+ "clip_ratio/low_mean": 0.0,
709
+ "clip_ratio/low_min": 0.0,
710
+ "clip_ratio/region_mean": 0.0,
711
+ "completion_length": 64.62,
712
  "completions/clipped_ratio": 0.0,
713
+ "completions/max_length": 64.72,
714
+ "completions/max_terminated_length": 64.72,
715
+ "completions/mean_length": 64.62,
716
+ "completions/mean_terminated_length": 64.62,
717
+ "completions/min_length": 64.48,
718
+ "completions/min_terminated_length": 64.48,
719
+ "epoch": 0.0023090176018187953,
720
+ "frac_reward_zero_std": 0.8,
721
+ "grad_norm": 0.0,
722
  "kl": 0.0,
723
+ "learning_rate": 1.442019493350503e-07,
724
+ "loss": -0.0005694808065891266,
725
+ "num_tokens": 1229847.0,
726
  "reward": 0.0,
727
+ "reward_std": 0.21380898952484131,
728
  "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
729
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.21380898952484131,
730
  "step": 1300
731
  },
732
  {
733
  "clip_ratio/high_max": 0.0,
734
  "clip_ratio/high_mean": 0.0,
735
+ "clip_ratio/low_mean": 0.00011627906933426857,
736
+ "clip_ratio/low_min": 0.00011627906933426857,
737
+ "clip_ratio/region_mean": 0.00011627906933426857,
738
+ "completion_length": 62.82,
739
  "completions/clipped_ratio": 0.0,
740
+ "completions/max_length": 63.04,
741
+ "completions/max_terminated_length": 63.04,
742
+ "completions/mean_length": 62.82,
743
+ "completions/mean_terminated_length": 62.82,
744
+ "completions/min_length": 62.6,
745
+ "completions/min_terminated_length": 62.6,
746
+ "epoch": 0.0023978259711195183,
747
+ "frac_reward_zero_std": 0.52,
748
+ "grad_norm": 0.0,
749
  "kl": 0.0,
750
+ "learning_rate": 1.4975244776980973e-07,
751
+ "loss": -0.0006587636470794677,
752
+ "num_tokens": 1274827.0,
753
+ "reward": -1.9371509552001954e-09,
754
+ "reward_std": 0.5131415700912476,
755
+ "rewards/TranscriptCorrectionGrpoReward/mean": -1.9371509552001954e-09,
756
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5131415796279907,
757
  "step": 1350
758
  },
759
  {
760
  "clip_ratio/high_max": 0.0,
761
  "clip_ratio/high_mean": 0.0,
762
+ "clip_ratio/low_mean": 0.0,
763
+ "clip_ratio/low_min": 0.0,
764
+ "clip_ratio/region_mean": 0.0,
765
+ "completion_length": 62.54,
766
  "completions/clipped_ratio": 0.0,
767
+ "completions/max_length": 62.68,
768
+ "completions/max_terminated_length": 62.68,
769
+ "completions/mean_length": 62.54,
770
+ "completions/mean_terminated_length": 62.54,
771
+ "completions/min_length": 62.36,
772
+ "completions/min_terminated_length": 62.36,
773
+ "epoch": 0.002486634340420241,
774
+ "frac_reward_zero_std": 0.56,
775
+ "grad_norm": 0.0,
776
  "kl": 0.0,
777
+ "learning_rate": 1.553029462045692e-07,
778
+ "loss": 0.00019890040159225463,
779
+ "num_tokens": 1320071.0,
780
+ "reward": 2.2351741790771484e-09,
781
+ "reward_std": 0.47037978172302247,
782
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.2351741790771484e-09,
783
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.47037978172302247,
784
  "step": 1400
785
  },
786
  {
787
  "clip_ratio/high_max": 0.0,
788
  "clip_ratio/high_mean": 0.0,
789
+ "clip_ratio/low_mean": 0.0,
790
+ "clip_ratio/low_min": 0.0,
791
+ "clip_ratio/region_mean": 0.0,
792
+ "completion_length": 63.6,
793
  "completions/clipped_ratio": 0.0,
794
+ "completions/max_length": 63.68,
795
+ "completions/max_terminated_length": 63.68,
796
+ "completions/mean_length": 63.6,
797
+ "completions/mean_terminated_length": 63.6,
798
+ "completions/min_length": 63.48,
799
+ "completions/min_terminated_length": 63.48,
800
+ "epoch": 0.002575442709720964,
801
+ "frac_reward_zero_std": 0.8,
802
  "grad_norm": 0.0,
803
  "kl": 0.0,
804
+ "learning_rate": 1.6085344463932862e-07,
805
+ "loss": 2.284705638885498e-05,
806
+ "num_tokens": 1366199.0,
807
+ "reward": 1.7881393432617187e-09,
808
+ "reward_std": 0.21380898952484131,
809
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.7881393432617187e-09,
810
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.21380898952484131,
811
  "step": 1450
812
  },
813
  {
814
  "clip_ratio/high_max": 0.0,
815
  "clip_ratio/high_mean": 0.0,
816
+ "clip_ratio/low_mean": 0.0,
817
+ "clip_ratio/low_min": 0.0,
818
+ "clip_ratio/region_mean": 0.0,
819
+ "completion_length": 68.165,
820
  "completions/clipped_ratio": 0.0,
821
+ "completions/max_length": 68.36,
822
+ "completions/max_terminated_length": 68.36,
823
+ "completions/mean_length": 68.165,
824
+ "completions/mean_terminated_length": 68.165,
825
+ "completions/min_length": 67.88,
826
+ "completions/min_terminated_length": 67.88,
827
+ "epoch": 0.002664251079021687,
828
+ "frac_reward_zero_std": 0.56,
829
+ "grad_norm": 0.32475942373275757,
830
  "kl": 0.0,
831
+ "learning_rate": 1.6640394307408808e-07,
832
+ "loss": -0.0001750713586807251,
833
+ "num_tokens": 1415240.0,
834
+ "reward": 8.940696716308593e-10,
835
+ "reward_std": 0.4703797721862793,
836
+ "rewards/TranscriptCorrectionGrpoReward/mean": 8.940696716308593e-10,
837
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
838
  "step": 1500
839
  },
840
  {
841
+ "eval_cer_subset": 0.01286498281954826,
842
+ "eval_cer_subset_edit_distance": 790,
843
  "eval_cer_subset_groups": 250,
844
  "eval_cer_subset_items": 250,
845
  "eval_cer_subset_ref_chars": 61407,
846
+ "eval_loss": 0.03962622743110548,
847
  "step": 1521
848
  }
849
  ],
850
  "logging_steps": 50,
851
+ "max_steps": 2252040,
852
+ "num_input_tokens_seen": 1438098,
853
  "num_train_epochs": 4,
854
  "save_steps": 507,
855
  "stateful_callbacks": {
 
865
  }
866
  },
867
  "total_flos": 0.0,
868
+ "train_batch_size": 1,
869
  "trial_name": null,
870
  "trial_params": null
871
  }
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd03f5456a4c15d2256ba2960a071c6dc2cb8a2ff8cf43b4f1331b6c112c442
3
  size 6737
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdad4bf60f02650b7130e20d3de1e81654197d408ff77954683d133ac0bce86
3
  size 6737
meta-llama__llama-3.2-1b/grpo/run_manifest.json CHANGED
@@ -6,10 +6,10 @@
6
  "model_name": "meta-llama/Llama-3.2-1B",
7
  "status": "running",
8
  "best_metric_name": "eval_cer_subset",
9
- "best_metric_value": 0.012913837184685785,
10
- "best_step": 507,
11
- "best_checkpoint_step": 507,
12
- "latest_checkpoint_step": 1014,
13
  "checkpoints": [
14
  {
15
  "step": 507,
@@ -22,10 +22,16 @@
22
  "epoch": 0.0018010337294186605,
23
  "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1014",
24
  "timestamp": "2026-04-09T03:49:15.799552+00:00"
 
 
 
 
 
 
25
  }
26
  ],
27
  "epoch_artifacts": [],
28
  "resume_history": [],
29
  "created_at": "2026-04-09T03:35:11.674696+00:00",
30
- "updated_at": "2026-04-09T03:49:15.799567+00:00"
31
  }
 
6
  "model_name": "meta-llama/Llama-3.2-1B",
7
  "status": "running",
8
  "best_metric_name": "eval_cer_subset",
9
+ "best_metric_value": 0.01286498281954826,
10
+ "best_step": 1521,
11
+ "best_checkpoint_step": 1521,
12
+ "latest_checkpoint_step": 1521,
13
  "checkpoints": [
14
  {
15
  "step": 507,
 
22
  "epoch": 0.0018010337294186605,
23
  "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1014",
24
  "timestamp": "2026-04-09T03:49:15.799552+00:00"
25
+ },
26
+ {
27
+ "step": 1521,
28
+ "epoch": 0.0027015505941279908,
29
+ "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521",
30
+ "timestamp": "2026-04-09T03:55:44.298843+00:00"
31
  }
32
  ],
33
  "epoch_artifacts": [],
34
  "resume_history": [],
35
  "created_at": "2026-04-09T03:35:11.674696+00:00",
36
+ "updated_at": "2026-04-09T03:55:44.298859+00:00"
37
  }