brthor committed on
Commit
2f2239b
·
verified ·
1 Parent(s): 762df2e

Upload full trainer checkpoint step=507 (profile=llama3_2_1b_base_grpo, run_type=grpo)

Browse files
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/adapter_config.json CHANGED
@@ -33,13 +33,13 @@
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
 
 
 
36
  "down_proj",
37
  "o_proj",
38
- "q_proj",
39
  "gate_proj",
40
- "k_proj",
41
- "up_proj",
42
- "v_proj"
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
 
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "up_proj",
37
+ "k_proj",
38
+ "v_proj",
39
  "down_proj",
40
  "o_proj",
 
41
  "gate_proj",
42
+ "q_proj"
 
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3488c462f9a16c18f77b6c50b23298751f3403f3423e962979e7384a0b50041
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ffaf8361976d27fe67e70aff60a2b4f511b5d4dcf325ad2b661bf3dcf67788b
3
  size 45118424
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c45fddad6005039ed09a90bd9a0a4a1ce76c7945d53ffdefb831f35b22894d8
3
  size 23162187
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c015b9ccd39081dc46df8e1131e8b46f973b8ba5d7930d46df13b6f10ebb5f5
3
  size 23162187
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bba30b3c53bec237646fa7befb0d9754bb02d88a3b5c49b2dc26db53253a23c0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e14d5d69b57745b112c7829210514ea094e9af0aa8ea5e30f32362a3d8e797f
3
  size 14645
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48205f86f7675d7ee9e083ac48b20843b9d59809cc4035576cf13ff625869f20
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c99e06fc1c30a1b880d75b9b0017154f42da9a6b9f156c33d5915937be0b01
3
  size 1465
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0009005200636580653,
6
  "eval_steps": 500,
7
  "global_step": 507,
8
  "is_hyper_param_search": false,
@@ -15,25 +15,25 @@
15
  "clip_ratio/low_mean": 0.0,
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
- "completion_length": 65.67,
19
  "completions/clipped_ratio": 0.0,
20
- "completions/max_length": 93.75,
21
- "completions/max_terminated_length": 93.75,
22
- "completions/mean_length": 63.3203125,
23
- "completions/mean_terminated_length": 63.3203125,
24
- "completions/min_length": 38.25,
25
- "completions/min_terminated_length": 38.25,
26
- "epoch": 8.88086847789019e-05,
27
- "frac_reward_zero_std": 0.5625,
28
- "grad_norm": 0.007002627942711115,
29
  "kl": 0.0,
30
- "learning_rate": 2.7197442330321267e-08,
31
- "loss": -0.0006554330885410309,
32
- "num_tokens": 57618.0,
33
- "reward": -6.51925802230835e-09,
34
- "reward_std": 0.48731958121061325,
35
- "rewards/TranscriptCorrectionGrpoReward/mean": -8.381903171539307e-09,
36
- "rewards/TranscriptCorrectionGrpoReward/std": 1.007905274629593,
37
  "step": 50
38
  },
39
  {
@@ -42,52 +42,52 @@
42
  "clip_ratio/low_mean": 0.0,
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
- "completion_length": 52.34,
46
  "completions/clipped_ratio": 0.0,
47
- "completions/max_length": 84.33333333333333,
48
- "completions/max_terminated_length": 84.33333333333333,
49
- "completions/mean_length": 51.208333333333336,
50
- "completions/mean_terminated_length": 51.208333333333336,
51
- "completions/min_length": 26.0,
52
- "completions/min_terminated_length": 26.0,
53
- "epoch": 0.0001776173695578038,
54
- "frac_reward_zero_std": 0.6666666666666666,
55
  "grad_norm": 0.0,
56
  "kl": 0.0,
57
- "learning_rate": 5.494993450411847e-08,
58
- "loss": 0.0013675823807716369,
59
- "num_tokens": 94074.0,
60
- "reward": 1.4901161193847656e-08,
61
- "reward_std": 0.44680649042129517,
62
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.179675261179606e-08,
63
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
64
  "step": 100
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
68
  "clip_ratio/high_mean": 0.0,
69
- "clip_ratio/low_mean": 0.00017899675294756888,
70
- "clip_ratio/low_min": 0.00017899675294756888,
71
- "clip_ratio/region_mean": 0.00017899675294756888,
72
- "completion_length": 64.945,
73
  "completions/clipped_ratio": 0.0,
74
- "completions/max_length": 119.33333333333333,
75
- "completions/max_terminated_length": 119.33333333333333,
76
- "completions/mean_length": 66.65625,
77
- "completions/mean_terminated_length": 66.65625,
78
- "completions/min_length": 39.0,
79
- "completions/min_terminated_length": 39.0,
80
- "epoch": 0.0002664260543367057,
81
- "frac_reward_zero_std": 0.7083333333333334,
82
- "grad_norm": 0.015731461346149445,
83
  "kl": 0.0,
84
- "learning_rate": 8.270242667791569e-08,
85
- "loss": 0.0017325745522975922,
86
- "num_tokens": 139152.0,
87
- "reward": 2.483526865641276e-09,
88
- "reward_std": 0.2613494098186493,
89
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
90
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
91
  "step": 150
92
  },
93
  {
@@ -96,52 +96,52 @@
96
  "clip_ratio/low_mean": 0.0,
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
- "completion_length": 60.15,
100
  "completions/clipped_ratio": 0.0,
101
- "completions/max_length": 99.0,
102
- "completions/max_terminated_length": 99.0,
103
- "completions/mean_length": 63.015625,
104
- "completions/mean_terminated_length": 63.015625,
105
- "completions/min_length": 32.666666666666664,
106
- "completions/min_terminated_length": 32.666666666666664,
107
- "epoch": 0.0003552347391156076,
108
- "frac_reward_zero_std": 0.4583333333333333,
109
- "grad_norm": 0.039579447358846664,
110
  "kl": 0.0,
111
- "learning_rate": 1.1045491885171288e-07,
112
- "loss": -0.0044269835948944096,
113
- "num_tokens": 182803.0,
114
- "reward": -4.967053731282552e-09,
115
- "reward_std": 0.5628267228603363,
116
- "rewards/TranscriptCorrectionGrpoReward/mean": -2.483526865641276e-09,
117
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052845637004,
118
  "step": 200
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
122
  "clip_ratio/high_mean": 0.0,
123
- "clip_ratio/low_mean": 0.00016559829469770193,
124
- "clip_ratio/low_min": 0.00016559829469770193,
125
- "clip_ratio/region_mean": 0.00016559829469770193,
126
- "completion_length": 64.905,
127
  "completions/clipped_ratio": 0.0,
128
- "completions/max_length": 98.66666666666667,
129
- "completions/max_terminated_length": 98.66666666666667,
130
- "completions/mean_length": 64.203125,
131
- "completions/mean_terminated_length": 64.203125,
132
- "completions/min_length": 35.666666666666664,
133
- "completions/min_terminated_length": 35.666666666666664,
134
- "epoch": 0.0004440434238945095,
135
- "frac_reward_zero_std": 0.4166666666666667,
136
- "grad_norm": 0.011699045076966286,
137
  "kl": 0.0,
138
- "learning_rate": 1.382074110255101e-07,
139
- "loss": 0.00223430335521698,
140
- "num_tokens": 227314.0,
141
- "reward": 2.2351741790771484e-08,
142
- "reward_std": 0.4298504690329234,
143
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.241763432820638e-08,
144
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
145
  "step": 250
146
  },
147
  {
@@ -150,106 +150,106 @@
150
  "clip_ratio/low_mean": 0.0,
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
- "completion_length": 70.07,
154
  "completions/clipped_ratio": 0.0,
155
- "completions/max_length": 108.0,
156
- "completions/max_terminated_length": 108.0,
157
- "completions/mean_length": 70.63541666666667,
158
- "completions/mean_terminated_length": 70.63541666666667,
159
- "completions/min_length": 34.0,
160
- "completions/min_terminated_length": 34.0,
161
- "epoch": 0.0005328521086734114,
162
- "frac_reward_zero_std": 0.5833333333333334,
163
- "grad_norm": 0.0456908717751503,
164
  "kl": 0.0,
165
- "learning_rate": 1.659599031993073e-07,
166
- "loss": -0.0011175717413425446,
167
- "num_tokens": 275956.0,
168
- "reward": 5.587935447692871e-09,
169
- "reward_std": 0.4012269576390584,
170
- "rewards/TranscriptCorrectionGrpoReward/mean": 3.104408582051595e-09,
171
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
172
  "step": 300
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
176
  "clip_ratio/high_mean": 0.0,
177
- "clip_ratio/low_mean": 5.8139534667134284e-05,
178
- "clip_ratio/low_min": 5.8139534667134284e-05,
179
- "clip_ratio/region_mean": 5.8139534667134284e-05,
180
- "completion_length": 62.93,
181
  "completions/clipped_ratio": 0.0,
182
- "completions/max_length": 95.66666666666667,
183
- "completions/max_terminated_length": 95.66666666666667,
184
- "completions/mean_length": 61.6875,
185
- "completions/mean_terminated_length": 61.6875,
186
- "completions/min_length": 29.333333333333332,
187
- "completions/min_terminated_length": 29.333333333333332,
188
- "epoch": 0.0006216607934523133,
189
- "frac_reward_zero_std": 0.75,
190
- "grad_norm": 0.0190290417522192,
191
  "kl": 0.0,
192
- "learning_rate": 1.9371239537310452e-07,
193
- "loss": -0.0010719958692789078,
194
- "num_tokens": 319328.0,
195
- "reward": -2.483526865641276e-09,
196
- "reward_std": 0.34829530119895935,
197
- "rewards/TranscriptCorrectionGrpoReward/mean": -4.967053731282552e-09,
198
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
199
  "step": 350
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
203
  "clip_ratio/high_mean": 0.0,
204
- "clip_ratio/low_mean": 7.352941203862428e-05,
205
- "clip_ratio/low_min": 7.352941203862428e-05,
206
- "clip_ratio/region_mean": 7.352941203862428e-05,
207
- "completion_length": 62.725,
208
  "completions/clipped_ratio": 0.0,
209
- "completions/max_length": 105.66666666666667,
210
- "completions/max_terminated_length": 105.66666666666667,
211
- "completions/mean_length": 62.890625,
212
- "completions/mean_terminated_length": 62.890625,
213
- "completions/min_length": 34.666666666666664,
214
- "completions/min_terminated_length": 34.666666666666664,
215
- "epoch": 0.0007104694782312152,
216
- "frac_reward_zero_std": 0.5,
217
- "grad_norm": 0.02625960297882557,
218
  "kl": 0.0,
219
- "learning_rate": 2.2146488754690174e-07,
220
- "loss": 0.0013731226325035095,
221
- "num_tokens": 362563.0,
222
- "reward": 4.967053731282552e-09,
223
- "reward_std": 0.44316299756368,
224
- "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
225
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
226
  "step": 400
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
230
  "clip_ratio/high_mean": 0.0,
231
- "clip_ratio/low_mean": 8.695651777088643e-05,
232
- "clip_ratio/low_min": 8.695651777088643e-05,
233
- "clip_ratio/region_mean": 8.695651777088643e-05,
234
- "completion_length": 66.03,
235
  "completions/clipped_ratio": 0.0,
236
- "completions/max_length": 112.0,
237
- "completions/max_terminated_length": 112.0,
238
- "completions/mean_length": 70.2890625,
239
- "completions/mean_terminated_length": 70.2890625,
240
- "completions/min_length": 42.75,
241
- "completions/min_terminated_length": 42.75,
242
- "epoch": 0.0007992781630101171,
243
- "frac_reward_zero_std": 0.5625,
244
- "grad_norm": 0.0035960455425083637,
245
  "kl": 0.0,
246
- "learning_rate": 2.492173797206989e-07,
247
- "loss": -0.0004108186066150665,
248
- "num_tokens": 427021.0,
249
- "reward": 1.30385160446167e-08,
250
- "reward_std": 0.383321788161993,
251
- "rewards/TranscriptCorrectionGrpoReward/mean": 1.4901161193847656e-08,
252
- "rewards/TranscriptCorrectionGrpoReward/std": 1.0079052448272705,
253
  "step": 450
254
  },
255
  {
@@ -258,40 +258,40 @@
258
  "clip_ratio/low_mean": 0.0,
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
- "completion_length": 63.98,
262
  "completions/clipped_ratio": 0.0,
263
- "completions/max_length": 85.33333333333333,
264
- "completions/max_terminated_length": 85.33333333333333,
265
- "completions/mean_length": 55.046875,
266
- "completions/mean_terminated_length": 55.046875,
267
- "completions/min_length": 29.666666666666668,
268
- "completions/min_terminated_length": 29.666666666666668,
269
- "epoch": 0.000888086847789019,
270
- "frac_reward_zero_std": 0.7916666666666666,
271
- "grad_norm": 0.0006704159895889461,
272
  "kl": 0.0,
273
- "learning_rate": 2.7696987189449615e-07,
274
- "loss": 0.0006217561289668084,
275
- "num_tokens": 464766.0,
276
- "reward": -4.967053731282552e-09,
277
- "reward_std": 0.1608196645975113,
278
- "rewards/TranscriptCorrectionGrpoReward/mean": 4.967053731282552e-09,
279
- "rewards/TranscriptCorrectionGrpoReward/std": 0.6719369093577067,
280
  "step": 500
281
  },
282
  {
283
- "eval_cer_subset": 0.013239532952269286,
284
- "eval_cer_subset_edit_distance": 813,
285
  "eval_cer_subset_groups": 250,
286
  "eval_cer_subset_items": 250,
287
  "eval_cer_subset_ref_chars": 61407,
288
- "eval_loss": 0.03968957871396896,
289
  "step": 507
290
  }
291
  ],
292
  "logging_steps": 50,
293
- "max_steps": 2252032,
294
- "num_input_tokens_seen": 464766,
295
  "num_train_epochs": 4,
296
  "save_steps": 507,
297
  "stateful_callbacks": {
@@ -307,7 +307,7 @@
307
  }
308
  },
309
  "total_flos": 0.0,
310
- "train_batch_size": 2,
311
  "trial_name": null,
312
  "trial_params": null
313
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0009005168647093303,
6
  "eval_steps": 500,
7
  "global_step": 507,
8
  "is_hyper_param_search": false,
 
15
  "clip_ratio/low_mean": 0.0,
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
+ "completion_length": 65.68,
19
  "completions/clipped_ratio": 0.0,
20
+ "completions/max_length": 65.88,
21
+ "completions/max_terminated_length": 65.88,
22
+ "completions/mean_length": 65.68,
23
+ "completions/mean_terminated_length": 65.68,
24
+ "completions/min_length": 65.48,
25
+ "completions/min_terminated_length": 65.48,
26
+ "epoch": 8.88083693007229e-05,
27
+ "frac_reward_zero_std": 0.56,
28
+ "grad_norm": 0.5719226598739624,
29
  "kl": 0.0,
30
+ "learning_rate": 5.439488466064254e-09,
31
+ "loss": -0.000577671229839325,
32
+ "num_tokens": 46552.0,
33
+ "reward": 2.9802322387695314e-09,
34
+ "reward_std": 0.47037978172302247,
35
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.9802322387695314e-09,
36
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
37
  "step": 50
38
  },
39
  {
 
42
  "clip_ratio/low_mean": 0.0,
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
+ "completion_length": 53.46,
46
  "completions/clipped_ratio": 0.0,
47
+ "completions/max_length": 53.6,
48
+ "completions/max_terminated_length": 53.6,
49
+ "completions/mean_length": 53.46,
50
+ "completions/mean_terminated_length": 53.46,
51
+ "completions/min_length": 53.24,
52
+ "completions/min_terminated_length": 53.24,
53
+ "epoch": 0.0001776167386014458,
54
+ "frac_reward_zero_std": 0.64,
55
  "grad_norm": 0.0,
56
  "kl": 0.0,
57
+ "learning_rate": 1.0989986900823695e-08,
58
+ "loss": -0.0009160846471786499,
59
+ "num_tokens": 85556.0,
60
+ "reward": 3.874301910400391e-09,
61
+ "reward_std": 0.38485618114471437,
62
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.874301910400391e-09,
63
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.38485618114471437,
64
  "step": 100
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
68
  "clip_ratio/high_mean": 0.0,
69
+ "clip_ratio/low_mean": 0.00011904762126505375,
70
+ "clip_ratio/low_min": 0.00011904762126505375,
71
+ "clip_ratio/region_mean": 0.00011904762126505375,
72
+ "completion_length": 62.48,
73
  "completions/clipped_ratio": 0.0,
74
+ "completions/max_length": 62.6,
75
+ "completions/max_terminated_length": 62.6,
76
+ "completions/mean_length": 62.48,
77
+ "completions/mean_terminated_length": 62.48,
78
+ "completions/min_length": 62.24,
79
+ "completions/min_terminated_length": 62.24,
80
+ "epoch": 0.0002664251079021687,
81
+ "frac_reward_zero_std": 0.56,
82
+ "grad_norm": 0.2554231286048889,
83
  "kl": 0.0,
84
+ "learning_rate": 1.6540485335583138e-08,
85
+ "loss": -0.0009111672639846802,
86
+ "num_tokens": 130380.0,
87
+ "reward": -1.1920928955078125e-09,
88
+ "reward_std": 0.4703797769546509,
89
+ "rewards/TranscriptCorrectionGrpoReward/mean": -1.1920928955078125e-09,
90
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.47037978172302247,
91
  "step": 150
92
  },
93
  {
 
96
  "clip_ratio/low_mean": 0.0,
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
+ "completion_length": 62.175,
100
  "completions/clipped_ratio": 0.0,
101
+ "completions/max_length": 62.36,
102
+ "completions/max_terminated_length": 62.36,
103
+ "completions/mean_length": 62.175,
104
+ "completions/mean_terminated_length": 62.175,
105
+ "completions/min_length": 61.88,
106
+ "completions/min_terminated_length": 61.88,
107
+ "epoch": 0.0003552334772028916,
108
+ "frac_reward_zero_std": 0.56,
109
+ "grad_norm": 0.627755343914032,
110
  "kl": 0.0,
111
+ "learning_rate": 2.2090983770342578e-08,
112
+ "loss": -0.0005262196063995361,
113
+ "num_tokens": 174959.0,
114
+ "reward": 1.1920928955078125e-09,
115
+ "reward_std": 0.47037978172302247,
116
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.1920928955078125e-09,
117
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
118
  "step": 200
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
122
  "clip_ratio/high_mean": 0.0,
123
+ "clip_ratio/low_mean": 0.00016129031777381897,
124
+ "clip_ratio/low_min": 0.00016129031777381897,
125
+ "clip_ratio/region_mean": 0.00016129031777381897,
126
+ "completion_length": 65.835,
127
  "completions/clipped_ratio": 0.0,
128
+ "completions/max_length": 66.08,
129
+ "completions/max_terminated_length": 66.08,
130
+ "completions/mean_length": 65.835,
131
+ "completions/mean_terminated_length": 65.835,
132
+ "completions/min_length": 65.6,
133
+ "completions/min_terminated_length": 65.6,
134
+ "epoch": 0.0004440418465036145,
135
+ "frac_reward_zero_std": 0.52,
136
+ "grad_norm": 0.0,
137
  "kl": 0.0,
138
+ "learning_rate": 2.764148220510202e-08,
139
+ "loss": -0.00036669328808784483,
140
+ "num_tokens": 222270.0,
141
+ "reward": 3.874301910400391e-09,
142
+ "reward_std": 0.5131415796279907,
143
+ "rewards/TranscriptCorrectionGrpoReward/mean": 3.874301910400391e-09,
144
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.5131415748596191,
145
  "step": 250
146
  },
147
  {
 
150
  "clip_ratio/low_mean": 0.0,
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
+ "completion_length": 68.75,
154
  "completions/clipped_ratio": 0.0,
155
+ "completions/max_length": 69.04,
156
+ "completions/max_terminated_length": 69.04,
157
+ "completions/mean_length": 68.75,
158
+ "completions/mean_terminated_length": 68.75,
159
+ "completions/min_length": 68.56,
160
+ "completions/min_terminated_length": 68.56,
161
+ "epoch": 0.0005328502158043374,
162
+ "frac_reward_zero_std": 0.6,
163
+ "grad_norm": 0.0,
164
  "kl": 0.0,
165
+ "learning_rate": 3.3191980639861464e-08,
166
+ "loss": 0.0001394149661064148,
167
+ "num_tokens": 271740.0,
168
+ "reward": 6.258487701416015e-09,
169
+ "reward_std": 0.4276179838180542,
170
+ "rewards/TranscriptCorrectionGrpoReward/mean": 6.258487701416015e-09,
171
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.42761797904968263,
172
  "step": 300
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
176
  "clip_ratio/high_mean": 0.0,
177
+ "clip_ratio/low_mean": 0.0,
178
+ "clip_ratio/low_min": 0.0,
179
+ "clip_ratio/region_mean": 0.0,
180
+ "completion_length": 61.07,
181
  "completions/clipped_ratio": 0.0,
182
+ "completions/max_length": 61.2,
183
+ "completions/max_terminated_length": 61.2,
184
+ "completions/mean_length": 61.07,
185
+ "completions/mean_terminated_length": 61.07,
186
+ "completions/min_length": 60.96,
187
+ "completions/min_terminated_length": 60.96,
188
+ "epoch": 0.0006216585851050603,
189
+ "frac_reward_zero_std": 0.72,
190
+ "grad_norm": 0.0,
191
  "kl": 0.0,
192
+ "learning_rate": 3.8742479074620904e-08,
193
+ "loss": -5.508854985237121e-05,
194
+ "num_tokens": 316354.0,
195
+ "reward": 2.384185791015625e-09,
196
+ "reward_std": 0.29933258533477786,
197
+ "rewards/TranscriptCorrectionGrpoReward/mean": 2.384185791015625e-09,
198
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.2993325901031494,
199
  "step": 350
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
203
  "clip_ratio/high_mean": 0.0,
204
+ "clip_ratio/low_mean": 0.0,
205
+ "clip_ratio/low_min": 0.0,
206
+ "clip_ratio/region_mean": 0.0,
207
+ "completion_length": 64.22,
208
  "completions/clipped_ratio": 0.0,
209
+ "completions/max_length": 64.6,
210
+ "completions/max_terminated_length": 64.6,
211
+ "completions/mean_length": 64.22,
212
+ "completions/mean_terminated_length": 64.22,
213
+ "completions/min_length": 63.92,
214
+ "completions/min_terminated_length": 63.92,
215
+ "epoch": 0.0007104669544057832,
216
+ "frac_reward_zero_std": 0.56,
217
+ "grad_norm": 0.0,
218
  "kl": 0.0,
219
+ "learning_rate": 4.429297750938035e-08,
220
+ "loss": -0.0010024748742580413,
221
+ "num_tokens": 362550.0,
222
+ "reward": 5.960464477539063e-10,
223
+ "reward_std": 0.4703797769546509,
224
+ "rewards/TranscriptCorrectionGrpoReward/mean": 5.960464477539063e-10,
225
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4703797769546509,
226
  "step": 400
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
230
  "clip_ratio/high_mean": 0.0,
231
+ "clip_ratio/low_mean": 0.0,
232
+ "clip_ratio/low_min": 0.0,
233
+ "clip_ratio/region_mean": 0.0,
234
+ "completion_length": 65.25,
235
  "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 65.36,
237
+ "completions/max_terminated_length": 65.36,
238
+ "completions/mean_length": 65.25,
239
+ "completions/mean_terminated_length": 65.25,
240
+ "completions/min_length": 65.04,
241
+ "completions/min_terminated_length": 65.04,
242
+ "epoch": 0.000799275323706506,
243
+ "frac_reward_zero_std": 0.6,
244
+ "grad_norm": 0.0,
245
  "kl": 0.0,
246
+ "learning_rate": 4.984347594413978e-08,
247
+ "loss": -0.0002909022569656372,
248
+ "num_tokens": 409416.0,
249
+ "reward": 1.7881393432617187e-09,
250
+ "reward_std": 0.4276179838180542,
251
+ "rewards/TranscriptCorrectionGrpoReward/mean": 1.7881393432617187e-09,
252
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.4276179838180542,
253
  "step": 450
254
  },
255
  {
 
258
  "clip_ratio/low_mean": 0.0,
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
+ "completion_length": 65.76,
262
  "completions/clipped_ratio": 0.0,
263
+ "completions/max_length": 65.92,
264
+ "completions/max_terminated_length": 65.92,
265
+ "completions/mean_length": 65.76,
266
+ "completions/mean_terminated_length": 65.76,
267
+ "completions/min_length": 65.68,
268
+ "completions/min_terminated_length": 65.68,
269
+ "epoch": 0.000888083693007229,
270
+ "frac_reward_zero_std": 0.84,
271
+ "grad_norm": 0.035873379558324814,
272
  "kl": 0.0,
273
+ "learning_rate": 5.5393974378899236e-08,
274
+ "loss": -0.000893859937787056,
275
+ "num_tokens": 456024.0,
276
+ "reward": 0.0,
277
+ "reward_std": 0.17104718685150147,
278
+ "rewards/TranscriptCorrectionGrpoReward/mean": 0.0,
279
+ "rewards/TranscriptCorrectionGrpoReward/std": 0.17104719161987306,
280
  "step": 500
281
  },
282
  {
283
+ "eval_cer_subset": 0.012913837184685785,
284
+ "eval_cer_subset_edit_distance": 793,
285
  "eval_cer_subset_groups": 250,
286
  "eval_cer_subset_items": 250,
287
  "eval_cer_subset_ref_chars": 61407,
288
+ "eval_loss": 0.03959455178967374,
289
  "step": 507
290
  }
291
  ],
292
  "logging_steps": 50,
293
+ "max_steps": 2252040,
294
+ "num_input_tokens_seen": 462420,
295
  "num_train_epochs": 4,
296
  "save_steps": 507,
297
  "stateful_callbacks": {
 
307
  }
308
  },
309
  "total_flos": 0.0,
310
+ "train_batch_size": 1,
311
  "trial_name": null,
312
  "trial_params": null
313
  }
meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd03f5456a4c15d2256ba2960a071c6dc2cb8a2ff8cf43b4f1331b6c112c442
3
  size 6737
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdad4bf60f02650b7130e20d3de1e81654197d408ff77954683d133ac0bce86
3
  size 6737
meta-llama__llama-3.2-1b/grpo/run_manifest.json CHANGED
@@ -1,187 +1,25 @@
1
  {
2
  "schema_version": 1,
3
- "run_id": "3c42986d-e4d4-455e-95c4-4a4d5ecb544c",
4
  "run_type": "grpo",
5
  "profile_name": "llama3_2_1b_base_grpo",
6
  "model_name": "meta-llama/Llama-3.2-1B",
7
  "status": "running",
8
  "best_metric_name": "eval_cer_subset",
9
- "best_metric_value": 0.01195303467031446,
10
- "best_step": 3042,
11
- "best_checkpoint_step": 3042,
12
- "latest_checkpoint_step": 14196,
13
  "checkpoints": [
14
  {
15
  "step": 507,
16
- "epoch": 0.0009005200636580653,
17
  "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507",
18
- "timestamp": "2026-04-09T01:05:45.596061+00:00"
19
- },
20
- {
21
- "step": 1014,
22
- "epoch": 0.0018010401273161306,
23
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1014",
24
- "timestamp": "2026-04-09T01:10:57.965692+00:00"
25
- },
26
- {
27
- "step": 1521,
28
- "epoch": 0.0027015601909741956,
29
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-1521",
30
- "timestamp": "2026-04-09T01:17:04.646992+00:00"
31
- },
32
- {
33
- "step": 2028,
34
- "epoch": 0.003602080254632261,
35
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-2028",
36
- "timestamp": "2026-04-09T01:23:03.985957+00:00"
37
- },
38
- {
39
- "step": 2535,
40
- "epoch": 0.004502600318290326,
41
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-2535",
42
- "timestamp": "2026-04-09T01:29:18.820055+00:00"
43
- },
44
- {
45
- "step": 3042,
46
- "epoch": 0.005403120381948391,
47
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-3042",
48
- "timestamp": "2026-04-09T01:34:32.562568+00:00"
49
- },
50
- {
51
- "step": 3549,
52
- "epoch": 0.006303640445606456,
53
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-3549",
54
- "timestamp": "2026-04-09T01:39:39.758317+00:00"
55
- },
56
- {
57
- "step": 4056,
58
- "epoch": 0.007204160509264522,
59
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-4056",
60
- "timestamp": "2026-04-09T01:44:53.215161+00:00"
61
- },
62
- {
63
- "step": 4563,
64
- "epoch": 0.008104680572922587,
65
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-4563",
66
- "timestamp": "2026-04-09T01:50:12.247540+00:00"
67
- },
68
- {
69
- "step": 5070,
70
- "epoch": 0.009005200636580652,
71
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-5070",
72
- "timestamp": "2026-04-09T01:55:16.037586+00:00"
73
- },
74
- {
75
- "step": 5577,
76
- "epoch": 0.009905720700238718,
77
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-5577",
78
- "timestamp": "2026-04-09T02:00:26.191388+00:00"
79
- },
80
- {
81
- "step": 6084,
82
- "epoch": 0.010806240763896783,
83
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-6084",
84
- "timestamp": "2026-04-09T02:05:34.159056+00:00"
85
- },
86
- {
87
- "step": 6591,
88
- "epoch": 0.011706760827554848,
89
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-6591",
90
- "timestamp": "2026-04-09T02:10:28.495758+00:00"
91
- },
92
- {
93
- "step": 7098,
94
- "epoch": 0.012607280891212913,
95
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-7098",
96
- "timestamp": "2026-04-09T02:15:29.948111+00:00"
97
- },
98
- {
99
- "step": 7605,
100
- "epoch": 0.01350780095487098,
101
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-7605",
102
- "timestamp": "2026-04-09T02:20:27.241605+00:00"
103
- },
104
- {
105
- "step": 8112,
106
- "epoch": 0.014408321018529045,
107
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-8112",
108
- "timestamp": "2026-04-09T02:26:44.879194+00:00"
109
- },
110
- {
111
- "step": 8619,
112
- "epoch": 0.01530884108218711,
113
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-8619",
114
- "timestamp": "2026-04-09T02:32:09.353801+00:00"
115
- },
116
- {
117
- "step": 9126,
118
- "epoch": 0.016209361145845175,
119
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-9126",
120
- "timestamp": "2026-04-09T02:37:18.094294+00:00"
121
- },
122
- {
123
- "step": 9633,
124
- "epoch": 0.017109881209503238,
125
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-9633",
126
- "timestamp": "2026-04-09T02:42:25.354730+00:00"
127
- },
128
- {
129
- "step": 10140,
130
- "epoch": 0.018010401273161305,
131
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-10140",
132
- "timestamp": "2026-04-09T02:47:29.896532+00:00"
133
- },
134
- {
135
- "step": 10647,
136
- "epoch": 0.01891092133681937,
137
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-10647",
138
- "timestamp": "2026-04-09T02:52:33.759945+00:00"
139
- },
140
- {
141
- "step": 11154,
142
- "epoch": 0.019811441400477435,
143
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-11154",
144
- "timestamp": "2026-04-09T02:57:47.308962+00:00"
145
- },
146
- {
147
- "step": 11661,
148
- "epoch": 0.020711961464135502,
149
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-11661",
150
- "timestamp": "2026-04-09T03:03:03.600465+00:00"
151
- },
152
- {
153
- "step": 12168,
154
- "epoch": 0.021612481527793565,
155
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-12168",
156
- "timestamp": "2026-04-09T03:08:25.241749+00:00"
157
- },
158
- {
159
- "step": 12675,
160
- "epoch": 0.022513001591451632,
161
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-12675",
162
- "timestamp": "2026-04-09T03:13:36.439541+00:00"
163
- },
164
- {
165
- "step": 13182,
166
- "epoch": 0.023413521655109695,
167
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-13182",
168
- "timestamp": "2026-04-09T03:18:46.544532+00:00"
169
- },
170
- {
171
- "step": 13689,
172
- "epoch": 0.024314041718767762,
173
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-13689",
174
- "timestamp": "2026-04-09T03:24:16.394544+00:00"
175
- },
176
- {
177
- "step": 14196,
178
- "epoch": 0.025214561782425825,
179
- "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-14196",
180
- "timestamp": "2026-04-09T03:29:35.818009+00:00"
181
  }
182
  ],
183
  "epoch_artifacts": [],
184
  "resume_history": [],
185
- "created_at": "2026-04-09T00:54:21.973160+00:00",
186
- "updated_at": "2026-04-09T03:29:35.818024+00:00"
187
  }
 
1
  {
2
  "schema_version": 1,
3
+ "run_id": "09bed511-51e5-467b-b9b7-356e13c38e62",
4
  "run_type": "grpo",
5
  "profile_name": "llama3_2_1b_base_grpo",
6
  "model_name": "meta-llama/Llama-3.2-1B",
7
  "status": "running",
8
  "best_metric_name": "eval_cer_subset",
9
+ "best_metric_value": 0.012913837184685785,
10
+ "best_step": 507,
11
+ "best_checkpoint_step": 507,
12
+ "latest_checkpoint_step": 507,
13
  "checkpoints": [
14
  {
15
  "step": 507,
16
+ "epoch": 0.0009005168647093303,
17
  "repo_path": "meta-llama__llama-3.2-1b/grpo/checkpoints/checkpoint-507",
18
+ "timestamp": "2026-04-09T03:42:39.674363+00:00"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "epoch_artifacts": [],
22
  "resume_history": [],
23
+ "created_at": "2026-04-09T03:35:11.674696+00:00",
24
+ "updated_at": "2026-04-09T03:42:39.674379+00:00"
25
  }