FormlessAI commited on
Commit
e4d44a2
·
verified ·
1 Parent(s): 9bfdb7e

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a85802343458730f4e02e6074fcd5b9f45d40f89f9783df5919ba436df9b54d9
3
  size 3422229144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56b5df78edf5b3fb567bbf51cfe04d6b9650cbc6910e4b129e3fd4ded457bfc
3
  size 3422229144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12e7e2c13bac2182a08f20e726e2f466f1d33a94fb79c50135c8b21f5d62c28b
3
  size 1738272709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd83f0b7b55b377877d04abc7e1db1bc6ad975a510498e854f134608cae3e1e1
3
  size 1738272709
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:798d7db74a31e880da9afa707fdd136569473cae8b0e37ecd75546b796dce264
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebcc2399037993f023f137c65edec3c249a67d9c697cb3b86a0c75a31475d419
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.5840160250663757,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4889975550122249,
6
  "eval_steps": 50,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -174,6 +174,172 @@
174
  "eval_samples_per_second": 11.253,
175
  "eval_steps_per_second": 0.719,
176
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  }
178
  ],
179
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.3611069321632385,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9779951100244498,
6
  "eval_steps": 50,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
174
  "eval_samples_per_second": 11.253,
175
  "eval_steps_per_second": 0.719,
176
  "step": 50
177
+ },
178
+ {
179
+ "epoch": 0.5378973105134475,
180
+ "grad_norm": 0.0003851531946565956,
181
+ "learning_rate": 8.442723263480497e-06,
182
+ "logits/chosen": NaN,
183
+ "logits/rejected": 0.974880039691925,
184
+ "logps/chosen": -64.03724670410156,
185
+ "logps/rejected": -108.10626220703125,
186
+ "loss": 0.3436,
187
+ "rewards/accuracies": 0.8125,
188
+ "rewards/chosen": 68.7229995727539,
189
+ "rewards/margins": 85.6202621459961,
190
+ "rewards/rejected": -16.897260665893555,
191
+ "step": 55
192
+ },
193
+ {
194
+ "epoch": 0.58679706601467,
195
+ "grad_norm": 0.0005342594813555479,
196
+ "learning_rate": 9.22445689898795e-06,
197
+ "logits/chosen": NaN,
198
+ "logits/rejected": 0.6203755140304565,
199
+ "logps/chosen": -81.26509857177734,
200
+ "logps/rejected": -104.5843276977539,
201
+ "loss": 0.3086,
202
+ "rewards/accuracies": 0.8843749761581421,
203
+ "rewards/chosen": 69.86774444580078,
204
+ "rewards/margins": 82.07366180419922,
205
+ "rewards/rejected": -12.205923080444336,
206
+ "step": 60
207
+ },
208
+ {
209
+ "epoch": 0.6356968215158925,
210
+ "grad_norm": 4.174908845282904e-13,
211
+ "learning_rate": 1.0006190534495405e-05,
212
+ "logits/chosen": NaN,
213
+ "logits/rejected": 0.4278396666049957,
214
+ "logps/chosen": -39.61051940917969,
215
+ "logps/rejected": -92.36933898925781,
216
+ "loss": 0.2063,
217
+ "rewards/accuracies": 0.745312511920929,
218
+ "rewards/chosen": 72.73802947998047,
219
+ "rewards/margins": 85.91956329345703,
220
+ "rewards/rejected": -13.18153190612793,
221
+ "step": 65
222
+ },
223
+ {
224
+ "epoch": 0.684596577017115,
225
+ "grad_norm": 0.002292018150910735,
226
+ "learning_rate": 1.0787924170002858e-05,
227
+ "logits/chosen": NaN,
228
+ "logits/rejected": 0.28609800338745117,
229
+ "logps/chosen": -33.6094856262207,
230
+ "logps/rejected": -84.01991271972656,
231
+ "loss": 0.1113,
232
+ "rewards/accuracies": 0.859375,
233
+ "rewards/chosen": 91.45848083496094,
234
+ "rewards/margins": 93.42339324951172,
235
+ "rewards/rejected": -1.96491277217865,
236
+ "step": 70
237
+ },
238
+ {
239
+ "epoch": 0.7334963325183375,
240
+ "grad_norm": 0.6431168913841248,
241
+ "learning_rate": 1.156965780551031e-05,
242
+ "logits/chosen": NaN,
243
+ "logits/rejected": 0.2203584611415863,
244
+ "logps/chosen": -40.122108459472656,
245
+ "logps/rejected": -93.89823150634766,
246
+ "loss": 0.1585,
247
+ "rewards/accuracies": 0.7718750238418579,
248
+ "rewards/chosen": 74.9581069946289,
249
+ "rewards/margins": 88.4476318359375,
250
+ "rewards/rejected": -13.489527702331543,
251
+ "step": 75
252
+ },
253
+ {
254
+ "epoch": 0.78239608801956,
255
+ "grad_norm": 18.794679641723633,
256
+ "learning_rate": 1.2351391441017764e-05,
257
+ "logits/chosen": NaN,
258
+ "logits/rejected": 0.35011741518974304,
259
+ "logps/chosen": -43.437705993652344,
260
+ "logps/rejected": -116.1953353881836,
261
+ "loss": 0.1879,
262
+ "rewards/accuracies": 0.78125,
263
+ "rewards/chosen": 74.13380432128906,
264
+ "rewards/margins": 96.4022445678711,
265
+ "rewards/rejected": -22.268436431884766,
266
+ "step": 80
267
+ },
268
+ {
269
+ "epoch": 0.8312958435207825,
270
+ "grad_norm": 53.7838020324707,
271
+ "learning_rate": 1.3133125076525218e-05,
272
+ "logits/chosen": NaN,
273
+ "logits/rejected": 0.5478571653366089,
274
+ "logps/chosen": -37.990631103515625,
275
+ "logps/rejected": -131.7003173828125,
276
+ "loss": 0.2416,
277
+ "rewards/accuracies": 0.815625011920929,
278
+ "rewards/chosen": 83.08360290527344,
279
+ "rewards/margins": 111.3319320678711,
280
+ "rewards/rejected": -28.248327255249023,
281
+ "step": 85
282
+ },
283
+ {
284
+ "epoch": 0.8801955990220048,
285
+ "grad_norm": 33.947601318359375,
286
+ "learning_rate": 1.3914858712032673e-05,
287
+ "logits/chosen": NaN,
288
+ "logits/rejected": 0.736972451210022,
289
+ "logps/chosen": -23.155391693115234,
290
+ "logps/rejected": -108.7579116821289,
291
+ "loss": 0.4442,
292
+ "rewards/accuracies": 0.676562488079071,
293
+ "rewards/chosen": 70.76764678955078,
294
+ "rewards/margins": 92.23793029785156,
295
+ "rewards/rejected": -21.47028923034668,
296
+ "step": 90
297
+ },
298
+ {
299
+ "epoch": 0.9290953545232273,
300
+ "grad_norm": 9.191290306978517e-13,
301
+ "learning_rate": 1.4696592347540126e-05,
302
+ "logits/chosen": NaN,
303
+ "logits/rejected": 0.5948934555053711,
304
+ "logps/chosen": -22.43905258178711,
305
+ "logps/rejected": -86.31062316894531,
306
+ "loss": 0.1939,
307
+ "rewards/accuracies": 0.7203124761581421,
308
+ "rewards/chosen": 75.99381256103516,
309
+ "rewards/margins": 85.18391418457031,
310
+ "rewards/rejected": -9.190110206604004,
311
+ "step": 95
312
+ },
313
+ {
314
+ "epoch": 0.9779951100244498,
315
+ "grad_norm": 2.054981402058176e-12,
316
+ "learning_rate": 1.547832598304758e-05,
317
+ "logits/chosen": NaN,
318
+ "logits/rejected": 0.5537346005439758,
319
+ "logps/chosen": -28.839187622070312,
320
+ "logps/rejected": -72.35997009277344,
321
+ "loss": 0.2644,
322
+ "rewards/accuracies": 0.6937500238418579,
323
+ "rewards/chosen": 70.51815795898438,
324
+ "rewards/margins": 75.06769561767578,
325
+ "rewards/rejected": -4.549544334411621,
326
+ "step": 100
327
+ },
328
+ {
329
+ "epoch": 0.9779951100244498,
330
+ "eval_logits/chosen": NaN,
331
+ "eval_logits/rejected": -0.4462790787220001,
332
+ "eval_logps/chosen": -201.78216552734375,
333
+ "eval_logps/rejected": -73.33938598632812,
334
+ "eval_loss": 0.3611069321632385,
335
+ "eval_rewards/accuracies": 0.7414772510528564,
336
+ "eval_rewards/chosen": 1.3431318998336792,
337
+ "eval_rewards/margins": 1.3155231475830078,
338
+ "eval_rewards/rejected": 0.027608675882220268,
339
+ "eval_runtime": 61.2942,
340
+ "eval_samples_per_second": 11.241,
341
+ "eval_steps_per_second": 0.718,
342
+ "step": 100
343
  }
344
  ],
345
  "logging_steps": 5,