willyli commited on
Commit
64cb707
·
verified ·
1 Parent(s): fd85b3d

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d7bb49a546070d0f06db42d905c28e11210d2dd3a84556f9776bbdb1c72515
3
  size 4962001760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:498927c08771e04a831f83e7a148866b9ec2d9236382d3fca73a82829ba3e116
3
  size 4962001760
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7cf2d25394898ae6c044195e091f0137da5d97c36fdcf256310291b6c788153
3
  size 4915916160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf18cb7323bdf103c060825194b12041cd9f65527e6801e58f01c2af2518636
3
  size 4915916160
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7b626ddfa7b2b2f6c99cb6edd374324fe1cc409795921848907a0fe580e3ae5
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf047b0776c247cd18bfec687a84a46e4c272eaf95d4cf533bebf3adf64f86ee
3
  size 4999819336
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9155e26da421bd5d4381861155dc2bb5b7da6d04160fa66c7db3e3f4bc84ac41
3
  size 1623221024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a83528af594926033e99367dc5529ec2083f14bb66aeb7d9573f7d56ce7848fe
3
  size 1623221024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d331ef78c125b3b95b357c580a6966cc0242f5c30f3e85c0ea4b7924ca469750
3
- size 13053643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa69d5d5f89ce23f9d6b71b738413f9206b416237feabf7585aeab96db924124
3
+ size 13053963
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:575512e9c00b18ee70c366f67b2b0f002290285083714312b8bf78b36aeea0c5
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289f39013137940cf1ee5449b041384da04bc9bb685618953fd31d6c772eea95
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:870871eb5e1c3f4436d61b35649980a174de89fb8ad9788f2d73e82294209354
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:989bd77ff2885f40e3c715332e40e1c540ff701d6b100ef8efad9627d7d3c776
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ff12f33aef0e4cfef98b601f4c6d3d0beb4abced43c5f557c11bc4ce3ddf53
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:377f1d7a96c328bb457e236cce32a38315a6883ea0ab33abae7d9374afeb92c7
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fbf244dea2f57f580c016311c28847fdf6a8d0d8af08ed4d78d75d102da168c
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0739a97c2722739882afaf6b544115b335b7f2dc177e7fce8c522b3da6991521
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99908a68d404fdefbbfa797eb2f5eb9f75a3a37d1b8fdab17f87efbde6f4ce4f
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e3fea8223898c73ab613293e12de1974fe1688bc3594682f3c0aa0de08b3c6
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52f189356a4e5d077c8438dd9c08ba1a692ac0e78f07054f726870cec5093c7e
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968b0e878dc8c9dab1af28d2a5a3c5c25fb01d09bbb5d144524ae269d784486a
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211015dbfc9200331d3401fde9ae901b6bd709c00b3c545bad1187ac833c5739
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79c15f80530d12a2c7921202ae7531199c99ba76f622702271dce29c3ab78c7
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:730c163411d01a0423c98cd2284d621aa5f8be1b009f2a5fcf413bdbd89e32c4
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925732ed115662d23fac8942143e41da75815df4736560086168dcda367dc0c5
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87ebe31a6312a81e5b51b292be97bf6be0c0e133978c540813601a19980f8814
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbc8305b7ca0c6d67c3d651c7ca33470c406cdfe738d0a9715f708a4a927f87
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.6666666666666665,
6
  "eval_steps": 100,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -255,6 +255,124 @@
255
  "loss": 0.4785,
256
  "rewards/rejected": 0.5283474445343017,
257
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  }
259
  ],
260
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
  "eval_steps": 100,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
255
  "loss": 0.4785,
256
  "rewards/rejected": 0.5283474445343017,
257
  "step": 200
258
+ },
259
+ {
260
+ "epoch": 2.8,
261
+ "grad_norm": 59.25,
262
+ "kl": 1.2280102968215942,
263
+ "learning_rate": 7.213333333333334e-07,
264
+ "logits/rejected": 1761185177.6,
265
+ "logps/rejected": -1594.97919921875,
266
+ "loss": 0.4105,
267
+ "rewards/rejected": -0.23803796768188476,
268
+ "step": 210
269
+ },
270
+ {
271
+ "epoch": 2.9333333333333336,
272
+ "grad_norm": 70.5,
273
+ "kl": 0.0,
274
+ "learning_rate": 7.079999999999999e-07,
275
+ "logits/rejected": 1738660864.0,
276
+ "logps/rejected": -1625.093359375,
277
+ "loss": 0.2885,
278
+ "rewards/rejected": -1.2162075996398927,
279
+ "step": 220
280
+ },
281
+ {
282
+ "epoch": 3.066666666666667,
283
+ "grad_norm": 75.5,
284
+ "kl": 14.01048469543457,
285
+ "learning_rate": 6.946666666666666e-07,
286
+ "logits/chosen": 1750113075.2,
287
+ "logits/rejected": 1743708160.0,
288
+ "logps/chosen": -1658.2826171875,
289
+ "logps/rejected": -1375.04150390625,
290
+ "loss": 0.3769,
291
+ "rewards/chosen": 2.9034093856811523,
292
+ "rewards/margins": 4.364873313903809,
293
+ "rewards/rejected": -1.4614639282226562,
294
+ "step": 230
295
+ },
296
+ {
297
+ "epoch": 3.2,
298
+ "grad_norm": 63.0,
299
+ "kl": 37.20917510986328,
300
+ "learning_rate": 6.813333333333333e-07,
301
+ "logits/chosen": 1746606899.2,
302
+ "logps/chosen": -1536.5771484375,
303
+ "loss": 0.4982,
304
+ "rewards/chosen": 3.75426025390625,
305
+ "step": 240
306
+ },
307
+ {
308
+ "epoch": 3.3333333333333335,
309
+ "grad_norm": 54.5,
310
+ "kl": 43.48841094970703,
311
+ "learning_rate": 6.68e-07,
312
+ "logits/chosen": 1757109657.6,
313
+ "logps/chosen": -1617.16201171875,
314
+ "loss": 0.5003,
315
+ "rewards/chosen": 4.342501831054688,
316
+ "step": 250
317
+ },
318
+ {
319
+ "epoch": 3.466666666666667,
320
+ "grad_norm": 52.5,
321
+ "kl": 51.4334716796875,
322
+ "learning_rate": 6.546666666666665e-07,
323
+ "logits/chosen": 1780435148.8,
324
+ "logps/chosen": -1622.2716796875,
325
+ "loss": 0.4796,
326
+ "rewards/chosen": 5.217120742797851,
327
+ "step": 260
328
+ },
329
+ {
330
+ "epoch": 3.6,
331
+ "grad_norm": 67.5,
332
+ "kl": 19.67037582397461,
333
+ "learning_rate": 6.413333333333333e-07,
334
+ "logits/chosen": 1755938050.3925233,
335
+ "logits/rejected": 1727550343.8122065,
336
+ "logps/chosen": -1448.7593457943926,
337
+ "logps/rejected": -1504.9633215962442,
338
+ "loss": 0.4334,
339
+ "rewards/chosen": 5.442524277161215,
340
+ "rewards/margins": 5.293697337761961,
341
+ "rewards/rejected": 0.1488269393992536,
342
+ "step": 270
343
+ },
344
+ {
345
+ "epoch": 3.7333333333333334,
346
+ "grad_norm": 51.75,
347
+ "kl": 0.6852197647094727,
348
+ "learning_rate": 6.28e-07,
349
+ "logits/rejected": 1722981785.6,
350
+ "logps/rejected": -1600.43251953125,
351
+ "loss": 0.3926,
352
+ "rewards/rejected": -0.49457273483276365,
353
+ "step": 280
354
+ },
355
+ {
356
+ "epoch": 3.8666666666666667,
357
+ "grad_norm": 49.0,
358
+ "kl": 0.2839541435241699,
359
+ "learning_rate": 6.146666666666667e-07,
360
+ "logits/rejected": 1752589516.8,
361
+ "logps/rejected": -1619.22822265625,
362
+ "loss": 0.2889,
363
+ "rewards/rejected": -1.1866175651550293,
364
+ "step": 290
365
+ },
366
+ {
367
+ "epoch": 4.0,
368
+ "grad_norm": 37.0,
369
+ "kl": 0.0,
370
+ "learning_rate": 6.013333333333334e-07,
371
+ "logits/rejected": 1728595353.6,
372
+ "logps/rejected": -1498.5271484375,
373
+ "loss": 0.2549,
374
+ "rewards/rejected": -1.526987361907959,
375
+ "step": 300
376
  }
377
  ],
378
  "logging_steps": 10,