willyli commited on
Commit
338e801
·
verified ·
1 Parent(s): a9b8b58

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5fa05163bed9a1cd36363dd4ff80562137d5f68770c186539473124d4551083
3
  size 4962001760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4557fcf116808896585a0b9fba254579763e6927b7ad0e5de2ed4ff34077d20
3
  size 4962001760
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da931ba763893ccb98dc512107b6b5c263eca97cd13e5b05ac86947b959fc7da
3
  size 4915916160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9891b26db54b0fe9429d1eaae88b62639a1a39212715f41b52512ae92fa70832
3
  size 4915916160
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52dcde588022fc935bedd4afcd81e6f4e1b41f079db26834e38267f75b98e817
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138e574470736c7298b3c5a8a8b3b18aefe390eaac6f2f638335790ebc1cb485
3
  size 4999819336
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1642b4a6158c2fd2540216fefd044c2d03795ab85a18a37a96ff8990efe21e7a
3
  size 1623221024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6041069b5d927dee319955f65316f6e8fdb6f91cb75c2ce021a6fda24bfc8e7
3
  size 1623221024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb389ace5a850a48e09909591bc0839eabefd253110f1ce6ef4f82c311105c39
3
  size 13053963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae74fef979973d18fe3467c0e7c2f65c81fbe34c1496da6ee9e440f4ed289ce
3
  size 13053963
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0da242ef698d1e1ed5421e6b23d1b44f93a6a55dcb1a127cd8931c14bd84e798
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47aaa76b87f4c23b4d68cb4239507aa4eae30749e5f68da918a711350817ab21
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84fd50bb0fe9f0aa26725966ffc16f69210c4dc450904bc61526922ca0cae8b
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1bc421bbf7cb973e5a2ee6575b3d963663503f32fac096384d80658aa160982
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d28d7579b85bd847de486eff8a19da23bfa22d830f81a69f170a6ac10defb69
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c6958cc066428807adb00f4692d3c352b3308f598be90eebe8e0669807f9ab
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed5d0fa36258fc125bd8dc7fbf20d2fa389251a7d5e275c5c7a393818ce55fa5
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fd83ff2a53a6695bc067ad08f0a8eba7e528c1e79e11672a503c27108310a2
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:726101b9f924215ed50f082b1e89969dff616b826c0c2b64bd1221baa108c357
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0f3bfe3c5520a616527742a7e4aa9e3609c6434bc9e0a1ce9481706223bf33
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c55b96aa4759bfcbcc6b0d85681eebff0595b41c99b9a11340f9a8e78bff69
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:485aa82032edff97f9113166b023e2a53f833c71ba864fa3342d7671bea4fef4
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2fbc025fa229f4a96f9a30abe9f81e3829db7eae80a3909a4d421e8abeee73
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1713e23bed215f279ff7d13156cf1fcb32a197abfcc8243ab01a3f13f7a8e4fb
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5ad9877076fc54a96b83cdb0188bb8bad71aa0e4038145c7fc0725631af546
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9d696391ce1a68adcb25acaf87c9c5f9d4a3c8c012dc0ada9067d146c6688a
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:537e8778e25faae48041764f389414d8ae2bc54743a220f0eba2e87e988c3690
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8700635e39226de388de06c31a4d44ee2eac31b86ae5fe6d282b60ff442a2a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 9.333333333333334,
6
  "eval_steps": 100,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -845,6 +845,65 @@
845
  "loss": 0.4805,
846
  "rewards/chosen": 5.435222625732422,
847
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
  }
849
  ],
850
  "logging_steps": 10,
@@ -859,7 +918,7 @@
859
  "should_evaluate": false,
860
  "should_log": false,
861
  "should_save": true,
862
- "should_training_stop": false
863
  },
864
  "attributes": {}
865
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 10.0,
6
  "eval_steps": 100,
7
+ "global_step": 750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
845
  "loss": 0.4805,
846
  "rewards/chosen": 5.435222625732422,
847
  "step": 700
848
+ },
849
+ {
850
+ "epoch": 9.466666666666667,
851
+ "grad_norm": 46.75,
852
+ "kl": 56.87353515625,
853
+ "learning_rate": 5.4666666666666666e-08,
854
+ "logits/chosen": 1751316684.8,
855
+ "logps/chosen": -1617.39033203125,
856
+ "loss": 0.4885,
857
+ "rewards/chosen": 5.705249786376953,
858
+ "step": 710
859
+ },
860
+ {
861
+ "epoch": 9.6,
862
+ "grad_norm": 38.25,
863
+ "kl": 17.8724308013916,
864
+ "learning_rate": 4.133333333333333e-08,
865
+ "logits/chosen": 1729312212.9345794,
866
+ "logits/rejected": 1699881844.5821595,
867
+ "logps/chosen": -1447.3971962616822,
868
+ "logps/rejected": -1524.6291079812206,
869
+ "loss": 0.2937,
870
+ "rewards/chosen": 5.578735921984521,
871
+ "rewards/margins": 7.396489386947476,
872
+ "rewards/rejected": -1.8177534649629548,
873
+ "step": 720
874
+ },
875
+ {
876
+ "epoch": 9.733333333333333,
877
+ "grad_norm": 30.375,
878
+ "kl": 0.0,
879
+ "learning_rate": 2.8e-08,
880
+ "logits/rejected": 1695596134.4,
881
+ "logps/rejected": -1616.91201171875,
882
+ "loss": 0.2028,
883
+ "rewards/rejected": -2.142536735534668,
884
+ "step": 730
885
+ },
886
+ {
887
+ "epoch": 9.866666666666667,
888
+ "grad_norm": 42.5,
889
+ "kl": 0.0,
890
+ "learning_rate": 1.4666666666666666e-08,
891
+ "logits/rejected": 1732339097.6,
892
+ "logps/rejected": -1627.45458984375,
893
+ "loss": 0.211,
894
+ "rewards/rejected": -2.0092498779296877,
895
+ "step": 740
896
+ },
897
+ {
898
+ "epoch": 10.0,
899
+ "grad_norm": 42.0,
900
+ "kl": 0.0,
901
+ "learning_rate": 1.3333333333333333e-09,
902
+ "logits/rejected": 1714343731.2,
903
+ "logps/rejected": -1500.21591796875,
904
+ "loss": 0.2487,
905
+ "rewards/rejected": -1.6958515167236328,
906
+ "step": 750
907
  }
908
  ],
909
  "logging_steps": 10,
 
918
  "should_evaluate": false,
919
  "should_log": false,
920
  "should_save": true,
921
+ "should_training_stop": true
922
  },
923
  "attributes": {}
924
  }