Training in progress, step 700, checkpoint
Browse files- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +120 -2
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4962001760
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5fa05163bed9a1cd36363dd4ff80562137d5f68770c186539473124d4551083
|
| 3 |
size 4962001760
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da931ba763893ccb98dc512107b6b5c263eca97cd13e5b05ac86947b959fc7da
|
| 3 |
size 4915916160
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999819336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52dcde588022fc935bedd4afcd81e6f4e1b41f079db26834e38267f75b98e817
|
| 3 |
size 4999819336
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1623221024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1642b4a6158c2fd2540216fefd044c2d03795ab85a18a37a96ff8990efe21e7a
|
| 3 |
size 1623221024
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13053963
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb389ace5a850a48e09909591bc0839eabefd253110f1ce6ef4f82c311105c39
|
| 3 |
size 13053963
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0da242ef698d1e1ed5421e6b23d1b44f93a6a55dcb1a127cd8931c14bd84e798
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c84fd50bb0fe9f0aa26725966ffc16f69210c4dc450904bc61526922ca0cae8b
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d28d7579b85bd847de486eff8a19da23bfa22d830f81a69f170a6ac10defb69
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed5d0fa36258fc125bd8dc7fbf20d2fa389251a7d5e275c5c7a393818ce55fa5
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:726101b9f924215ed50f082b1e89969dff616b826c0c2b64bd1221baa108c357
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94c55b96aa4759bfcbcc6b0d85681eebff0595b41c99b9a11340f9a8e78bff69
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f2fbc025fa229f4a96f9a30abe9f81e3829db7eae80a3909a4d421e8abeee73
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab5ad9877076fc54a96b83cdb0188bb8bad71aa0e4038145c7fc0725631af546
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:537e8778e25faae48041764f389414d8ae2bc54743a220f0eba2e87e988c3690
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -727,6 +727,124 @@
|
|
| 727 |
"loss": 0.241,
|
| 728 |
"rewards/rejected": -1.795237922668457,
|
| 729 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
}
|
| 731 |
],
|
| 732 |
"logging_steps": 10,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 9.333333333333334,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 727 |
"loss": 0.241,
|
| 728 |
"rewards/rejected": -1.795237922668457,
|
| 729 |
"step": 600
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"epoch": 8.133333333333333,
|
| 733 |
+
"grad_norm": 55.0,
|
| 734 |
+
"kl": 51.67559051513672,
|
| 735 |
+
"learning_rate": 1.88e-07,
|
| 736 |
+
"logits/chosen": 1727099904.0,
|
| 737 |
+
"logps/chosen": -1532.07939453125,
|
| 738 |
+
"loss": 0.4961,
|
| 739 |
+
"rewards/chosen": 5.208480453491211,
|
| 740 |
+
"step": 610
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 8.266666666666667,
|
| 744 |
+
"grad_norm": 64.5,
|
| 745 |
+
"kl": 54.74528884887695,
|
| 746 |
+
"learning_rate": 1.7466666666666667e-07,
|
| 747 |
+
"logits/chosen": 1733115904.0,
|
| 748 |
+
"logps/chosen": -1624.519921875,
|
| 749 |
+
"loss": 0.5013,
|
| 750 |
+
"rewards/chosen": 5.472665786743164,
|
| 751 |
+
"step": 620
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 8.4,
|
| 755 |
+
"grad_norm": 54.75,
|
| 756 |
+
"kl": 54.551849365234375,
|
| 757 |
+
"learning_rate": 1.6133333333333332e-07,
|
| 758 |
+
"logits/chosen": 1742884659.2,
|
| 759 |
+
"logps/chosen": -1564.97412109375,
|
| 760 |
+
"loss": 0.4915,
|
| 761 |
+
"rewards/chosen": 5.482054138183594,
|
| 762 |
+
"step": 630
|
| 763 |
+
},
|
| 764 |
+
{
|
| 765 |
+
"epoch": 8.533333333333333,
|
| 766 |
+
"grad_norm": 30.625,
|
| 767 |
+
"kl": 46.306522369384766,
|
| 768 |
+
"learning_rate": 1.4799999999999998e-07,
|
| 769 |
+
"logits/chosen": 1739496766.3220973,
|
| 770 |
+
"logits/rejected": 1702134687.3962264,
|
| 771 |
+
"logps/chosen": -1588.0999531835207,
|
| 772 |
+
"logps/rejected": -1696.4740566037735,
|
| 773 |
+
"loss": 0.4184,
|
| 774 |
+
"rewards/chosen": 5.652020515127575,
|
| 775 |
+
"rewards/margins": 7.627730826153518,
|
| 776 |
+
"rewards/rejected": -1.9757103110259433,
|
| 777 |
+
"step": 640
|
| 778 |
+
},
|
| 779 |
+
{
|
| 780 |
+
"epoch": 8.666666666666666,
|
| 781 |
+
"grad_norm": 56.5,
|
| 782 |
+
"kl": 0.0,
|
| 783 |
+
"learning_rate": 1.3466666666666665e-07,
|
| 784 |
+
"logits/rejected": 1696173260.8,
|
| 785 |
+
"logps/rejected": -1544.7572265625,
|
| 786 |
+
"loss": 0.2282,
|
| 787 |
+
"rewards/rejected": -1.9369186401367187,
|
| 788 |
+
"step": 650
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"epoch": 8.8,
|
| 792 |
+
"grad_norm": 36.75,
|
| 793 |
+
"kl": 0.0,
|
| 794 |
+
"learning_rate": 1.2133333333333333e-07,
|
| 795 |
+
"logits/rejected": 1725317120.0,
|
| 796 |
+
"logps/rejected": -1612.53291015625,
|
| 797 |
+
"loss": 0.2132,
|
| 798 |
+
"rewards/rejected": -1.9934148788452148,
|
| 799 |
+
"step": 660
|
| 800 |
+
},
|
| 801 |
+
{
|
| 802 |
+
"epoch": 8.933333333333334,
|
| 803 |
+
"grad_norm": 43.25,
|
| 804 |
+
"kl": 0.0,
|
| 805 |
+
"learning_rate": 1.0799999999999999e-07,
|
| 806 |
+
"logits/rejected": 1711136563.2,
|
| 807 |
+
"logps/rejected": -1633.72958984375,
|
| 808 |
+
"loss": 0.2084,
|
| 809 |
+
"rewards/rejected": -2.079827880859375,
|
| 810 |
+
"step": 670
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"epoch": 9.066666666666666,
|
| 814 |
+
"grad_norm": 74.5,
|
| 815 |
+
"kl": 26.150564193725586,
|
| 816 |
+
"learning_rate": 9.466666666666665e-08,
|
| 817 |
+
"logits/chosen": 1734777036.8,
|
| 818 |
+
"logits/rejected": 1720304844.8,
|
| 819 |
+
"logps/chosen": -1634.3982421875,
|
| 820 |
+
"logps/rejected": -1376.32314453125,
|
| 821 |
+
"loss": 0.3847,
|
| 822 |
+
"rewards/chosen": 5.2918556213378904,
|
| 823 |
+
"rewards/margins": 6.881484413146973,
|
| 824 |
+
"rewards/rejected": -1.5896287918090821,
|
| 825 |
+
"step": 680
|
| 826 |
+
},
|
| 827 |
+
{
|
| 828 |
+
"epoch": 9.2,
|
| 829 |
+
"grad_norm": 45.0,
|
| 830 |
+
"kl": 54.32807540893555,
|
| 831 |
+
"learning_rate": 8.133333333333332e-08,
|
| 832 |
+
"logits/chosen": 1726023270.4,
|
| 833 |
+
"logps/chosen": -1519.271875,
|
| 834 |
+
"loss": 0.4882,
|
| 835 |
+
"rewards/chosen": 5.484774017333985,
|
| 836 |
+
"step": 690
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 9.333333333333334,
|
| 840 |
+
"grad_norm": 51.75,
|
| 841 |
+
"kl": 53.640716552734375,
|
| 842 |
+
"learning_rate": 6.8e-08,
|
| 843 |
+
"logits/chosen": 1733355929.6,
|
| 844 |
+
"logps/chosen": -1606.2349609375,
|
| 845 |
+
"loss": 0.4805,
|
| 846 |
+
"rewards/chosen": 5.435222625732422,
|
| 847 |
+
"step": 700
|
| 848 |
}
|
| 849 |
],
|
| 850 |
"logging_steps": 10,
|