Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +169 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 313559072
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd2b719fb970936323e8fd411b05e2a84f4667ae561563038876cae704ccbd87
|
| 3 |
size 313559072
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 159513573
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4fb6dfa3b3be9e25a98aff9a923fc2480a580b7dc0af952850a811804297821
|
| 3 |
size 159513573
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1937a443d7368c8ec9254650849425295f524b6811196164c9f8145ae9528880
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0a996684adc127ea8c51ebb0d616f5ff3e480192cd01de6d293712583e60f2b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18dcda78addf690b92cd7056f07582eb468846f2e21bc29981e4ee2c6c66d84b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5b1adb37a2fab20cad86ca7dee4e55987e43f200ac69e3c2cd774e08f39674a
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50c740af648e9b0e31ce07fca76df019b6fed40e6c01c05d0cca1cf517f5992b
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3826,6 +3826,172 @@
|
|
| 3826 |
"eval_samples_per_second": 39.196,
|
| 3827 |
"eval_steps_per_second": 4.899,
|
| 3828 |
"step": 1150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3829 |
}
|
| 3830 |
],
|
| 3831 |
"logging_steps": 5,
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.5811416506767273,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.3819223424570337,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3826 |
"eval_samples_per_second": 39.196,
|
| 3827 |
"eval_steps_per_second": 4.899,
|
| 3828 |
"step": 1150
|
| 3829 |
+
},
|
| 3830 |
+
{
|
| 3831 |
+
"epoch": 0.367600254614895,
|
| 3832 |
+
"grad_norm": 16.311731338500977,
|
| 3833 |
+
"learning_rate": 3.2064798142062343e-06,
|
| 3834 |
+
"logits/chosen": NaN,
|
| 3835 |
+
"logits/rejected": 4.429045677185059,
|
| 3836 |
+
"logps/chosen": -610.5453491210938,
|
| 3837 |
+
"logps/rejected": -508.1640625,
|
| 3838 |
+
"loss": 0.5746,
|
| 3839 |
+
"rewards/accuracies": 0.6625000238418579,
|
| 3840 |
+
"rewards/chosen": 0.05947621911764145,
|
| 3841 |
+
"rewards/margins": 0.5095695853233337,
|
| 3842 |
+
"rewards/rejected": -0.4500933587551117,
|
| 3843 |
+
"step": 1155
|
| 3844 |
+
},
|
| 3845 |
+
{
|
| 3846 |
+
"epoch": 0.36919159770846594,
|
| 3847 |
+
"grad_norm": 17.13549041748047,
|
| 3848 |
+
"learning_rate": 3.1749928785492673e-06,
|
| 3849 |
+
"logits/chosen": NaN,
|
| 3850 |
+
"logits/rejected": NaN,
|
| 3851 |
+
"logps/chosen": -717.437255859375,
|
| 3852 |
+
"logps/rejected": -517.4379272460938,
|
| 3853 |
+
"loss": 0.5966,
|
| 3854 |
+
"rewards/accuracies": 0.675000011920929,
|
| 3855 |
+
"rewards/chosen": 0.04769650846719742,
|
| 3856 |
+
"rewards/margins": 0.4534810483455658,
|
| 3857 |
+
"rewards/rejected": -0.405784547328949,
|
| 3858 |
+
"step": 1160
|
| 3859 |
+
},
|
| 3860 |
+
{
|
| 3861 |
+
"epoch": 0.37078294080203694,
|
| 3862 |
+
"grad_norm": 22.3413143157959,
|
| 3863 |
+
"learning_rate": 3.1435533379641483e-06,
|
| 3864 |
+
"logits/chosen": 4.362832069396973,
|
| 3865 |
+
"logits/rejected": 4.43159818649292,
|
| 3866 |
+
"logps/chosen": -721.2758178710938,
|
| 3867 |
+
"logps/rejected": -544.1013793945312,
|
| 3868 |
+
"loss": 0.5884,
|
| 3869 |
+
"rewards/accuracies": 0.643750011920929,
|
| 3870 |
+
"rewards/chosen": 0.16573062539100647,
|
| 3871 |
+
"rewards/margins": 0.47569626569747925,
|
| 3872 |
+
"rewards/rejected": -0.30996567010879517,
|
| 3873 |
+
"step": 1165
|
| 3874 |
+
},
|
| 3875 |
+
{
|
| 3876 |
+
"epoch": 0.3723742838956079,
|
| 3877 |
+
"grad_norm": 14.433586120605469,
|
| 3878 |
+
"learning_rate": 3.1121633413017148e-06,
|
| 3879 |
+
"logits/chosen": NaN,
|
| 3880 |
+
"logits/rejected": NaN,
|
| 3881 |
+
"logps/chosen": -649.4559326171875,
|
| 3882 |
+
"logps/rejected": -527.7597045898438,
|
| 3883 |
+
"loss": 0.6052,
|
| 3884 |
+
"rewards/accuracies": 0.643750011920929,
|
| 3885 |
+
"rewards/chosen": 0.04959065467119217,
|
| 3886 |
+
"rewards/margins": 0.39057812094688416,
|
| 3887 |
+
"rewards/rejected": -0.3409874439239502,
|
| 3888 |
+
"step": 1170
|
| 3889 |
+
},
|
| 3890 |
+
{
|
| 3891 |
+
"epoch": 0.37396562698917885,
|
| 3892 |
+
"grad_norm": 16.829557418823242,
|
| 3893 |
+
"learning_rate": 3.080825034026538e-06,
|
| 3894 |
+
"logits/chosen": 4.339395999908447,
|
| 3895 |
+
"logits/rejected": NaN,
|
| 3896 |
+
"logps/chosen": -715.9188232421875,
|
| 3897 |
+
"logps/rejected": -491.35406494140625,
|
| 3898 |
+
"loss": 0.605,
|
| 3899 |
+
"rewards/accuracies": 0.6499999761581421,
|
| 3900 |
+
"rewards/chosen": -0.03876256197690964,
|
| 3901 |
+
"rewards/margins": 0.406755268573761,
|
| 3902 |
+
"rewards/rejected": -0.4455178380012512,
|
| 3903 |
+
"step": 1175
|
| 3904 |
+
},
|
| 3905 |
+
{
|
| 3906 |
+
"epoch": 0.37555697008274985,
|
| 3907 |
+
"grad_norm": 17.50708770751953,
|
| 3908 |
+
"learning_rate": 3.0495405580702946e-06,
|
| 3909 |
+
"logits/chosen": NaN,
|
| 3910 |
+
"logits/rejected": NaN,
|
| 3911 |
+
"logps/chosen": -682.1160278320312,
|
| 3912 |
+
"logps/rejected": -503.6424865722656,
|
| 3913 |
+
"loss": 0.542,
|
| 3914 |
+
"rewards/accuracies": 0.6875,
|
| 3915 |
+
"rewards/chosen": 0.041110388934612274,
|
| 3916 |
+
"rewards/margins": 0.5532486438751221,
|
| 3917 |
+
"rewards/rejected": -0.5121382474899292,
|
| 3918 |
+
"step": 1180
|
| 3919 |
+
},
|
| 3920 |
+
{
|
| 3921 |
+
"epoch": 0.3771483131763208,
|
| 3922 |
+
"grad_norm": 14.150347709655762,
|
| 3923 |
+
"learning_rate": 3.0183120516853587e-06,
|
| 3924 |
+
"logits/chosen": 4.216782093048096,
|
| 3925 |
+
"logits/rejected": 4.357659816741943,
|
| 3926 |
+
"logps/chosen": -658.0750122070312,
|
| 3927 |
+
"logps/rejected": -509.7039489746094,
|
| 3928 |
+
"loss": 0.5811,
|
| 3929 |
+
"rewards/accuracies": 0.6812499761581421,
|
| 3930 |
+
"rewards/chosen": 0.17130543291568756,
|
| 3931 |
+
"rewards/margins": 0.45563697814941406,
|
| 3932 |
+
"rewards/rejected": -0.2843315005302429,
|
| 3933 |
+
"step": 1185
|
| 3934 |
+
},
|
| 3935 |
+
{
|
| 3936 |
+
"epoch": 0.3787396562698918,
|
| 3937 |
+
"grad_norm": 14.595678329467773,
|
| 3938 |
+
"learning_rate": 2.9871416492986604e-06,
|
| 3939 |
+
"logits/chosen": 4.271334171295166,
|
| 3940 |
+
"logits/rejected": NaN,
|
| 3941 |
+
"logps/chosen": -624.3214111328125,
|
| 3942 |
+
"logps/rejected": -466.72412109375,
|
| 3943 |
+
"loss": 0.5109,
|
| 3944 |
+
"rewards/accuracies": 0.7250000238418579,
|
| 3945 |
+
"rewards/chosen": 0.3722537159919739,
|
| 3946 |
+
"rewards/margins": 0.7206977605819702,
|
| 3947 |
+
"rewards/rejected": -0.34844404458999634,
|
| 3948 |
+
"step": 1190
|
| 3949 |
+
},
|
| 3950 |
+
{
|
| 3951 |
+
"epoch": 0.38033099936346276,
|
| 3952 |
+
"grad_norm": 14.158003807067871,
|
| 3953 |
+
"learning_rate": 2.9560314813657966e-06,
|
| 3954 |
+
"logits/chosen": 4.302981376647949,
|
| 3955 |
+
"logits/rejected": NaN,
|
| 3956 |
+
"logps/chosen": -727.4720458984375,
|
| 3957 |
+
"logps/rejected": -558.3084106445312,
|
| 3958 |
+
"loss": 0.5589,
|
| 3959 |
+
"rewards/accuracies": 0.65625,
|
| 3960 |
+
"rewards/chosen": 0.24353870749473572,
|
| 3961 |
+
"rewards/margins": 0.5566657781600952,
|
| 3962 |
+
"rewards/rejected": -0.3131270408630371,
|
| 3963 |
+
"step": 1195
|
| 3964 |
+
},
|
| 3965 |
+
{
|
| 3966 |
+
"epoch": 0.3819223424570337,
|
| 3967 |
+
"grad_norm": 17.142292022705078,
|
| 3968 |
+
"learning_rate": 2.9249836742254207e-06,
|
| 3969 |
+
"logits/chosen": 4.26998233795166,
|
| 3970 |
+
"logits/rejected": NaN,
|
| 3971 |
+
"logps/chosen": -662.781982421875,
|
| 3972 |
+
"logps/rejected": -481.7264099121094,
|
| 3973 |
+
"loss": 0.6372,
|
| 3974 |
+
"rewards/accuracies": 0.6499999761581421,
|
| 3975 |
+
"rewards/chosen": 0.05399315431714058,
|
| 3976 |
+
"rewards/margins": 0.329118013381958,
|
| 3977 |
+
"rewards/rejected": -0.2751248776912689,
|
| 3978 |
+
"step": 1200
|
| 3979 |
+
},
|
| 3980 |
+
{
|
| 3981 |
+
"epoch": 0.3819223424570337,
|
| 3982 |
+
"eval_logits/chosen": NaN,
|
| 3983 |
+
"eval_logits/rejected": NaN,
|
| 3984 |
+
"eval_logps/chosen": -675.000244140625,
|
| 3985 |
+
"eval_logps/rejected": -528.8659057617188,
|
| 3986 |
+
"eval_loss": 0.5811416506767273,
|
| 3987 |
+
"eval_rewards/accuracies": 0.6856763958930969,
|
| 3988 |
+
"eval_rewards/chosen": 0.09323292225599289,
|
| 3989 |
+
"eval_rewards/margins": 0.5122284889221191,
|
| 3990 |
+
"eval_rewards/rejected": -0.4189954996109009,
|
| 3991 |
+
"eval_runtime": 75.1263,
|
| 3992 |
+
"eval_samples_per_second": 40.146,
|
| 3993 |
+
"eval_steps_per_second": 5.018,
|
| 3994 |
+
"step": 1200
|
| 3995 |
}
|
| 3996 |
],
|
| 3997 |
"logging_steps": 5,
|