Training in progress, step 3354, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9449344
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7d805d224770203f87507cd409ca03c7d379294618bc6df66dcdb8923b5024d
|
| 3 |
size 9449344
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18957003
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cf95319f4c58dbfc6de8610b486ecae741191a50a79dd6828f045be1cd3b60e
|
| 3 |
size 18957003
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd6db25db4cf05e7919262d88ce7dfa06447501cda328a167e8debfb09b35275
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4808,6 +4808,231 @@
|
|
| 4808 |
"rewards/margins": 25.77750015258789,
|
| 4809 |
"rewards/rejected": -12.244199752807617,
|
| 4810 |
"step": 3200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4811 |
}
|
| 4812 |
],
|
| 4813 |
"logging_steps": 10,
|
|
@@ -4822,7 +5047,7 @@
|
|
| 4822 |
"should_evaluate": false,
|
| 4823 |
"should_log": false,
|
| 4824 |
"should_save": true,
|
| 4825 |
-
"should_training_stop":
|
| 4826 |
},
|
| 4827 |
"attributes": {}
|
| 4828 |
}
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 3354,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4808 |
"rewards/margins": 25.77750015258789,
|
| 4809 |
"rewards/rejected": -12.244199752807617,
|
| 4810 |
"step": 3200
|
| 4811 |
+
},
|
| 4812 |
+
{
|
| 4813 |
+
"epoch": 1.914268674519159,
|
| 4814 |
+
"grad_norm": 3.401453625428985e-07,
|
| 4815 |
+
"learning_rate": 2.4456893515647507e-07,
|
| 4816 |
+
"logits/chosen": -55.8200569152832,
|
| 4817 |
+
"logits/rejected": -60.309791564941406,
|
| 4818 |
+
"logps/chosen": -3641.48876953125,
|
| 4819 |
+
"logps/rejected": -4029.09130859375,
|
| 4820 |
+
"loss": 0.0,
|
| 4821 |
+
"rewards/accuracies": 1.0,
|
| 4822 |
+
"rewards/chosen": 12.986165046691895,
|
| 4823 |
+
"rewards/margins": 28.325210571289062,
|
| 4824 |
+
"rewards/rejected": -15.339044570922852,
|
| 4825 |
+
"step": 3210
|
| 4826 |
+
},
|
| 4827 |
+
{
|
| 4828 |
+
"epoch": 1.9202325928134785,
|
| 4829 |
+
"grad_norm": 7.95227883543248e-09,
|
| 4830 |
+
"learning_rate": 2.1204464497393828e-07,
|
| 4831 |
+
"logits/chosen": -58.28154754638672,
|
| 4832 |
+
"logits/rejected": -63.397918701171875,
|
| 4833 |
+
"logps/chosen": -3738.774169921875,
|
| 4834 |
+
"logps/rejected": -4127.396484375,
|
| 4835 |
+
"loss": 0.0,
|
| 4836 |
+
"rewards/accuracies": 1.0,
|
| 4837 |
+
"rewards/chosen": 12.601153373718262,
|
| 4838 |
+
"rewards/margins": 32.9376106262207,
|
| 4839 |
+
"rewards/rejected": -20.33645248413086,
|
| 4840 |
+
"step": 3220
|
| 4841 |
+
},
|
| 4842 |
+
{
|
| 4843 |
+
"epoch": 1.9261965111077979,
|
| 4844 |
+
"grad_norm": 5.5718683142913505e-06,
|
| 4845 |
+
"learning_rate": 1.818308335493707e-07,
|
| 4846 |
+
"logits/chosen": -57.85089874267578,
|
| 4847 |
+
"logits/rejected": -62.124717712402344,
|
| 4848 |
+
"logps/chosen": -3759.594482421875,
|
| 4849 |
+
"logps/rejected": -4074.05859375,
|
| 4850 |
+
"loss": 0.0,
|
| 4851 |
+
"rewards/accuracies": 1.0,
|
| 4852 |
+
"rewards/chosen": 12.022012710571289,
|
| 4853 |
+
"rewards/margins": 27.768798828125,
|
| 4854 |
+
"rewards/rejected": -15.746786117553711,
|
| 4855 |
+
"step": 3230
|
| 4856 |
+
},
|
| 4857 |
+
{
|
| 4858 |
+
"epoch": 1.932160429402117,
|
| 4859 |
+
"grad_norm": 2.9493682518477726e-07,
|
| 4860 |
+
"learning_rate": 1.539303171041423e-07,
|
| 4861 |
+
"logits/chosen": -56.495628356933594,
|
| 4862 |
+
"logits/rejected": -59.803489685058594,
|
| 4863 |
+
"logps/chosen": -3937.475341796875,
|
| 4864 |
+
"logps/rejected": -4117.28955078125,
|
| 4865 |
+
"loss": 0.0181,
|
| 4866 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 4867 |
+
"rewards/chosen": 13.402850151062012,
|
| 4868 |
+
"rewards/margins": 29.56199073791504,
|
| 4869 |
+
"rewards/rejected": -16.159137725830078,
|
| 4870 |
+
"step": 3240
|
| 4871 |
+
},
|
| 4872 |
+
{
|
| 4873 |
+
"epoch": 1.9381243476964367,
|
| 4874 |
+
"grad_norm": 2.0169439451933613e-09,
|
| 4875 |
+
"learning_rate": 1.2834569623800806e-07,
|
| 4876 |
+
"logits/chosen": -56.14719772338867,
|
| 4877 |
+
"logits/rejected": -61.06328201293945,
|
| 4878 |
+
"logps/chosen": -3900.921142578125,
|
| 4879 |
+
"logps/rejected": -3950.700439453125,
|
| 4880 |
+
"loss": 0.0,
|
| 4881 |
+
"rewards/accuracies": 1.0,
|
| 4882 |
+
"rewards/chosen": 11.294220924377441,
|
| 4883 |
+
"rewards/margins": 27.905467987060547,
|
| 4884 |
+
"rewards/rejected": -16.611248016357422,
|
| 4885 |
+
"step": 3250
|
| 4886 |
+
},
|
| 4887 |
+
{
|
| 4888 |
+
"epoch": 1.944088265990756,
|
| 4889 |
+
"grad_norm": 2.454358383197075e-10,
|
| 4890 |
+
"learning_rate": 1.0507935568670469e-07,
|
| 4891 |
+
"logits/chosen": -59.91279983520508,
|
| 4892 |
+
"logits/rejected": -64.65019226074219,
|
| 4893 |
+
"logps/chosen": -4057.24365234375,
|
| 4894 |
+
"logps/rejected": -4658.7998046875,
|
| 4895 |
+
"loss": 0.0,
|
| 4896 |
+
"rewards/accuracies": 1.0,
|
| 4897 |
+
"rewards/chosen": 13.831171035766602,
|
| 4898 |
+
"rewards/margins": 30.55777359008789,
|
| 4899 |
+
"rewards/rejected": -16.726600646972656,
|
| 4900 |
+
"step": 3260
|
| 4901 |
+
},
|
| 4902 |
+
{
|
| 4903 |
+
"epoch": 1.9500521842850753,
|
| 4904 |
+
"grad_norm": 6.98909570928663e-05,
|
| 4905 |
+
"learning_rate": 8.413346409967548e-08,
|
| 4906 |
+
"logits/chosen": -57.639564514160156,
|
| 4907 |
+
"logits/rejected": -60.486106872558594,
|
| 4908 |
+
"logps/chosen": -3805.05908203125,
|
| 4909 |
+
"logps/rejected": -3502.89208984375,
|
| 4910 |
+
"loss": 0.0,
|
| 4911 |
+
"rewards/accuracies": 1.0,
|
| 4912 |
+
"rewards/chosen": 13.168159484863281,
|
| 4913 |
+
"rewards/margins": 26.42257308959961,
|
| 4914 |
+
"rewards/rejected": -13.254412651062012,
|
| 4915 |
+
"step": 3270
|
| 4916 |
+
},
|
| 4917 |
+
{
|
| 4918 |
+
"epoch": 1.9560161025793947,
|
| 4919 |
+
"grad_norm": 9.799998590409587e-10,
|
| 4920 |
+
"learning_rate": 6.5509973837935e-08,
|
| 4921 |
+
"logits/chosen": -56.7869987487793,
|
| 4922 |
+
"logits/rejected": -63.3160285949707,
|
| 4923 |
+
"logps/chosen": -3626.40771484375,
|
| 4924 |
+
"logps/rejected": -4573.03466796875,
|
| 4925 |
+
"loss": 0.0,
|
| 4926 |
+
"rewards/accuracies": 1.0,
|
| 4927 |
+
"rewards/chosen": 14.337489128112793,
|
| 4928 |
+
"rewards/margins": 31.131816864013672,
|
| 4929 |
+
"rewards/rejected": -16.794330596923828,
|
| 4930 |
+
"step": 3280
|
| 4931 |
+
},
|
| 4932 |
+
{
|
| 4933 |
+
"epoch": 1.961980020873714,
|
| 4934 |
+
"grad_norm": 8.010190867935307e-06,
|
| 4935 |
+
"learning_rate": 4.921062079207839e-08,
|
| 4936 |
+
"logits/chosen": -58.120140075683594,
|
| 4937 |
+
"logits/rejected": -62.119712829589844,
|
| 4938 |
+
"logps/chosen": -3722.432861328125,
|
| 4939 |
+
"logps/rejected": -4000.43701171875,
|
| 4940 |
+
"loss": 0.0,
|
| 4941 |
+
"rewards/accuracies": 1.0,
|
| 4942 |
+
"rewards/chosen": 15.0454740524292,
|
| 4943 |
+
"rewards/margins": 28.300750732421875,
|
| 4944 |
+
"rewards/rejected": -13.255276679992676,
|
| 4945 |
+
"step": 3290
|
| 4946 |
+
},
|
| 4947 |
+
{
|
| 4948 |
+
"epoch": 1.9679439391680336,
|
| 4949 |
+
"grad_norm": 0.012528502382338047,
|
| 4950 |
+
"learning_rate": 3.5236924220494186e-08,
|
| 4951 |
+
"logits/chosen": -57.49330520629883,
|
| 4952 |
+
"logits/rejected": -60.7618522644043,
|
| 4953 |
+
"logps/chosen": -4048.438232421875,
|
| 4954 |
+
"logps/rejected": -4347.9658203125,
|
| 4955 |
+
"loss": 0.0,
|
| 4956 |
+
"rewards/accuracies": 1.0,
|
| 4957 |
+
"rewards/chosen": 13.4490966796875,
|
| 4958 |
+
"rewards/margins": 29.223918914794922,
|
| 4959 |
+
"rewards/rejected": -15.774820327758789,
|
| 4960 |
+
"step": 3300
|
| 4961 |
+
},
|
| 4962 |
+
{
|
| 4963 |
+
"epoch": 1.9739078574623528,
|
| 4964 |
+
"grad_norm": 5.5174933066837184e-08,
|
| 4965 |
+
"learning_rate": 2.3590186607733154e-08,
|
| 4966 |
+
"logits/chosen": -57.35243606567383,
|
| 4967 |
+
"logits/rejected": -63.4393310546875,
|
| 4968 |
+
"logps/chosen": -3737.04150390625,
|
| 4969 |
+
"logps/rejected": -4001.02978515625,
|
| 4970 |
+
"loss": 0.0,
|
| 4971 |
+
"rewards/accuracies": 1.0,
|
| 4972 |
+
"rewards/chosen": 14.624621391296387,
|
| 4973 |
+
"rewards/margins": 27.510913848876953,
|
| 4974 |
+
"rewards/rejected": -12.886293411254883,
|
| 4975 |
+
"step": 3310
|
| 4976 |
+
},
|
| 4977 |
+
{
|
| 4978 |
+
"epoch": 1.9798717757566722,
|
| 4979 |
+
"grad_norm": 2.4484758665010986e-10,
|
| 4980 |
+
"learning_rate": 1.4271493543133174e-08,
|
| 4981 |
+
"logits/chosen": -59.93109130859375,
|
| 4982 |
+
"logits/rejected": -63.264732360839844,
|
| 4983 |
+
"logps/chosen": -3958.082763671875,
|
| 4984 |
+
"logps/rejected": -4254.8232421875,
|
| 4985 |
+
"loss": 0.0,
|
| 4986 |
+
"rewards/accuracies": 1.0,
|
| 4987 |
+
"rewards/chosen": 13.18006420135498,
|
| 4988 |
+
"rewards/margins": 28.5799503326416,
|
| 4989 |
+
"rewards/rejected": -15.399887084960938,
|
| 4990 |
+
"step": 3320
|
| 4991 |
+
},
|
| 4992 |
+
{
|
| 4993 |
+
"epoch": 1.9858356940509916,
|
| 4994 |
+
"grad_norm": 4.756313121134781e-09,
|
| 4995 |
+
"learning_rate": 7.281713619605723e-09,
|
| 4996 |
+
"logits/chosen": -56.019737243652344,
|
| 4997 |
+
"logits/rejected": -59.69663619995117,
|
| 4998 |
+
"logps/chosen": -4050.05908203125,
|
| 4999 |
+
"logps/rejected": -4002.97802734375,
|
| 5000 |
+
"loss": 0.0,
|
| 5001 |
+
"rewards/accuracies": 1.0,
|
| 5002 |
+
"rewards/chosen": 13.66505241394043,
|
| 5003 |
+
"rewards/margins": 27.011245727539062,
|
| 5004 |
+
"rewards/rejected": -13.346193313598633,
|
| 5005 |
+
"step": 3330
|
| 5006 |
+
},
|
| 5007 |
+
{
|
| 5008 |
+
"epoch": 1.9917996123453108,
|
| 5009 |
+
"grad_norm": 1.108175638364628e-05,
|
| 5010 |
+
"learning_rate": 2.6214983526867686e-09,
|
| 5011 |
+
"logits/chosen": -56.985069274902344,
|
| 5012 |
+
"logits/rejected": -62.2716178894043,
|
| 5013 |
+
"logps/chosen": -3683.37255859375,
|
| 5014 |
+
"logps/rejected": -3870.92041015625,
|
| 5015 |
+
"loss": 0.0,
|
| 5016 |
+
"rewards/accuracies": 1.0,
|
| 5017 |
+
"rewards/chosen": 13.052160263061523,
|
| 5018 |
+
"rewards/margins": 27.678844451904297,
|
| 5019 |
+
"rewards/rejected": -14.626681327819824,
|
| 5020 |
+
"step": 3340
|
| 5021 |
+
},
|
| 5022 |
+
{
|
| 5023 |
+
"epoch": 1.9977635306396302,
|
| 5024 |
+
"grad_norm": 2.601581456929125e-07,
|
| 5025 |
+
"learning_rate": 2.912821198075566e-10,
|
| 5026 |
+
"logits/chosen": -56.11452102661133,
|
| 5027 |
+
"logits/rejected": -60.453453063964844,
|
| 5028 |
+
"logps/chosen": -3346.564453125,
|
| 5029 |
+
"logps/rejected": -3288.348388671875,
|
| 5030 |
+
"loss": 0.0,
|
| 5031 |
+
"rewards/accuracies": 1.0,
|
| 5032 |
+
"rewards/chosen": 12.876147270202637,
|
| 5033 |
+
"rewards/margins": 26.675273895263672,
|
| 5034 |
+
"rewards/rejected": -13.799127578735352,
|
| 5035 |
+
"step": 3350
|
| 5036 |
}
|
| 5037 |
],
|
| 5038 |
"logging_steps": 10,
|
|
|
|
| 5047 |
"should_evaluate": false,
|
| 5048 |
"should_log": false,
|
| 5049 |
"should_save": true,
|
| 5050 |
+
"should_training_stop": true
|
| 5051 |
},
|
| 5052 |
"attributes": {}
|
| 5053 |
}
|