Training in progress, step 6700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 35668592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cad0a278102bf9770bc41be47d29e569bd4f2693c87eb7700d1c115f88c29cf
|
| 3 |
size 35668592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18257163
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b424d2e19ea94a34391764dcea1aed8baff5e017c96bfe359256150da0292cd4
|
| 3 |
size 18257163
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b8dde9f5e22222ecfd67853fa8d0b9ba72a03a3881426f7471f24522b0e590e
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc7b581468d312885ca28af1cc20f9f1ad44a8af1db5ffa003b6beff388c6ef3
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -9908,6 +9908,156 @@
|
|
| 9908 |
"rewards/margins": 17.756301879882812,
|
| 9909 |
"rewards/rejected": -16.33933448791504,
|
| 9910 |
"step": 6600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9911 |
}
|
| 9912 |
],
|
| 9913 |
"logging_steps": 10,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.5964138917019122,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 9908 |
"rewards/margins": 17.756301879882812,
|
| 9909 |
"rewards/rejected": -16.33933448791504,
|
| 9910 |
"step": 6600
|
| 9911 |
+
},
|
| 9912 |
+
{
|
| 9913 |
+
"epoch": 1.574968725799726,
|
| 9914 |
+
"grad_norm": 1.6681083252478857e-06,
|
| 9915 |
+
"learning_rate": 2.7190977122535254e-05,
|
| 9916 |
+
"logits/chosen": 9.289986610412598,
|
| 9917 |
+
"logits/rejected": 8.851910591125488,
|
| 9918 |
+
"logps/chosen": -3.9985766410827637,
|
| 9919 |
+
"logps/rejected": -181.04971313476562,
|
| 9920 |
+
"loss": 0.0,
|
| 9921 |
+
"rewards/accuracies": 1.0,
|
| 9922 |
+
"rewards/chosen": 1.4206149578094482,
|
| 9923 |
+
"rewards/margins": 17.674718856811523,
|
| 9924 |
+
"rewards/rejected": -16.254104614257812,
|
| 9925 |
+
"step": 6610
|
| 9926 |
+
},
|
| 9927 |
+
{
|
| 9928 |
+
"epoch": 1.5773515220110799,
|
| 9929 |
+
"grad_norm": 8.596234692959115e-06,
|
| 9930 |
+
"learning_rate": 2.7121921381742445e-05,
|
| 9931 |
+
"logits/chosen": 9.695623397827148,
|
| 9932 |
+
"logits/rejected": 9.04432487487793,
|
| 9933 |
+
"logps/chosen": -3.8987979888916016,
|
| 9934 |
+
"logps/rejected": -181.49603271484375,
|
| 9935 |
+
"loss": 0.0,
|
| 9936 |
+
"rewards/accuracies": 1.0,
|
| 9937 |
+
"rewards/chosen": 1.3905894756317139,
|
| 9938 |
+
"rewards/margins": 17.71683120727539,
|
| 9939 |
+
"rewards/rejected": -16.326242446899414,
|
| 9940 |
+
"step": 6620
|
| 9941 |
+
},
|
| 9942 |
+
{
|
| 9943 |
+
"epoch": 1.579734318222434,
|
| 9944 |
+
"grad_norm": 9.528152986604255e-07,
|
| 9945 |
+
"learning_rate": 2.7052849329510616e-05,
|
| 9946 |
+
"logits/chosen": 9.253401756286621,
|
| 9947 |
+
"logits/rejected": 8.395661354064941,
|
| 9948 |
+
"logps/chosen": -4.091588497161865,
|
| 9949 |
+
"logps/rejected": -182.94107055664062,
|
| 9950 |
+
"loss": 0.0,
|
| 9951 |
+
"rewards/accuracies": 1.0,
|
| 9952 |
+
"rewards/chosen": 1.3835569620132446,
|
| 9953 |
+
"rewards/margins": 17.877513885498047,
|
| 9954 |
+
"rewards/rejected": -16.493959426879883,
|
| 9955 |
+
"step": 6630
|
| 9956 |
+
},
|
| 9957 |
+
{
|
| 9958 |
+
"epoch": 1.582117114433788,
|
| 9959 |
+
"grad_norm": 3.34082760673482e-05,
|
| 9960 |
+
"learning_rate": 2.698376149680408e-05,
|
| 9961 |
+
"logits/chosen": 9.560027122497559,
|
| 9962 |
+
"logits/rejected": 9.066927909851074,
|
| 9963 |
+
"logps/chosen": -3.810729503631592,
|
| 9964 |
+
"logps/rejected": -178.86024475097656,
|
| 9965 |
+
"loss": 0.0,
|
| 9966 |
+
"rewards/accuracies": 1.0,
|
| 9967 |
+
"rewards/chosen": 1.3871489763259888,
|
| 9968 |
+
"rewards/margins": 17.55055809020996,
|
| 9969 |
+
"rewards/rejected": -16.163410186767578,
|
| 9970 |
+
"step": 6640
|
| 9971 |
+
},
|
| 9972 |
+
{
|
| 9973 |
+
"epoch": 1.584499910645142,
|
| 9974 |
+
"grad_norm": 2.7486205453897128e-06,
|
| 9975 |
+
"learning_rate": 2.6914658414708483e-05,
|
| 9976 |
+
"logits/chosen": 9.511039733886719,
|
| 9977 |
+
"logits/rejected": 8.858368873596191,
|
| 9978 |
+
"logps/chosen": -4.18659782409668,
|
| 9979 |
+
"logps/rejected": -179.6385955810547,
|
| 9980 |
+
"loss": 0.0,
|
| 9981 |
+
"rewards/accuracies": 1.0,
|
| 9982 |
+
"rewards/chosen": 1.388300895690918,
|
| 9983 |
+
"rewards/margins": 17.517656326293945,
|
| 9984 |
+
"rewards/rejected": -16.12935447692871,
|
| 9985 |
+
"step": 6650
|
| 9986 |
+
},
|
| 9987 |
+
{
|
| 9988 |
+
"epoch": 1.5868827068564961,
|
| 9989 |
+
"grad_norm": 2.875137852242915e-06,
|
| 9990 |
+
"learning_rate": 2.684554061442669e-05,
|
| 9991 |
+
"logits/chosen": 8.98546314239502,
|
| 9992 |
+
"logits/rejected": 8.42377758026123,
|
| 9993 |
+
"logps/chosen": -4.1340131759643555,
|
| 9994 |
+
"logps/rejected": -183.18992614746094,
|
| 9995 |
+
"loss": 0.0,
|
| 9996 |
+
"rewards/accuracies": 1.0,
|
| 9997 |
+
"rewards/chosen": 1.3774447441101074,
|
| 9998 |
+
"rewards/margins": 17.857067108154297,
|
| 9999 |
+
"rewards/rejected": -16.47962188720703,
|
| 10000 |
+
"step": 6660
|
| 10001 |
+
},
|
| 10002 |
+
{
|
| 10003 |
+
"epoch": 1.5892655030678502,
|
| 10004 |
+
"grad_norm": 8.891835818758409e-07,
|
| 10005 |
+
"learning_rate": 2.6776408627274703e-05,
|
| 10006 |
+
"logits/chosen": 8.80525016784668,
|
| 10007 |
+
"logits/rejected": 8.30695629119873,
|
| 10008 |
+
"logps/chosen": -4.148054599761963,
|
| 10009 |
+
"logps/rejected": -184.2656707763672,
|
| 10010 |
+
"loss": 0.0,
|
| 10011 |
+
"rewards/accuracies": 1.0,
|
| 10012 |
+
"rewards/chosen": 1.4196046590805054,
|
| 10013 |
+
"rewards/margins": 17.907718658447266,
|
| 10014 |
+
"rewards/rejected": -16.48811149597168,
|
| 10015 |
+
"step": 6670
|
| 10016 |
+
},
|
| 10017 |
+
{
|
| 10018 |
+
"epoch": 1.5916482992792043,
|
| 10019 |
+
"grad_norm": 5.344471446733223e-06,
|
| 10020 |
+
"learning_rate": 2.6707262984677573e-05,
|
| 10021 |
+
"logits/chosen": 9.732979774475098,
|
| 10022 |
+
"logits/rejected": 9.241010665893555,
|
| 10023 |
+
"logps/chosen": -3.8970329761505127,
|
| 10024 |
+
"logps/rejected": -179.18814086914062,
|
| 10025 |
+
"loss": 0.0,
|
| 10026 |
+
"rewards/accuracies": 1.0,
|
| 10027 |
+
"rewards/chosen": 1.3821308612823486,
|
| 10028 |
+
"rewards/margins": 17.509174346923828,
|
| 10029 |
+
"rewards/rejected": -16.12704086303711,
|
| 10030 |
+
"step": 6680
|
| 10031 |
+
},
|
| 10032 |
+
{
|
| 10033 |
+
"epoch": 1.5940310954905583,
|
| 10034 |
+
"grad_norm": 1.1200794688193128e-05,
|
| 10035 |
+
"learning_rate": 2.6638104218165332e-05,
|
| 10036 |
+
"logits/chosen": 8.704614639282227,
|
| 10037 |
+
"logits/rejected": 8.213298797607422,
|
| 10038 |
+
"logps/chosen": -4.218519687652588,
|
| 10039 |
+
"logps/rejected": -181.6812744140625,
|
| 10040 |
+
"loss": 0.0,
|
| 10041 |
+
"rewards/accuracies": 1.0,
|
| 10042 |
+
"rewards/chosen": 1.414750576019287,
|
| 10043 |
+
"rewards/margins": 17.60392189025879,
|
| 10044 |
+
"rewards/rejected": -16.189170837402344,
|
| 10045 |
+
"step": 6690
|
| 10046 |
+
},
|
| 10047 |
+
{
|
| 10048 |
+
"epoch": 1.5964138917019122,
|
| 10049 |
+
"grad_norm": 3.7125805647519883e-06,
|
| 10050 |
+
"learning_rate": 2.6568932859368905e-05,
|
| 10051 |
+
"logits/chosen": 9.12821102142334,
|
| 10052 |
+
"logits/rejected": 8.672597885131836,
|
| 10053 |
+
"logps/chosen": -4.364258289337158,
|
| 10054 |
+
"logps/rejected": -182.12783813476562,
|
| 10055 |
+
"loss": 0.0,
|
| 10056 |
+
"rewards/accuracies": 1.0,
|
| 10057 |
+
"rewards/chosen": 1.3750208616256714,
|
| 10058 |
+
"rewards/margins": 17.705333709716797,
|
| 10059 |
+
"rewards/rejected": -16.330312728881836,
|
| 10060 |
+
"step": 6700
|
| 10061 |
}
|
| 10062 |
],
|
| 10063 |
"logging_steps": 10,
|