unfair221's picture
Upload folder using huggingface_hub
9f49b84 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.064,
"grad_norm": 32.64683532714844,
"learning_rate": 5.625e-07,
"logits/chosen": 0.08142563700675964,
"logits/rejected": 0.227357417345047,
"logps/chosen": -1236.4244384765625,
"logps/rejected": -2043.849853515625,
"loss": 0.6733,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.14909373223781586,
"rewards/margins": 0.2537827491760254,
"rewards/rejected": -0.40287646651268005,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 0.24403050541877747,
"learning_rate": 9.98867437523228e-07,
"logits/chosen": 0.10452975332736969,
"logits/rejected": 0.20738160610198975,
"logps/chosen": -1335.4716796875,
"logps/rejected": -2215.779541015625,
"loss": 0.1419,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -5.359838485717773,
"rewards/margins": 7.954916954040527,
"rewards/rejected": -13.3147554397583,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 0.18414834141731262,
"learning_rate": 9.788754083424652e-07,
"logits/chosen": 0.10984750092029572,
"logits/rejected": 0.09670254588127136,
"logps/chosen": -1364.2596435546875,
"logps/rejected": -2435.3935546875,
"loss": 0.124,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -13.650239944458008,
"rewards/margins": 25.381425857543945,
"rewards/rejected": -39.03166580200195,
"step": 30
},
{
"epoch": 0.256,
"grad_norm": 0.12591849267482758,
"learning_rate": 9.348705665778477e-07,
"logits/chosen": 0.18178126215934753,
"logits/rejected": 0.1629951149225235,
"logps/chosen": -1315.8212890625,
"logps/rejected": -2339.572021484375,
"loss": 0.1063,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -9.503636360168457,
"rewards/margins": 26.079574584960938,
"rewards/rejected": -35.583213806152344,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 0.14210306107997894,
"learning_rate": 8.690594987436704e-07,
"logits/chosen": 0.22390194237232208,
"logits/rejected": 0.22648893296718597,
"logps/chosen": -1331.87841796875,
"logps/rejected": -2378.160400390625,
"loss": 0.1001,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -3.748126745223999,
"rewards/margins": 25.280818939208984,
"rewards/rejected": -29.02894401550293,
"step": 50
},
{
"epoch": 0.384,
"grad_norm": 0.15365009009838104,
"learning_rate": 7.84742246584226e-07,
"logits/chosen": 0.23472987115383148,
"logits/rejected": 0.24391409754753113,
"logps/chosen": -1244.014892578125,
"logps/rejected": -2237.36669921875,
"loss": 0.0876,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 1.5522196292877197,
"rewards/margins": 24.502920150756836,
"rewards/rejected": -22.950700759887695,
"step": 60
},
{
"epoch": 0.448,
"grad_norm": 1.6987534761428833,
"learning_rate": 6.861468292009726e-07,
"logits/chosen": 0.2525210976600647,
"logits/rejected": 0.2721753716468811,
"logps/chosen": -1115.734130859375,
"logps/rejected": -2156.46875,
"loss": 0.0863,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.926352024078369,
"rewards/margins": 22.947837829589844,
"rewards/rejected": -17.021486282348633,
"step": 70
},
{
"epoch": 0.512,
"grad_norm": 0.10186341404914856,
"learning_rate": 5.782172325201155e-07,
"logits/chosen": 0.267190545797348,
"logits/rejected": 0.2903681993484497,
"logps/chosen": -1151.381103515625,
"logps/rejected": -2185.71044921875,
"loss": 0.0881,
"rewards/accuracies": 0.984375,
"rewards/chosen": 7.338265419006348,
"rewards/margins": 22.687971115112305,
"rewards/rejected": -15.349705696105957,
"step": 80
},
{
"epoch": 0.576,
"grad_norm": 0.10365493595600128,
"learning_rate": 4.6636549719398016e-07,
"logits/chosen": 0.24797296524047852,
"logits/rejected": 0.26663631200790405,
"logps/chosen": -1155.973876953125,
"logps/rejected": -2196.724609375,
"loss": 0.091,
"rewards/accuracies": 0.984375,
"rewards/chosen": 7.644052505493164,
"rewards/margins": 21.082582473754883,
"rewards/rejected": -13.438529968261719,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 0.08818171918392181,
"learning_rate": 3.562003362839914e-07,
"logits/chosen": 0.26866415143013,
"logits/rejected": 0.3038444519042969,
"logps/chosen": -1111.370849609375,
"logps/rejected": -2152.794677734375,
"loss": 0.0769,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 9.541829109191895,
"rewards/margins": 20.739093780517578,
"rewards/rejected": -11.197265625,
"step": 100
},
{
"epoch": 0.704,
"grad_norm": 0.09460079669952393,
"learning_rate": 2.5324589096782656e-07,
"logits/chosen": 0.261665940284729,
"logits/rejected": 0.3049652576446533,
"logps/chosen": -1119.0865478515625,
"logps/rejected": -2136.193603515625,
"loss": 0.1036,
"rewards/accuracies": 0.984375,
"rewards/chosen": 10.547566413879395,
"rewards/margins": 18.816768646240234,
"rewards/rejected": -8.269205093383789,
"step": 110
},
{
"epoch": 0.768,
"grad_norm": 33.47285461425781,
"learning_rate": 1.6266472703396284e-07,
"logits/chosen": 0.2868819236755371,
"logits/rejected": 0.3263227939605713,
"logps/chosen": -1207.3726806640625,
"logps/rejected": -2139.579833984375,
"loss": 0.1038,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 10.92021656036377,
"rewards/margins": 18.513587951660156,
"rewards/rejected": -7.5933709144592285,
"step": 120
},
{
"epoch": 0.832,
"grad_norm": 0.14188329875469208,
"learning_rate": 8.899896227604508e-08,
"logits/chosen": 0.28316810727119446,
"logits/rejected": 0.3238982856273651,
"logps/chosen": -1150.779296875,
"logps/rejected": -2133.03759765625,
"loss": 0.1056,
"rewards/accuracies": 0.9874999523162842,
"rewards/chosen": 11.900278091430664,
"rewards/margins": 18.61746597290039,
"rewards/rejected": -6.717187404632568,
"step": 130
},
{
"epoch": 0.896,
"grad_norm": 0.09306726604700089,
"learning_rate": 3.594250574048058e-08,
"logits/chosen": 0.26064103841781616,
"logits/rejected": 0.29156985878944397,
"logps/chosen": -1061.2724609375,
"logps/rejected": -2110.465087890625,
"loss": 0.0853,
"rewards/accuracies": 0.9874999523162842,
"rewards/chosen": 12.033426284790039,
"rewards/margins": 19.080835342407227,
"rewards/rejected": -7.047410488128662,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 0.09628653526306152,
"learning_rate": 6.15582970243117e-09,
"logits/chosen": 0.2842194139957428,
"logits/rejected": 0.30415046215057373,
"logps/chosen": -1132.39501953125,
"logps/rejected": -2141.17333984375,
"loss": 0.0888,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": 11.403092384338379,
"rewards/margins": 18.90988540649414,
"rewards/rejected": -7.506793975830078,
"step": 150
},
{
"epoch": 0.9984,
"step": 156,
"total_flos": 1.1538976647738819e+18,
"train_loss": 0.13609233192908457,
"train_runtime": 8850.4888,
"train_samples_per_second": 0.565,
"train_steps_per_second": 0.018
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1538976647738819e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}