llama_binary_4096_5000_dpo_sft01 / trainer_state.json
jinqij's picture
Upload folder using huggingface_hub
b2c790e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.064,
"grad_norm": 51.604713439941406,
"learning_rate": 6.249999999999999e-07,
"logits/chosen": -1.7511365413665771,
"logits/rejected": -0.9332168102264404,
"logps/chosen": -326.5755615234375,
"logps/rejected": -795.309326171875,
"loss": 0.6932,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": 0.008899950422346592,
"rewards/margins": 0.04068336635828018,
"rewards/rejected": -0.031783413141965866,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 11.927907943725586,
"learning_rate": 9.979871469976195e-07,
"logits/chosen": -1.7664214372634888,
"logits/rejected": -0.9802268743515015,
"logps/chosen": -345.8614501953125,
"logps/rejected": -836.3780517578125,
"loss": 0.3867,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.04140026494860649,
"rewards/margins": 1.189576506614685,
"rewards/rejected": -1.1481761932373047,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 0.19122953712940216,
"learning_rate": 9.755282581475767e-07,
"logits/chosen": -1.9365524053573608,
"logits/rejected": -1.328689694404602,
"logps/chosen": -346.8349914550781,
"logps/rejected": -943.77587890625,
"loss": 0.1303,
"rewards/accuracies": 0.984375,
"rewards/chosen": -3.2460498809814453,
"rewards/margins": 11.235220909118652,
"rewards/rejected": -14.481270790100098,
"step": 30
},
{
"epoch": 0.256,
"grad_norm": 0.22885994613170624,
"learning_rate": 9.29224396800933e-07,
"logits/chosen": -2.040452241897583,
"logits/rejected": -1.6386874914169312,
"logps/chosen": -419.212646484375,
"logps/rejected": -1117.9078369140625,
"loss": 0.0405,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -10.218693733215332,
"rewards/margins": 25.372825622558594,
"rewards/rejected": -35.591522216796875,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 0.15373322367668152,
"learning_rate": 8.613974319136957e-07,
"logits/chosen": -1.946674108505249,
"logits/rejected": -1.572014570236206,
"logps/chosen": -434.0313720703125,
"logps/rejected": -1180.6865234375,
"loss": 0.0349,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -9.76918888092041,
"rewards/margins": 27.693220138549805,
"rewards/rejected": -37.46240997314453,
"step": 50
},
{
"epoch": 0.384,
"grad_norm": 0.13283002376556396,
"learning_rate": 7.754484907260512e-07,
"logits/chosen": -1.8221015930175781,
"logits/rejected": -1.4241716861724854,
"logps/chosen": -397.850341796875,
"logps/rejected": -1054.3414306640625,
"loss": 0.032,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -6.713717937469482,
"rewards/margins": 22.15141487121582,
"rewards/rejected": -28.865131378173828,
"step": 60
},
{
"epoch": 0.448,
"grad_norm": 0.12337207049131393,
"learning_rate": 6.756874120406714e-07,
"logits/chosen": -1.7552038431167603,
"logits/rejected": -1.296088457107544,
"logps/chosen": -325.4990539550781,
"logps/rejected": -998.6246337890625,
"loss": 0.0336,
"rewards/accuracies": 0.984375,
"rewards/chosen": -3.70082950592041,
"rewards/margins": 19.856613159179688,
"rewards/rejected": -23.557445526123047,
"step": 70
},
{
"epoch": 0.512,
"grad_norm": 0.0956556648015976,
"learning_rate": 5.671166329088277e-07,
"logits/chosen": -1.6788297891616821,
"logits/rejected": -1.2116974592208862,
"logps/chosen": -347.7698974609375,
"logps/rejected": -1006.9019775390625,
"loss": 0.0304,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -3.5100672245025635,
"rewards/margins": 18.646265029907227,
"rewards/rejected": -22.15633201599121,
"step": 80
},
{
"epoch": 0.576,
"grad_norm": 0.09139358252286911,
"learning_rate": 4.5518034554828327e-07,
"logits/chosen": -1.6748931407928467,
"logits/rejected": -1.1833109855651855,
"logps/chosen": -355.009033203125,
"logps/rejected": -1040.528076171875,
"loss": 0.036,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -3.3938891887664795,
"rewards/margins": 18.697372436523438,
"rewards/rejected": -22.09126091003418,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 0.10561755299568176,
"learning_rate": 3.454915028125263e-07,
"logits/chosen": -1.6425403356552124,
"logits/rejected": -1.1396013498306274,
"logps/chosen": -336.59295654296875,
"logps/rejected": -1013.5849609375,
"loss": 0.026,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -3.053579092025757,
"rewards/margins": 18.989028930664062,
"rewards/rejected": -22.0426082611084,
"step": 100
},
{
"epoch": 0.704,
"grad_norm": 0.44353532791137695,
"learning_rate": 2.4355036129704696e-07,
"logits/chosen": -1.6363352537155151,
"logits/rejected": -1.1411080360412598,
"logps/chosen": -348.1735534667969,
"logps/rejected": -1026.014404296875,
"loss": 0.0354,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -3.1121909618377686,
"rewards/margins": 18.843551635742188,
"rewards/rejected": -21.95574378967285,
"step": 110
},
{
"epoch": 0.768,
"grad_norm": 0.30070415139198303,
"learning_rate": 1.5446867550656767e-07,
"logits/chosen": -1.5898562669754028,
"logits/rejected": -1.1400786638259888,
"logps/chosen": -379.26361083984375,
"logps/rejected": -996.3677978515625,
"loss": 0.0419,
"rewards/accuracies": 0.984375,
"rewards/chosen": -3.5438451766967773,
"rewards/margins": 17.603504180908203,
"rewards/rejected": -21.147350311279297,
"step": 120
},
{
"epoch": 0.832,
"grad_norm": 0.4903264343738556,
"learning_rate": 8.271337313934867e-08,
"logits/chosen": -1.6253430843353271,
"logits/rejected": -1.1439229249954224,
"logps/chosen": -365.46771240234375,
"logps/rejected": -1015.4036254882812,
"loss": 0.031,
"rewards/accuracies": 0.9906249642372131,
"rewards/chosen": -3.238945245742798,
"rewards/margins": 18.085920333862305,
"rewards/rejected": -21.324867248535156,
"step": 130
},
{
"epoch": 0.896,
"grad_norm": 0.07947251945734024,
"learning_rate": 3.188256468013139e-08,
"logits/chosen": -1.650820255279541,
"logits/rejected": -1.1272088289260864,
"logps/chosen": -320.8426208496094,
"logps/rejected": -1027.0377197265625,
"loss": 0.0271,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -2.4786899089813232,
"rewards/margins": 19.36054801940918,
"rewards/rejected": -21.839237213134766,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 0.07403174042701721,
"learning_rate": 4.5251191160326495e-09,
"logits/chosen": -1.6176533699035645,
"logits/rejected": -1.134464979171753,
"logps/chosen": -357.2913513183594,
"logps/rejected": -1029.54443359375,
"loss": 0.0324,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -3.00289249420166,
"rewards/margins": 18.549577713012695,
"rewards/rejected": -21.552471160888672,
"step": 150
},
{
"epoch": 0.9984,
"step": 156,
"total_flos": 1.1530176818095063e+18,
"train_loss": 0.1052116885399207,
"train_runtime": 6627.1574,
"train_samples_per_second": 0.754,
"train_steps_per_second": 0.024
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1530176818095063e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}