{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999219055056618,
"eval_steps": 500,
"global_step": 3201,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05029285435376806,
"grad_norm": 3.709867000579834,
"learning_rate": 5.015576323987538e-07,
"logits/chosen": -0.5341636538505554,
"logits/rejected": -0.4424578845500946,
"logps/chosen": -72.9678726196289,
"logps/rejected": -9.562939643859863,
"loss": 0.6897,
"rewards/accuracies": 0.5854037404060364,
"rewards/chosen": 0.006333178840577602,
"rewards/margins": 0.007243483327329159,
"rewards/rejected": -0.0009103047195822,
"step": 161
},
{
"epoch": 0.10058570870753612,
"grad_norm": 4.478637218475342,
"learning_rate": 9.996527777777777e-07,
"logits/chosen": -0.5466479063034058,
"logits/rejected": -0.4609982669353485,
"logps/chosen": -67.95721435546875,
"logps/rejected": -10.801457405090332,
"loss": 0.5858,
"rewards/accuracies": 0.9448757767677307,
"rewards/chosen": 0.21187880635261536,
"rewards/margins": 0.2563822567462921,
"rewards/rejected": -0.04450342431664467,
"step": 322
},
{
"epoch": 0.15087856306130418,
"grad_norm": 3.006552219390869,
"learning_rate": 9.4375e-07,
"logits/chosen": -0.5257240533828735,
"logits/rejected": -0.4737217128276825,
"logps/chosen": -55.05992126464844,
"logps/rejected": -17.820192337036133,
"loss": 0.2339,
"rewards/accuracies": 0.9798136949539185,
"rewards/chosen": 1.0976394414901733,
"rewards/margins": 1.828528642654419,
"rewards/rejected": -0.7308891415596008,
"step": 483
},
{
"epoch": 0.20117141741507225,
"grad_norm": 3.329214572906494,
"learning_rate": 8.878472222222221e-07,
"logits/chosen": -0.5365005135536194,
"logits/rejected": -0.4758988916873932,
"logps/chosen": -56.26280975341797,
"logps/rejected": -29.085416793823242,
"loss": 0.0705,
"rewards/accuracies": 0.9852484464645386,
"rewards/chosen": 1.5725164413452148,
"rewards/margins": 3.528273344039917,
"rewards/rejected": -1.9557571411132812,
"step": 644
},
{
"epoch": 0.2514642717688403,
"grad_norm": 0.14665871858596802,
"learning_rate": 8.319444444444444e-07,
"logits/chosen": -0.48797789216041565,
"logits/rejected": -0.402078241109848,
"logps/chosen": -51.26579666137695,
"logps/rejected": -40.8376350402832,
"loss": 0.0447,
"rewards/accuracies": 0.9790372848510742,
"rewards/chosen": 1.6765810251235962,
"rewards/margins": 4.7392683029174805,
"rewards/rejected": -3.062687397003174,
"step": 805
},
{
"epoch": 0.30175712612260835,
"grad_norm": 0.1438707709312439,
"learning_rate": 7.760416666666666e-07,
"logits/chosen": -0.48938173055648804,
"logits/rejected": -0.3820492923259735,
"logps/chosen": -50.945064544677734,
"logps/rejected": -47.827144622802734,
"loss": 0.0257,
"rewards/accuracies": 0.989130437374115,
"rewards/chosen": 1.6951720714569092,
"rewards/margins": 5.385878562927246,
"rewards/rejected": -3.6907060146331787,
"step": 966
},
{
"epoch": 0.3520499804763764,
"grad_norm": 2.6210033893585205,
"learning_rate": 7.201388888888889e-07,
"logits/chosen": -0.47775155305862427,
"logits/rejected": -0.3484514653682709,
"logps/chosen": -51.115570068359375,
"logps/rejected": -48.93259811401367,
"loss": 0.0334,
"rewards/accuracies": 0.9852484464645386,
"rewards/chosen": 1.7170895338058472,
"rewards/margins": 5.645313262939453,
"rewards/rejected": -3.928223133087158,
"step": 1127
},
{
"epoch": 0.4023428348301445,
"grad_norm": 4.137578964233398,
"learning_rate": 6.642361111111111e-07,
"logits/chosen": -0.418182373046875,
"logits/rejected": -0.27966105937957764,
"logps/chosen": -50.84815979003906,
"logps/rejected": -52.41380310058594,
"loss": 0.031,
"rewards/accuracies": 0.9836956858634949,
"rewards/chosen": 1.7334542274475098,
"rewards/margins": 5.92018985748291,
"rewards/rejected": -4.1867356300354,
"step": 1288
},
{
"epoch": 0.45263568918391256,
"grad_norm": 0.07434514909982681,
"learning_rate": 6.083333333333333e-07,
"logits/chosen": -0.4359574019908905,
"logits/rejected": -0.27854442596435547,
"logps/chosen": -52.3484992980957,
"logps/rejected": -55.29338073730469,
"loss": 0.0264,
"rewards/accuracies": 0.9883540272712708,
"rewards/chosen": 1.8382827043533325,
"rewards/margins": 6.365357398986816,
"rewards/rejected": -4.527073860168457,
"step": 1449
},
{
"epoch": 0.5029285435376806,
"grad_norm": 0.17919230461120605,
"learning_rate": 5.524305555555555e-07,
"logits/chosen": -0.4308469891548157,
"logits/rejected": -0.27532947063446045,
"logps/chosen": -52.922943115234375,
"logps/rejected": -57.74534225463867,
"loss": 0.0238,
"rewards/accuracies": 0.9860248565673828,
"rewards/chosen": 1.7562286853790283,
"rewards/margins": 6.457979679107666,
"rewards/rejected": -4.701751708984375,
"step": 1610
},
{
"epoch": 0.5532213978914486,
"grad_norm": 0.026971790939569473,
"learning_rate": 4.965277777777777e-07,
"logits/chosen": -0.3874114155769348,
"logits/rejected": -0.2123931497335434,
"logps/chosen": -54.096187591552734,
"logps/rejected": -60.510658264160156,
"loss": 0.0233,
"rewards/accuracies": 0.9860248565673828,
"rewards/chosen": 1.8518177270889282,
"rewards/margins": 6.871143341064453,
"rewards/rejected": -5.019325256347656,
"step": 1771
},
{
"epoch": 0.6035142522452167,
"grad_norm": 0.09704186022281647,
"learning_rate": 4.4062499999999996e-07,
"logits/chosen": -0.37683001160621643,
"logits/rejected": -0.19427433609962463,
"logps/chosen": -54.42588424682617,
"logps/rejected": -61.70278549194336,
"loss": 0.0221,
"rewards/accuracies": 0.986801266670227,
"rewards/chosen": 1.8429250717163086,
"rewards/margins": 7.024946212768555,
"rewards/rejected": -5.182021141052246,
"step": 1932
},
{
"epoch": 0.6538071065989848,
"grad_norm": 0.023908786475658417,
"learning_rate": 3.8472222222222225e-07,
"logits/chosen": -0.4016348421573639,
"logits/rejected": -0.23176224529743195,
"logps/chosen": -51.2470588684082,
"logps/rejected": -64.10649871826172,
"loss": 0.0229,
"rewards/accuracies": 0.9883540272712708,
"rewards/chosen": 1.6802482604980469,
"rewards/margins": 7.057994365692139,
"rewards/rejected": -5.377746105194092,
"step": 2093
},
{
"epoch": 0.7040999609527528,
"grad_norm": 33.38786697387695,
"learning_rate": 3.2881944444444443e-07,
"logits/chosen": -0.38065940141677856,
"logits/rejected": -0.19005167484283447,
"logps/chosen": -52.53501892089844,
"logps/rejected": -65.53170013427734,
"loss": 0.0286,
"rewards/accuracies": 0.986801266670227,
"rewards/chosen": 1.750819444656372,
"rewards/margins": 7.3389058113098145,
"rewards/rejected": -5.5880866050720215,
"step": 2254
},
{
"epoch": 0.7543928153065209,
"grad_norm": 33.96625518798828,
"learning_rate": 2.729166666666666e-07,
"logits/chosen": -0.36966800689697266,
"logits/rejected": -0.18849784135818481,
"logps/chosen": -51.86404800415039,
"logps/rejected": -67.312255859375,
"loss": 0.0249,
"rewards/accuracies": 0.9883540272712708,
"rewards/chosen": 1.648945927619934,
"rewards/margins": 7.407442092895508,
"rewards/rejected": -5.758496284484863,
"step": 2415
},
{
"epoch": 0.804685669660289,
"grad_norm": 0.029783952981233597,
"learning_rate": 2.1701388888888887e-07,
"logits/chosen": -0.3718484044075012,
"logits/rejected": -0.18486632406711578,
"logps/chosen": -54.5934944152832,
"logps/rejected": -68.00869750976562,
"loss": 0.0317,
"rewards/accuracies": 0.9852484464645386,
"rewards/chosen": 1.6592566967010498,
"rewards/margins": 7.394908905029297,
"rewards/rejected": -5.735651969909668,
"step": 2576
},
{
"epoch": 0.854978524014057,
"grad_norm": 0.015878599137067795,
"learning_rate": 1.611111111111111e-07,
"logits/chosen": -0.37658101320266724,
"logits/rejected": -0.2032197117805481,
"logps/chosen": -51.02374267578125,
"logps/rejected": -68.18423461914062,
"loss": 0.0295,
"rewards/accuracies": 0.9852484464645386,
"rewards/chosen": 1.6066235303878784,
"rewards/margins": 7.458284854888916,
"rewards/rejected": -5.851661205291748,
"step": 2737
},
{
"epoch": 0.9052713783678251,
"grad_norm": 0.03441372141242027,
"learning_rate": 1.0520833333333333e-07,
"logits/chosen": -0.3815793991088867,
"logits/rejected": -0.1944185197353363,
"logps/chosen": -51.63225555419922,
"logps/rejected": -68.67254638671875,
"loss": 0.0241,
"rewards/accuracies": 0.989130437374115,
"rewards/chosen": 1.6394439935684204,
"rewards/margins": 7.522336959838867,
"rewards/rejected": -5.882892608642578,
"step": 2898
},
{
"epoch": 0.9555642327215931,
"grad_norm": 0.028507934883236885,
"learning_rate": 4.9305555555555555e-08,
"logits/chosen": -0.35245007276535034,
"logits/rejected": -0.17326129972934723,
"logps/chosen": -54.05128479003906,
"logps/rejected": -69.85031127929688,
"loss": 0.0227,
"rewards/accuracies": 0.9906832575798035,
"rewards/chosen": 1.6639856100082397,
"rewards/margins": 7.611756801605225,
"rewards/rejected": -5.947770595550537,
"step": 3059
},
{
"epoch": 0.9999219055056618,
"step": 3201,
"total_flos": 1.8190117587218596e+18,
"train_loss": 0.10125412571545654,
"train_runtime": 44496.0824,
"train_samples_per_second": 0.576,
"train_steps_per_second": 0.072
}
],
"logging_steps": 161,
"max_steps": 3201,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8190117587218596e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}