{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999917239096251,
"eval_steps": 500,
"global_step": 6041,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05015310767193578,
"grad_norm": 4.3741655349731445,
"learning_rate": 5.008264462809917e-07,
"logits/chosen": -0.5391724109649658,
"logits/rejected": -0.4776774048805237,
"logps/chosen": -73.29891204833984,
"logps/rejected": -13.118536949157715,
"loss": 0.684,
"rewards/accuracies": 0.589108943939209,
"rewards/chosen": 0.016623780131340027,
"rewards/margins": 0.01931975968182087,
"rewards/rejected": -0.0026959802489727736,
"step": 303
},
{
"epoch": 0.10030621534387156,
"grad_norm": 3.5748157501220703,
"learning_rate": 9.998160412067696e-07,
"logits/chosen": -0.5328251719474792,
"logits/rejected": -0.4811299741268158,
"logps/chosen": -66.50557708740234,
"logps/rejected": -15.574967384338379,
"loss": 0.4581,
"rewards/accuracies": 0.8675742745399475,
"rewards/chosen": 0.6051633954048157,
"rewards/margins": 0.8234596848487854,
"rewards/rejected": -0.21829627454280853,
"step": 606
},
{
"epoch": 0.15045932301580733,
"grad_norm": 3.891946315765381,
"learning_rate": 9.440765268579838e-07,
"logits/chosen": -0.5248011946678162,
"logits/rejected": -0.4659003019332886,
"logps/chosen": -61.22736358642578,
"logps/rejected": -34.781681060791016,
"loss": 0.0781,
"rewards/accuracies": 0.9843234419822693,
"rewards/chosen": 1.5499842166900635,
"rewards/margins": 3.709665298461914,
"rewards/rejected": -2.1596810817718506,
"step": 909
},
{
"epoch": 0.20061243068774312,
"grad_norm": 0.11657057702541351,
"learning_rate": 8.883370125091979e-07,
"logits/chosen": -0.48526012897491455,
"logits/rejected": -0.4090143144130707,
"logps/chosen": -56.97617721557617,
"logps/rejected": -48.22561264038086,
"loss": 0.0338,
"rewards/accuracies": 0.9867987036705017,
"rewards/chosen": 1.691601276397705,
"rewards/margins": 5.236583709716797,
"rewards/rejected": -3.544982433319092,
"step": 1212
},
{
"epoch": 0.2507655383596789,
"grad_norm": 0.1646522730588913,
"learning_rate": 8.325974981604121e-07,
"logits/chosen": -0.43419143557548523,
"logits/rejected": -0.3306835889816284,
"logps/chosen": -56.835514068603516,
"logps/rejected": -55.82384490966797,
"loss": 0.027,
"rewards/accuracies": 0.9913366436958313,
"rewards/chosen": 1.7533217668533325,
"rewards/margins": 6.046737194061279,
"rewards/rejected": -4.2934160232543945,
"step": 1515
},
{
"epoch": 0.30091864603161467,
"grad_norm": 16.816612243652344,
"learning_rate": 7.768579838116262e-07,
"logits/chosen": -0.4011126756668091,
"logits/rejected": -0.2737236022949219,
"logps/chosen": -55.48396301269531,
"logps/rejected": -65.16657257080078,
"loss": 0.0307,
"rewards/accuracies": 0.9892739653587341,
"rewards/chosen": 1.696967601776123,
"rewards/margins": 6.927124500274658,
"rewards/rejected": -5.230156898498535,
"step": 1818
},
{
"epoch": 0.3510717537035504,
"grad_norm": 2.5598514080047607,
"learning_rate": 7.211184694628402e-07,
"logits/chosen": -0.37529540061950684,
"logits/rejected": -0.23136167228221893,
"logps/chosen": -57.458560943603516,
"logps/rejected": -76.030517578125,
"loss": 0.0153,
"rewards/accuracies": 0.9954620599746704,
"rewards/chosen": 1.6613062620162964,
"rewards/margins": 7.948617935180664,
"rewards/rejected": -6.287312030792236,
"step": 2121
},
{
"epoch": 0.40122486137548624,
"grad_norm": 0.03665272891521454,
"learning_rate": 6.653789551140544e-07,
"logits/chosen": -0.32625895738601685,
"logits/rejected": -0.1696743667125702,
"logps/chosen": -56.47751998901367,
"logps/rejected": -82.79418182373047,
"loss": 0.0185,
"rewards/accuracies": 0.9929868578910828,
"rewards/chosen": 1.56876802444458,
"rewards/margins": 8.616827964782715,
"rewards/rejected": -7.048060417175293,
"step": 2424
},
{
"epoch": 0.451377969047422,
"grad_norm": 0.0068373712711036205,
"learning_rate": 6.096394407652685e-07,
"logits/chosen": -0.33455565571784973,
"logits/rejected": -0.17350350320339203,
"logps/chosen": -57.92192840576172,
"logps/rejected": -88.568603515625,
"loss": 0.0217,
"rewards/accuracies": 0.9913366436958313,
"rewards/chosen": 1.5466176271438599,
"rewards/margins": 9.043907165527344,
"rewards/rejected": -7.497289657592773,
"step": 2727
},
{
"epoch": 0.5015310767193578,
"grad_norm": 1.1985265016555786,
"learning_rate": 5.538999264164827e-07,
"logits/chosen": -0.32679906487464905,
"logits/rejected": -0.16868844628334045,
"logps/chosen": -57.272857666015625,
"logps/rejected": -90.3719711303711,
"loss": 0.0215,
"rewards/accuracies": 0.9929868578910828,
"rewards/chosen": 1.4742236137390137,
"rewards/margins": 9.2846097946167,
"rewards/rejected": -7.8103861808776855,
"step": 3030
},
{
"epoch": 0.5516841843912935,
"grad_norm": 0.004047638736665249,
"learning_rate": 4.981604120676968e-07,
"logits/chosen": -0.3204115033149719,
"logits/rejected": -0.17468391358852386,
"logps/chosen": -58.51227951049805,
"logps/rejected": -94.06062316894531,
"loss": 0.0278,
"rewards/accuracies": 0.9905115962028503,
"rewards/chosen": 1.527059555053711,
"rewards/margins": 9.519412994384766,
"rewards/rejected": -7.992353916168213,
"step": 3333
},
{
"epoch": 0.6018372920632293,
"grad_norm": 0.3182278573513031,
"learning_rate": 4.4242089771891094e-07,
"logits/chosen": -0.3149339258670807,
"logits/rejected": -0.15087205171585083,
"logps/chosen": -57.350006103515625,
"logps/rejected": -94.76825714111328,
"loss": 0.0276,
"rewards/accuracies": 0.9900990128517151,
"rewards/chosen": 1.4863520860671997,
"rewards/margins": 9.664637565612793,
"rewards/rejected": -8.178285598754883,
"step": 3636
},
{
"epoch": 0.6519903997351651,
"grad_norm": 0.016533929854631424,
"learning_rate": 3.866813833701251e-07,
"logits/chosen": -0.3052721619606018,
"logits/rejected": -0.1389181911945343,
"logps/chosen": -58.16946792602539,
"logps/rejected": -95.81718444824219,
"loss": 0.0245,
"rewards/accuracies": 0.9909241199493408,
"rewards/chosen": 1.4919017553329468,
"rewards/margins": 9.79186725616455,
"rewards/rejected": -8.299964904785156,
"step": 3939
},
{
"epoch": 0.7021435074071009,
"grad_norm": 0.010237179696559906,
"learning_rate": 3.3094186902133917e-07,
"logits/chosen": -0.3208546042442322,
"logits/rejected": -0.1598815768957138,
"logps/chosen": -58.12910079956055,
"logps/rejected": -97.86547088623047,
"loss": 0.0245,
"rewards/accuracies": 0.9913366436958313,
"rewards/chosen": 1.4368475675582886,
"rewards/margins": 9.900219917297363,
"rewards/rejected": -8.463372230529785,
"step": 4242
},
{
"epoch": 0.7522966150790367,
"grad_norm": 0.008713229559361935,
"learning_rate": 2.752023546725533e-07,
"logits/chosen": -0.3229035437107086,
"logits/rejected": -0.15609696507453918,
"logps/chosen": -57.847740173339844,
"logps/rejected": -98.02397918701172,
"loss": 0.019,
"rewards/accuracies": 0.9925742745399475,
"rewards/chosen": 1.491112232208252,
"rewards/margins": 10.048946380615234,
"rewards/rejected": -8.557833671569824,
"step": 4545
},
{
"epoch": 0.8024497227509725,
"grad_norm": 0.0016923310467973351,
"learning_rate": 2.1946284032376748e-07,
"logits/chosen": -0.3198649287223816,
"logits/rejected": -0.1555498093366623,
"logps/chosen": -57.79204177856445,
"logps/rejected": -97.5900650024414,
"loss": 0.0152,
"rewards/accuracies": 0.9958746433258057,
"rewards/chosen": 1.5458145141601562,
"rewards/margins": 9.990068435668945,
"rewards/rejected": -8.444254875183105,
"step": 4848
},
{
"epoch": 0.8526028304229082,
"grad_norm": 0.005034138448536396,
"learning_rate": 1.637233259749816e-07,
"logits/chosen": -0.31206631660461426,
"logits/rejected": -0.13810566067695618,
"logps/chosen": -60.38402557373047,
"logps/rejected": -97.9999008178711,
"loss": 0.0182,
"rewards/accuracies": 0.9938119053840637,
"rewards/chosen": 1.5842220783233643,
"rewards/margins": 10.07706356048584,
"rewards/rejected": -8.492840766906738,
"step": 5151
},
{
"epoch": 0.902755938094844,
"grad_norm": 0.01207835040986538,
"learning_rate": 1.0798381162619573e-07,
"logits/chosen": -0.3065117299556732,
"logits/rejected": -0.13630090653896332,
"logps/chosen": -59.60470199584961,
"logps/rejected": -98.72499084472656,
"loss": 0.0196,
"rewards/accuracies": 0.9933993816375732,
"rewards/chosen": 1.5179524421691895,
"rewards/margins": 10.089001655578613,
"rewards/rejected": -8.571049690246582,
"step": 5454
},
{
"epoch": 0.9529090457667798,
"grad_norm": 0.008624515496194363,
"learning_rate": 5.224429727740986e-08,
"logits/chosen": -0.298076331615448,
"logits/rejected": -0.12163959443569183,
"logps/chosen": -61.02180862426758,
"logps/rejected": -98.43746185302734,
"loss": 0.0201,
"rewards/accuracies": 0.9942244291305542,
"rewards/chosen": 1.5929877758026123,
"rewards/margins": 10.118744850158691,
"rewards/rejected": -8.525758743286133,
"step": 5757
},
{
"epoch": 0.999917239096251,
"step": 6041,
"total_flos": 3.6566231979141366e+18,
"train_loss": 0.08035980544132106,
"train_runtime": 90068.102,
"train_samples_per_second": 0.537,
"train_steps_per_second": 0.067
}
],
"logging_steps": 303,
"max_steps": 6041,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.6566231979141366e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}