{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997303855486654,
"eval_steps": 500,
"global_step": 1854,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.050148287948234026,
"grad_norm": 3.929126501083374,
"learning_rate": 5e-07,
"logits/chosen": -0.5141991972923279,
"logits/rejected": -0.47028154134750366,
"logps/chosen": -69.07454681396484,
"logps/rejected": -14.869856834411621,
"loss": 0.6929,
"rewards/accuracies": 0.48521506786346436,
"rewards/chosen": -1.161982163466746e-05,
"rewards/margins": 0.0009046773775480688,
"rewards/rejected": -0.0009162970818579197,
"step": 93
},
{
"epoch": 0.10029657589646805,
"grad_norm": 7.2451324462890625,
"learning_rate": 1e-06,
"logits/chosen": -0.5093461871147156,
"logits/rejected": -0.45741066336631775,
"logps/chosen": -76.25039672851562,
"logps/rejected": -15.682782173156738,
"loss": 0.6682,
"rewards/accuracies": 0.7553763389587402,
"rewards/chosen": 0.043871019035577774,
"rewards/margins": 0.052321143448352814,
"rewards/rejected": -0.00845012441277504,
"step": 186
},
{
"epoch": 0.15044486384470207,
"grad_norm": 3.729768991470337,
"learning_rate": 9.442446043165467e-07,
"logits/chosen": -0.49496081471443176,
"logits/rejected": -0.4462580382823944,
"logps/chosen": -73.93294525146484,
"logps/rejected": -17.590185165405273,
"loss": 0.5259,
"rewards/accuracies": 0.8602150678634644,
"rewards/chosen": 0.3595849275588989,
"rewards/margins": 0.43593457341194153,
"rewards/rejected": -0.07634969055652618,
"step": 279
},
{
"epoch": 0.2005931517929361,
"grad_norm": 2.1467108726501465,
"learning_rate": 8.884892086330935e-07,
"logits/chosen": -0.5146396160125732,
"logits/rejected": -0.4725695848464966,
"logps/chosen": -67.33499145507812,
"logps/rejected": -21.152362823486328,
"loss": 0.3566,
"rewards/accuracies": 0.9408602118492126,
"rewards/chosen": 0.8917596340179443,
"rewards/margins": 1.2377275228500366,
"rewards/rejected": -0.3459678590297699,
"step": 372
},
{
"epoch": 0.25074143974117014,
"grad_norm": 2.8724427223205566,
"learning_rate": 8.327338129496403e-07,
"logits/chosen": -0.5053039193153381,
"logits/rejected": -0.46124857664108276,
"logps/chosen": -60.683773040771484,
"logps/rejected": -23.27366828918457,
"loss": 0.2328,
"rewards/accuracies": 0.9731183052062988,
"rewards/chosen": 1.1787759065628052,
"rewards/margins": 1.961389183998108,
"rewards/rejected": -0.7826132774353027,
"step": 465
},
{
"epoch": 0.30088972768940414,
"grad_norm": 4.906219959259033,
"learning_rate": 7.769784172661871e-07,
"logits/chosen": -0.5105525851249695,
"logits/rejected": -0.46868896484375,
"logps/chosen": -57.557701110839844,
"logps/rejected": -29.869394302368164,
"loss": 0.1128,
"rewards/accuracies": 0.9798387289047241,
"rewards/chosen": 1.250539779663086,
"rewards/margins": 2.861812114715576,
"rewards/rejected": -1.6112723350524902,
"step": 558
},
{
"epoch": 0.3510380156376382,
"grad_norm": 0.3975902497768402,
"learning_rate": 7.212230215827337e-07,
"logits/chosen": -0.51771479845047,
"logits/rejected": -0.46667957305908203,
"logps/chosen": -61.55564880371094,
"logps/rejected": -39.249454498291016,
"loss": 0.0582,
"rewards/accuracies": 0.9865591526031494,
"rewards/chosen": 1.4144244194030762,
"rewards/margins": 3.826444149017334,
"rewards/rejected": -2.412019729614258,
"step": 651
},
{
"epoch": 0.4011863035858722,
"grad_norm": 2.5660245418548584,
"learning_rate": 6.654676258992805e-07,
"logits/chosen": -0.46265119314193726,
"logits/rejected": -0.426276832818985,
"logps/chosen": -56.20552062988281,
"logps/rejected": -43.43349838256836,
"loss": 0.0577,
"rewards/accuracies": 0.9811828136444092,
"rewards/chosen": 1.415700912475586,
"rewards/margins": 4.192880153656006,
"rewards/rejected": -2.77717924118042,
"step": 744
},
{
"epoch": 0.4513345915341062,
"grad_norm": 0.4916980564594269,
"learning_rate": 6.097122302158273e-07,
"logits/chosen": -0.5033692121505737,
"logits/rejected": -0.4418078064918518,
"logps/chosen": -59.61499786376953,
"logps/rejected": -45.10437774658203,
"loss": 0.0413,
"rewards/accuracies": 0.9852150678634644,
"rewards/chosen": 1.577943205833435,
"rewards/margins": 4.589221477508545,
"rewards/rejected": -3.0112783908843994,
"step": 837
},
{
"epoch": 0.5014828794823403,
"grad_norm": 0.3903834819793701,
"learning_rate": 5.539568345323741e-07,
"logits/chosen": -0.44615066051483154,
"logits/rejected": -0.3858674168586731,
"logps/chosen": -59.962955474853516,
"logps/rejected": -47.591575622558594,
"loss": 0.0306,
"rewards/accuracies": 0.9879032373428345,
"rewards/chosen": 1.6375161409378052,
"rewards/margins": 4.831850528717041,
"rewards/rejected": -3.1943342685699463,
"step": 930
},
{
"epoch": 0.5516311674305743,
"grad_norm": 0.27728018164634705,
"learning_rate": 4.982014388489209e-07,
"logits/chosen": -0.42113569378852844,
"logits/rejected": -0.37468260526657104,
"logps/chosen": -58.571006774902344,
"logps/rejected": -50.136695861816406,
"loss": 0.0334,
"rewards/accuracies": 0.9865591526031494,
"rewards/chosen": 1.5126001834869385,
"rewards/margins": 4.904977798461914,
"rewards/rejected": -3.3923778533935547,
"step": 1023
},
{
"epoch": 0.6017794553788083,
"grad_norm": 0.25181448459625244,
"learning_rate": 4.424460431654676e-07,
"logits/chosen": -0.41716840863227844,
"logits/rejected": -0.35924020409584045,
"logps/chosen": -56.86894226074219,
"logps/rejected": -51.47317123413086,
"loss": 0.0338,
"rewards/accuracies": 0.9838709831237793,
"rewards/chosen": 1.5967961549758911,
"rewards/margins": 5.1519880294799805,
"rewards/rejected": -3.5551917552948,
"step": 1116
},
{
"epoch": 0.6519277433270423,
"grad_norm": 20.254880905151367,
"learning_rate": 3.8669064748201436e-07,
"logits/chosen": -0.4112766981124878,
"logits/rejected": -0.33467066287994385,
"logps/chosen": -59.09377670288086,
"logps/rejected": -51.40492630004883,
"loss": 0.0345,
"rewards/accuracies": 0.9879032373428345,
"rewards/chosen": 1.6269216537475586,
"rewards/margins": 5.300109386444092,
"rewards/rejected": -3.6731879711151123,
"step": 1209
},
{
"epoch": 0.7020760312752764,
"grad_norm": 0.0816323384642601,
"learning_rate": 3.309352517985611e-07,
"logits/chosen": -0.4078998267650604,
"logits/rejected": -0.3409072160720825,
"logps/chosen": -54.6790771484375,
"logps/rejected": -53.65495300292969,
"loss": 0.0335,
"rewards/accuracies": 0.9865591526031494,
"rewards/chosen": 1.5546735525131226,
"rewards/margins": 5.438999652862549,
"rewards/rejected": -3.8843259811401367,
"step": 1302
},
{
"epoch": 0.7522243192235104,
"grad_norm": 10.449593544006348,
"learning_rate": 2.751798561151079e-07,
"logits/chosen": -0.41149967908859253,
"logits/rejected": -0.3351740837097168,
"logps/chosen": -58.309329986572266,
"logps/rejected": -52.98766326904297,
"loss": 0.0298,
"rewards/accuracies": 0.9879032373428345,
"rewards/chosen": 1.7052394151687622,
"rewards/margins": 5.557784557342529,
"rewards/rejected": -3.8525450229644775,
"step": 1395
},
{
"epoch": 0.8023726071717444,
"grad_norm": 1.828351378440857,
"learning_rate": 2.1942446043165465e-07,
"logits/chosen": -0.41116863489151,
"logits/rejected": -0.3420298993587494,
"logps/chosen": -57.310306549072266,
"logps/rejected": -56.159385681152344,
"loss": 0.0199,
"rewards/accuracies": 0.9932795763015747,
"rewards/chosen": 1.6173115968704224,
"rewards/margins": 5.636691093444824,
"rewards/rejected": -4.019379615783691,
"step": 1488
},
{
"epoch": 0.8525208951199784,
"grad_norm": 0.3853701651096344,
"learning_rate": 1.6366906474820144e-07,
"logits/chosen": -0.386165589094162,
"logits/rejected": -0.314485102891922,
"logps/chosen": -59.73184585571289,
"logps/rejected": -56.95379638671875,
"loss": 0.0239,
"rewards/accuracies": 0.9905914068222046,
"rewards/chosen": 1.7348586320877075,
"rewards/margins": 5.7297468185424805,
"rewards/rejected": -3.9948880672454834,
"step": 1581
},
{
"epoch": 0.9026691830682124,
"grad_norm": 0.15693414211273193,
"learning_rate": 1.0791366906474819e-07,
"logits/chosen": -0.4054364860057831,
"logits/rejected": -0.33498746156692505,
"logps/chosen": -59.102989196777344,
"logps/rejected": -57.07588195800781,
"loss": 0.0154,
"rewards/accuracies": 0.9946236610412598,
"rewards/chosen": 1.6817787885665894,
"rewards/margins": 5.79107141494751,
"rewards/rejected": -4.109292984008789,
"step": 1674
},
{
"epoch": 0.9528174710164464,
"grad_norm": 0.12521210312843323,
"learning_rate": 5.2158273381294966e-08,
"logits/chosen": -0.40175333619117737,
"logits/rejected": -0.32109692692756653,
"logps/chosen": -56.866294860839844,
"logps/rejected": -57.35883331298828,
"loss": 0.0239,
"rewards/accuracies": 0.9905914068222046,
"rewards/chosen": 1.6757546663284302,
"rewards/margins": 5.896215438842773,
"rewards/rejected": -4.220460414886475,
"step": 1767
},
{
"epoch": 0.9997303855486654,
"step": 1854,
"total_flos": 1.1904720283191214e+18,
"train_loss": 0.15248539275464632,
"train_runtime": 30932.1025,
"train_samples_per_second": 0.48,
"train_steps_per_second": 0.06
}
],
"logging_steps": 93,
"max_steps": 1854,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1904720283191214e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}