{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998045729919874,
"eval_steps": 500,
"global_step": 1279,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.050029314051201874,
"grad_norm": 6.638810634613037,
"learning_rate": 5e-07,
"logits/chosen": -0.5138859748840332,
"logits/rejected": -0.4518983066082001,
"logps/chosen": -77.28889465332031,
"logps/rejected": -14.844705581665039,
"loss": 0.6924,
"rewards/accuracies": 0.50390625,
"rewards/chosen": 0.0017984423320740461,
"rewards/margins": 0.0019690156914293766,
"rewards/rejected": -0.00017057315562851727,
"step": 64
},
{
"epoch": 0.10005862810240375,
"grad_norm": 3.6703028678894043,
"learning_rate": 1e-06,
"logits/chosen": -0.5301803350448608,
"logits/rejected": -0.46129050850868225,
"logps/chosen": -77.88700866699219,
"logps/rejected": -13.736372947692871,
"loss": 0.6787,
"rewards/accuracies": 0.693359375,
"rewards/chosen": 0.026838650926947594,
"rewards/margins": 0.02976146526634693,
"rewards/rejected": -0.002922814106568694,
"step": 128
},
{
"epoch": 0.15008794215360563,
"grad_norm": 4.980158805847168,
"learning_rate": 9.44396177237185e-07,
"logits/chosen": -0.5469677448272705,
"logits/rejected": -0.48332178592681885,
"logps/chosen": -77.87326049804688,
"logps/rejected": -14.177068710327148,
"loss": 0.6161,
"rewards/accuracies": 0.82421875,
"rewards/chosen": 0.15434227883815765,
"rewards/margins": 0.17204590141773224,
"rewards/rejected": -0.017703618854284286,
"step": 192
},
{
"epoch": 0.2001172562048075,
"grad_norm": 2.8744795322418213,
"learning_rate": 8.887923544743701e-07,
"logits/chosen": -0.5636645555496216,
"logits/rejected": -0.5051460266113281,
"logps/chosen": -63.355735778808594,
"logps/rejected": -12.730010032653809,
"loss": 0.5124,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.3969045877456665,
"rewards/margins": 0.5048438310623169,
"rewards/rejected": -0.10793925076723099,
"step": 256
},
{
"epoch": 0.2501465702560094,
"grad_norm": 2.2977707386016846,
"learning_rate": 8.331885317115551e-07,
"logits/chosen": -0.5301216840744019,
"logits/rejected": -0.47033828496932983,
"logps/chosen": -65.85761260986328,
"logps/rejected": -15.843816757202148,
"loss": 0.3878,
"rewards/accuracies": 0.888671875,
"rewards/chosen": 0.79034823179245,
"rewards/margins": 1.061028003692627,
"rewards/rejected": -0.2706798315048218,
"step": 320
},
{
"epoch": 0.30017588430721126,
"grad_norm": 8.175545692443848,
"learning_rate": 7.775847089487402e-07,
"logits/chosen": -0.5346195697784424,
"logits/rejected": -0.4867401123046875,
"logps/chosen": -57.788429260253906,
"logps/rejected": -16.67302894592285,
"loss": 0.3424,
"rewards/accuracies": 0.904296875,
"rewards/chosen": 0.9831899404525757,
"rewards/margins": 1.4113095998764038,
"rewards/rejected": -0.42811962962150574,
"step": 384
},
{
"epoch": 0.35020519835841313,
"grad_norm": 5.475202560424805,
"learning_rate": 7.219808861859253e-07,
"logits/chosen": -0.5421884059906006,
"logits/rejected": -0.4896155595779419,
"logps/chosen": -59.334922790527344,
"logps/rejected": -18.624767303466797,
"loss": 0.265,
"rewards/accuracies": 0.970703125,
"rewards/chosen": 1.2069406509399414,
"rewards/margins": 1.8477783203125,
"rewards/rejected": -0.640837550163269,
"step": 448
},
{
"epoch": 0.400234512409615,
"grad_norm": 3.7930808067321777,
"learning_rate": 6.663770634231103e-07,
"logits/chosen": -0.5423855185508728,
"logits/rejected": -0.5051375031471252,
"logps/chosen": -58.52878952026367,
"logps/rejected": -23.4506893157959,
"loss": 0.182,
"rewards/accuracies": 0.986328125,
"rewards/chosen": 1.3336964845657349,
"rewards/margins": 2.311509132385254,
"rewards/rejected": -0.977812647819519,
"step": 512
},
{
"epoch": 0.45026382646081686,
"grad_norm": 7.777218818664551,
"learning_rate": 6.107732406602954e-07,
"logits/chosen": -0.5627282857894897,
"logits/rejected": -0.5052539110183716,
"logps/chosen": -60.55673599243164,
"logps/rejected": -26.99791717529297,
"loss": 0.1096,
"rewards/accuracies": 0.982421875,
"rewards/chosen": 1.3532178401947021,
"rewards/margins": 2.905151605606079,
"rewards/rejected": -1.5519336462020874,
"step": 576
},
{
"epoch": 0.5002931405120188,
"grad_norm": 0.5906669497489929,
"learning_rate": 5.551694178974804e-07,
"logits/chosen": -0.5611196160316467,
"logits/rejected": -0.5089166164398193,
"logps/chosen": -59.12473678588867,
"logps/rejected": -32.835243225097656,
"loss": 0.0648,
"rewards/accuracies": 0.9921875,
"rewards/chosen": 1.443546175956726,
"rewards/margins": 3.489488124847412,
"rewards/rejected": -2.0459418296813965,
"step": 640
},
{
"epoch": 0.5503224545632206,
"grad_norm": 0.7606092095375061,
"learning_rate": 4.995655951346655e-07,
"logits/chosen": -0.5526726245880127,
"logits/rejected": -0.49657124280929565,
"logps/chosen": -57.72775650024414,
"logps/rejected": -36.90594482421875,
"loss": 0.0546,
"rewards/accuracies": 0.990234375,
"rewards/chosen": 1.4812407493591309,
"rewards/margins": 3.8918285369873047,
"rewards/rejected": -2.410587787628174,
"step": 704
},
{
"epoch": 0.6003517686144225,
"grad_norm": 0.8932979702949524,
"learning_rate": 4.4396177237185057e-07,
"logits/chosen": -0.5280415415763855,
"logits/rejected": -0.4753148555755615,
"logps/chosen": -58.317237854003906,
"logps/rejected": -40.71867370605469,
"loss": 0.0436,
"rewards/accuracies": 0.990234375,
"rewards/chosen": 1.4664157629013062,
"rewards/margins": 4.119485855102539,
"rewards/rejected": -2.6530702114105225,
"step": 768
},
{
"epoch": 0.6503810826656244,
"grad_norm": 1.0950371026992798,
"learning_rate": 3.8835794960903563e-07,
"logits/chosen": -0.523577094078064,
"logits/rejected": -0.464932382106781,
"logps/chosen": -58.45826721191406,
"logps/rejected": -40.988529205322266,
"loss": 0.0442,
"rewards/accuracies": 0.990234375,
"rewards/chosen": 1.5603256225585938,
"rewards/margins": 4.361178398132324,
"rewards/rejected": -2.8008527755737305,
"step": 832
},
{
"epoch": 0.7004103967168263,
"grad_norm": 0.8026629686355591,
"learning_rate": 3.327541268462207e-07,
"logits/chosen": -0.5141459703445435,
"logits/rejected": -0.44692087173461914,
"logps/chosen": -59.13020324707031,
"logps/rejected": -41.566627502441406,
"loss": 0.038,
"rewards/accuracies": 0.994140625,
"rewards/chosen": 1.5455219745635986,
"rewards/margins": 4.467495918273926,
"rewards/rejected": -2.9219741821289062,
"step": 896
},
{
"epoch": 0.7504397107680282,
"grad_norm": 0.05718870088458061,
"learning_rate": 2.7715030408340575e-07,
"logits/chosen": -0.5028055906295776,
"logits/rejected": -0.43091291189193726,
"logps/chosen": -56.72560501098633,
"logps/rejected": -42.42565155029297,
"loss": 0.0423,
"rewards/accuracies": 0.986328125,
"rewards/chosen": 1.564468264579773,
"rewards/margins": 4.592702865600586,
"rewards/rejected": -3.0282342433929443,
"step": 960
},
{
"epoch": 0.80046902481923,
"grad_norm": 0.14965815842151642,
"learning_rate": 2.215464813205908e-07,
"logits/chosen": -0.510848343372345,
"logits/rejected": -0.4440664052963257,
"logps/chosen": -56.914100646972656,
"logps/rejected": -42.933258056640625,
"loss": 0.0307,
"rewards/accuracies": 0.9921875,
"rewards/chosen": 1.552268147468567,
"rewards/margins": 4.621021270751953,
"rewards/rejected": -3.068753242492676,
"step": 1024
},
{
"epoch": 0.8504983388704319,
"grad_norm": 0.4853415787220001,
"learning_rate": 1.6594265855777585e-07,
"logits/chosen": -0.5129883289337158,
"logits/rejected": -0.4405321180820465,
"logps/chosen": -56.04991912841797,
"logps/rejected": -43.990997314453125,
"loss": 0.0213,
"rewards/accuracies": 0.994140625,
"rewards/chosen": 1.5749919414520264,
"rewards/margins": 4.778657913208008,
"rewards/rejected": -3.2036657333374023,
"step": 1088
},
{
"epoch": 0.9005276529216337,
"grad_norm": 0.44261807203292847,
"learning_rate": 1.103388357949609e-07,
"logits/chosen": -0.5202418565750122,
"logits/rejected": -0.4409845471382141,
"logps/chosen": -58.534915924072266,
"logps/rejected": -44.22764205932617,
"loss": 0.0335,
"rewards/accuracies": 0.98828125,
"rewards/chosen": 1.6060659885406494,
"rewards/margins": 4.796003341674805,
"rewards/rejected": -3.189937114715576,
"step": 1152
},
{
"epoch": 0.9505569669728356,
"grad_norm": 0.2634647786617279,
"learning_rate": 5.4735013032145953e-08,
"logits/chosen": -0.48342519998550415,
"logits/rejected": -0.4142173230648041,
"logps/chosen": -57.232521057128906,
"logps/rejected": -44.50954818725586,
"loss": 0.0315,
"rewards/accuracies": 0.990234375,
"rewards/chosen": 1.5877962112426758,
"rewards/margins": 4.721351623535156,
"rewards/rejected": -3.1335554122924805,
"step": 1216
},
{
"epoch": 0.9998045729919874,
"step": 1279,
"total_flos": 7.724459437129728e+17,
"train_loss": 0.21135991807092067,
"train_runtime": 19931.1539,
"train_samples_per_second": 0.513,
"train_steps_per_second": 0.064
}
],
"logging_steps": 64,
"max_steps": 1279,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.724459437129728e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}