{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 100,
"global_step": 496,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004032258064516129,
"grad_norm": 215.24710014620507,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.6159,
"step": 1
},
{
"epoch": 0.020161290322580645,
"grad_norm": 312.9268127676858,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.4896,
"step": 5
},
{
"epoch": 0.04032258064516129,
"grad_norm": 22.150019622688806,
"learning_rate": 4.000000000000001e-06,
"loss": 1.4524,
"step": 10
},
{
"epoch": 0.06048387096774194,
"grad_norm": 6.1881183003709035,
"learning_rate": 6e-06,
"loss": 1.2634,
"step": 15
},
{
"epoch": 0.08064516129032258,
"grad_norm": 4.300533001219337,
"learning_rate": 8.000000000000001e-06,
"loss": 1.1845,
"step": 20
},
{
"epoch": 0.10080645161290322,
"grad_norm": 3.355604785739065,
"learning_rate": 1e-05,
"loss": 1.1828,
"step": 25
},
{
"epoch": 0.12096774193548387,
"grad_norm": 3.1071264175265796,
"learning_rate": 1.2e-05,
"loss": 1.1377,
"step": 30
},
{
"epoch": 0.14112903225806453,
"grad_norm": 3.739675462647032,
"learning_rate": 1.4e-05,
"loss": 1.1406,
"step": 35
},
{
"epoch": 0.16129032258064516,
"grad_norm": 3.5497603101186597,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.1189,
"step": 40
},
{
"epoch": 0.1814516129032258,
"grad_norm": 3.459217793654087,
"learning_rate": 1.8e-05,
"loss": 1.1015,
"step": 45
},
{
"epoch": 0.20161290322580644,
"grad_norm": 2.5061828560304673,
"learning_rate": 2e-05,
"loss": 1.1509,
"step": 50
},
{
"epoch": 0.2217741935483871,
"grad_norm": 3.071258628815723,
"learning_rate": 1.999379852284651e-05,
"loss": 1.1428,
"step": 55
},
{
"epoch": 0.24193548387096775,
"grad_norm": 2.1526518734405915,
"learning_rate": 1.9975201783049804e-05,
"loss": 1.0857,
"step": 60
},
{
"epoch": 0.2620967741935484,
"grad_norm": 3.1257781855467806,
"learning_rate": 1.9944232846061284e-05,
"loss": 1.1007,
"step": 65
},
{
"epoch": 0.28225806451612906,
"grad_norm": 2.650156490646313,
"learning_rate": 1.9900930122511993e-05,
"loss": 1.1763,
"step": 70
},
{
"epoch": 0.3024193548387097,
"grad_norm": 2.222582022247095,
"learning_rate": 1.984534732057208e-05,
"loss": 1.0812,
"step": 75
},
{
"epoch": 0.3225806451612903,
"grad_norm": 3.755219640137959,
"learning_rate": 1.977755337933682e-05,
"loss": 1.1358,
"step": 80
},
{
"epoch": 0.34274193548387094,
"grad_norm": 2.9861330009858578,
"learning_rate": 1.9697632383321755e-05,
"loss": 1.0906,
"step": 85
},
{
"epoch": 0.3629032258064516,
"grad_norm": 4.930667988082775,
"learning_rate": 1.960568345817306e-05,
"loss": 1.0903,
"step": 90
},
{
"epoch": 0.38306451612903225,
"grad_norm": 2.2881396839609565,
"learning_rate": 1.9501820647722458e-05,
"loss": 1.1276,
"step": 95
},
{
"epoch": 0.4032258064516129,
"grad_norm": 2.699315706389293,
"learning_rate": 1.9386172772539162e-05,
"loss": 1.0391,
"step": 100
},
{
"epoch": 0.4032258064516129,
"eval_loss": 1.1010785102844238,
"eval_runtime": 5.7903,
"eval_samples_per_second": 68.563,
"eval_steps_per_second": 2.245,
"step": 100
},
{
"epoch": 0.42338709677419356,
"grad_norm": 2.3951607960351318,
"learning_rate": 1.925888327015434e-05,
"loss": 1.1294,
"step": 105
},
{
"epoch": 0.4435483870967742,
"grad_norm": 2.590506218859771,
"learning_rate": 1.9120110017156172e-05,
"loss": 1.1039,
"step": 110
},
{
"epoch": 0.4637096774193548,
"grad_norm": 2.3642899736042855,
"learning_rate": 1.8970025133376252e-05,
"loss": 1.0845,
"step": 115
},
{
"epoch": 0.4838709677419355,
"grad_norm": 2.4194055228848623,
"learning_rate": 1.8808814768410157e-05,
"loss": 1.1614,
"step": 120
},
{
"epoch": 0.5040322580645161,
"grad_norm": 2.3158075822843545,
"learning_rate": 1.8636678870736928e-05,
"loss": 1.0667,
"step": 125
},
{
"epoch": 0.5241935483870968,
"grad_norm": 2.2181248045354165,
"learning_rate": 1.8453830939723913e-05,
"loss": 1.0932,
"step": 130
},
{
"epoch": 0.5443548387096774,
"grad_norm": 2.4950408748993653,
"learning_rate": 1.826049776082446e-05,
"loss": 1.102,
"step": 135
},
{
"epoch": 0.5645161290322581,
"grad_norm": 2.9826288289990774,
"learning_rate": 1.8056919124296957e-05,
"loss": 1.1153,
"step": 140
},
{
"epoch": 0.5846774193548387,
"grad_norm": 4.789513209353997,
"learning_rate": 1.784334752779408e-05,
"loss": 1.0892,
"step": 145
},
{
"epoch": 0.6048387096774194,
"grad_norm": 2.153748931520763,
"learning_rate": 1.76200478631911e-05,
"loss": 1.0512,
"step": 150
},
{
"epoch": 0.625,
"grad_norm": 2.1449515128315584,
"learning_rate": 1.7387297088041696e-05,
"loss": 1.0708,
"step": 155
},
{
"epoch": 0.6451612903225806,
"grad_norm": 2.568672641778878,
"learning_rate": 1.714538388206878e-05,
"loss": 1.1534,
"step": 160
},
{
"epoch": 0.6653225806451613,
"grad_norm": 2.4066286973710818,
"learning_rate": 1.6894608289116344e-05,
"loss": 1.0782,
"step": 165
},
{
"epoch": 0.6854838709677419,
"grad_norm": 2.469873074784075,
"learning_rate": 1.663528134500646e-05,
"loss": 1.1579,
"step": 170
},
{
"epoch": 0.7056451612903226,
"grad_norm": 2.546868439730841,
"learning_rate": 1.6367724691762967e-05,
"loss": 1.0727,
"step": 175
},
{
"epoch": 0.7258064516129032,
"grad_norm": 2.610421464248938,
"learning_rate": 1.609227017868033e-05,
"loss": 1.0766,
"step": 180
},
{
"epoch": 0.7459677419354839,
"grad_norm": 2.4332563313436486,
"learning_rate": 1.5809259450732495e-05,
"loss": 1.1252,
"step": 185
},
{
"epoch": 0.7661290322580645,
"grad_norm": 2.273728572874247,
"learning_rate": 1.551904352483217e-05,
"loss": 1.1049,
"step": 190
},
{
"epoch": 0.7862903225806451,
"grad_norm": 1.9228388068167408,
"learning_rate": 1.5221982354466172e-05,
"loss": 1.0661,
"step": 195
},
{
"epoch": 0.8064516129032258,
"grad_norm": 2.1988965883386573,
"learning_rate": 1.4918444383246738e-05,
"loss": 1.1256,
"step": 200
},
{
"epoch": 0.8064516129032258,
"eval_loss": 1.0620791912078857,
"eval_runtime": 5.8005,
"eval_samples_per_second": 68.443,
"eval_steps_per_second": 2.241,
"step": 200
},
{
"epoch": 0.8266129032258065,
"grad_norm": 2.4001670804799273,
"learning_rate": 1.460880608793262e-05,
"loss": 1.069,
"step": 205
},
{
"epoch": 0.8467741935483871,
"grad_norm": 2.361908189355991,
"learning_rate": 1.4293451511486658e-05,
"loss": 1.0566,
"step": 210
},
{
"epoch": 0.8669354838709677,
"grad_norm": 2.1713973708275955,
"learning_rate": 1.3972771786749074e-05,
"loss": 1.0593,
"step": 215
},
{
"epoch": 0.8870967741935484,
"grad_norm": 2.0277729000613403,
"learning_rate": 1.3647164651317178e-05,
"loss": 1.0812,
"step": 220
},
{
"epoch": 0.907258064516129,
"grad_norm": 2.4350034774265326,
"learning_rate": 1.3317033954233246e-05,
"loss": 1.1498,
"step": 225
},
{
"epoch": 0.9274193548387096,
"grad_norm": 2.359441580453533,
"learning_rate": 1.2982789155092407e-05,
"loss": 1.1063,
"step": 230
},
{
"epoch": 0.9475806451612904,
"grad_norm": 2.337686204567925,
"learning_rate": 1.264484481619177e-05,
"loss": 1.0411,
"step": 235
},
{
"epoch": 0.967741935483871,
"grad_norm": 2.1962971145230297,
"learning_rate": 1.23036200883507e-05,
"loss": 1.0567,
"step": 240
},
{
"epoch": 0.9879032258064516,
"grad_norm": 2.8727159078532507,
"learning_rate": 1.1959538191039986e-05,
"loss": 1.09,
"step": 245
},
{
"epoch": 1.0080645161290323,
"grad_norm": 3.2425805939567804,
"learning_rate": 1.1613025887464642e-05,
"loss": 0.8776,
"step": 250
},
{
"epoch": 1.028225806451613,
"grad_norm": 3.0688033959726506,
"learning_rate": 1.1264512955251479e-05,
"loss": 0.5624,
"step": 255
},
{
"epoch": 1.0483870967741935,
"grad_norm": 2.310668387893831,
"learning_rate": 1.0914431653397856e-05,
"loss": 0.5702,
"step": 260
},
{
"epoch": 1.0685483870967742,
"grad_norm": 1.810288404740645,
"learning_rate": 1.056321618614284e-05,
"loss": 0.5516,
"step": 265
},
{
"epoch": 1.0887096774193548,
"grad_norm": 2.029102262007128,
"learning_rate": 1.0211302164425657e-05,
"loss": 0.492,
"step": 270
},
{
"epoch": 1.1088709677419355,
"grad_norm": 2.1849097454980977,
"learning_rate": 9.859126065599435e-06,
"loss": 0.5087,
"step": 275
},
{
"epoch": 1.129032258064516,
"grad_norm": 2.0350424139394456,
"learning_rate": 9.507124692070356e-06,
"loss": 0.5514,
"step": 280
},
{
"epoch": 1.1491935483870968,
"grad_norm": 2.1657967134210643,
"learning_rate": 9.155734629533612e-06,
"loss": 0.4923,
"step": 285
},
{
"epoch": 1.1693548387096775,
"grad_norm": 2.3019047292629065,
"learning_rate": 8.805391705478149e-06,
"loss": 0.5185,
"step": 290
},
{
"epoch": 1.189516129032258,
"grad_norm": 2.342813194456275,
"learning_rate": 8.456530448631856e-06,
"loss": 0.5622,
"step": 295
},
{
"epoch": 1.2096774193548387,
"grad_norm": 2.4435792193657373,
"learning_rate": 8.10958355001755e-06,
"loss": 0.5037,
"step": 300
},
{
"epoch": 1.2096774193548387,
"eval_loss": 1.0970051288604736,
"eval_runtime": 5.7929,
"eval_samples_per_second": 68.532,
"eval_steps_per_second": 2.244,
"step": 300
},
{
"epoch": 1.2298387096774193,
"grad_norm": 2.0093621809495943,
"learning_rate": 7.764981326288273e-06,
"loss": 0.5044,
"step": 305
},
{
"epoch": 1.25,
"grad_norm": 2.217076831970701,
"learning_rate": 7.423151186007527e-06,
"loss": 0.4857,
"step": 310
},
{
"epoch": 1.2701612903225805,
"grad_norm": 2.711127350014508,
"learning_rate": 7.084517099536378e-06,
"loss": 0.5131,
"step": 315
},
{
"epoch": 1.2903225806451613,
"grad_norm": 2.2254535872260925,
"learning_rate": 6.749499073184957e-06,
"loss": 0.5338,
"step": 320
},
{
"epoch": 1.310483870967742,
"grad_norm": 2.438095643634648,
"learning_rate": 6.418512628280544e-06,
"loss": 0.5052,
"step": 325
},
{
"epoch": 1.3306451612903225,
"grad_norm": 2.1227128290496045,
"learning_rate": 6.09196828579838e-06,
"loss": 0.4932,
"step": 330
},
{
"epoch": 1.3508064516129032,
"grad_norm": 2.157875906897965,
"learning_rate": 5.7702710571943695e-06,
"loss": 0.488,
"step": 335
},
{
"epoch": 1.370967741935484,
"grad_norm": 2.1989233445083176,
"learning_rate": 5.453819942071212e-06,
"loss": 0.4638,
"step": 340
},
{
"epoch": 1.3911290322580645,
"grad_norm": 1.7426003114674993,
"learning_rate": 5.1430074333010346e-06,
"loss": 0.5005,
"step": 345
},
{
"epoch": 1.4112903225806452,
"grad_norm": 2.4552831246664453,
"learning_rate": 4.838219030218274e-06,
"loss": 0.4814,
"step": 350
},
{
"epoch": 1.4314516129032258,
"grad_norm": 2.3809447566869113,
"learning_rate": 4.5398327604866056e-06,
"loss": 0.4956,
"step": 355
},
{
"epoch": 1.4516129032258065,
"grad_norm": 1.9931477191927476,
"learning_rate": 4.248218711232952e-06,
"loss": 0.5095,
"step": 360
},
{
"epoch": 1.471774193548387,
"grad_norm": 2.0758952034120104,
"learning_rate": 3.963738570030135e-06,
"loss": 0.5031,
"step": 365
},
{
"epoch": 1.4919354838709677,
"grad_norm": 2.113141005154832,
"learning_rate": 3.6867451762974117e-06,
"loss": 0.4858,
"step": 370
},
{
"epoch": 1.5120967741935485,
"grad_norm": 2.033150384960035,
"learning_rate": 3.417582083675365e-06,
"loss": 0.4838,
"step": 375
},
{
"epoch": 1.532258064516129,
"grad_norm": 2.6578869400642984,
"learning_rate": 3.1565831339178844e-06,
"loss": 0.4981,
"step": 380
},
{
"epoch": 1.5524193548387095,
"grad_norm": 2.4259980364912557,
"learning_rate": 2.9040720428297754e-06,
"loss": 0.4953,
"step": 385
},
{
"epoch": 1.5725806451612905,
"grad_norm": 2.188171842523612,
"learning_rate": 2.6603619987635087e-06,
"loss": 0.4886,
"step": 390
},
{
"epoch": 1.592741935483871,
"grad_norm": 2.4509085938481645,
"learning_rate": 2.4257552741731593e-06,
"loss": 0.4919,
"step": 395
},
{
"epoch": 1.6129032258064515,
"grad_norm": 2.057354914567607,
"learning_rate": 2.200542850707247e-06,
"loss": 0.5057,
"step": 400
},
{
"epoch": 1.6129032258064515,
"eval_loss": 1.0593500137329102,
"eval_runtime": 5.7784,
"eval_samples_per_second": 68.704,
"eval_steps_per_second": 2.25,
"step": 400
},
{
"epoch": 1.6330645161290323,
"grad_norm": 1.797397264265373,
"learning_rate": 1.985004058305535e-06,
"loss": 0.4598,
"step": 405
},
{
"epoch": 1.653225806451613,
"grad_norm": 2.243456315367938,
"learning_rate": 1.7794062287473734e-06,
"loss": 0.5226,
"step": 410
},
{
"epoch": 1.6733870967741935,
"grad_norm": 2.220033645347878,
"learning_rate": 1.5840043640813274e-06,
"loss": 0.5404,
"step": 415
},
{
"epoch": 1.6935483870967742,
"grad_norm": 1.9945052495277809,
"learning_rate": 1.3990408203472938e-06,
"loss": 0.4725,
"step": 420
},
{
"epoch": 1.713709677419355,
"grad_norm": 2.2666732532368217,
"learning_rate": 1.2247450069834077e-06,
"loss": 0.4773,
"step": 425
},
{
"epoch": 1.7338709677419355,
"grad_norm": 1.907392372458416,
"learning_rate": 1.061333102290576e-06,
"loss": 0.4776,
"step": 430
},
{
"epoch": 1.754032258064516,
"grad_norm": 2.6145484574587083,
"learning_rate": 9.090077853075119e-07,
"loss": 0.4864,
"step": 435
},
{
"epoch": 1.7741935483870968,
"grad_norm": 2.06555720808374,
"learning_rate": 7.679579844288509e-07,
"loss": 0.4692,
"step": 440
},
{
"epoch": 1.7943548387096775,
"grad_norm": 2.1245728247554982,
"learning_rate": 6.383586430781196e-07,
"loss": 0.5071,
"step": 445
},
{
"epoch": 1.814516129032258,
"grad_norm": 2.0064654819537298,
"learning_rate": 5.203705027262185e-07,
"loss": 0.4659,
"step": 450
},
{
"epoch": 1.8346774193548387,
"grad_norm": 2.1497980743373906,
"learning_rate": 4.141399035245053e-07,
"loss": 0.5124,
"step": 455
},
{
"epoch": 1.8548387096774195,
"grad_norm": 2.1598292791262064,
"learning_rate": 3.197986027997657e-07,
"loss": 0.4742,
"step": 460
},
{
"epoch": 1.875,
"grad_norm": 2.01644524585016,
"learning_rate": 2.3746361163621723e-07,
"loss": 0.4833,
"step": 465
},
{
"epoch": 1.8951612903225805,
"grad_norm": 1.9818198911248883,
"learning_rate": 1.6723704974718758e-07,
"loss": 0.5176,
"step": 470
},
{
"epoch": 1.9153225806451613,
"grad_norm": 2.0731118119143126,
"learning_rate": 1.0920601881650006e-07,
"loss": 0.4895,
"step": 475
},
{
"epoch": 1.935483870967742,
"grad_norm": 1.9849538964737026,
"learning_rate": 6.344249446665673e-08,
"loss": 0.5093,
"step": 480
},
{
"epoch": 1.9556451612903225,
"grad_norm": 1.8778229972783114,
"learning_rate": 3.0003236987802276e-08,
"loss": 0.4298,
"step": 485
},
{
"epoch": 1.9758064516129032,
"grad_norm": 2.0981658218579873,
"learning_rate": 8.929720938193331e-09,
"loss": 0.524,
"step": 490
},
{
"epoch": 1.995967741935484,
"grad_norm": 2.1714963758794936,
"learning_rate": 2.48083703494606e-10,
"loss": 0.523,
"step": 495
},
{
"epoch": 2.0,
"step": 496,
"total_flos": 20768208814080.0,
"train_loss": 0.8128850824169574,
"train_runtime": 1218.4539,
"train_samples_per_second": 13.005,
"train_steps_per_second": 0.407
}
],
"logging_steps": 5,
"max_steps": 496,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 20768208814080.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}