freelancer-projects-3k-traces / trainer_state.json
wilyub's picture
End of training
48901b6 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10126582278481013,
"grad_norm": 4.892731114911985,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.7873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10263531655073166,
"step": 5,
"valid_targets_mean": 5434.5,
"valid_targets_min": 4043
},
{
"epoch": 0.20253164556962025,
"grad_norm": 1.814904106553782,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.7417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09018491953611374,
"step": 10,
"valid_targets_mean": 6879.8,
"valid_targets_min": 4333
},
{
"epoch": 0.3037974683544304,
"grad_norm": 0.8841019383762758,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.6809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.083181232213974,
"step": 15,
"valid_targets_mean": 5398.1,
"valid_targets_min": 4032
},
{
"epoch": 0.4050632911392405,
"grad_norm": 0.5370849896096638,
"learning_rate": 3.0400000000000004e-05,
"loss": 0.6482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07828044146299362,
"step": 20,
"valid_targets_mean": 6784.9,
"valid_targets_min": 4367
},
{
"epoch": 0.5063291139240507,
"grad_norm": 0.4107573727646989,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.6248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08727987855672836,
"step": 25,
"valid_targets_mean": 6592.4,
"valid_targets_min": 5831
},
{
"epoch": 0.6075949367088608,
"grad_norm": 0.3884148187532474,
"learning_rate": 3.9968815283639625e-05,
"loss": 0.583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06718327105045319,
"step": 30,
"valid_targets_mean": 5202.8,
"valid_targets_min": 3358
},
{
"epoch": 0.7088607594936709,
"grad_norm": 0.3296344047063898,
"learning_rate": 3.9842294026289565e-05,
"loss": 0.5819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08376607298851013,
"step": 35,
"valid_targets_mean": 6393.1,
"valid_targets_min": 4108
},
{
"epoch": 0.810126582278481,
"grad_norm": 0.28688795849602644,
"learning_rate": 3.9619103106983835e-05,
"loss": 0.5663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07219389081001282,
"step": 40,
"valid_targets_mean": 5679.5,
"valid_targets_min": 3016
},
{
"epoch": 0.9113924050632911,
"grad_norm": 0.2599787531636512,
"learning_rate": 3.930032988944623e-05,
"loss": 0.5474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07744254171848297,
"step": 45,
"valid_targets_mean": 6223.2,
"valid_targets_min": 4265
},
{
"epoch": 1.0,
"grad_norm": 0.3334196405708405,
"learning_rate": 3.888752740474962e-05,
"loss": 0.5276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16258563101291656,
"step": 50,
"valid_targets_mean": 6408.6,
"valid_targets_min": 1343
},
{
"epoch": 1.1012658227848102,
"grad_norm": 0.2530444272354564,
"learning_rate": 3.838270678510469e-05,
"loss": 0.5328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053741633892059326,
"step": 55,
"valid_targets_mean": 4568.9,
"valid_targets_min": 1527
},
{
"epoch": 1.2025316455696202,
"grad_norm": 0.25786402054016816,
"learning_rate": 3.778832746582596e-05,
"loss": 0.526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06370914727449417,
"step": 60,
"valid_targets_mean": 5678.0,
"valid_targets_min": 1703
},
{
"epoch": 1.3037974683544304,
"grad_norm": 0.25779285406561275,
"learning_rate": 3.710728520321014e-05,
"loss": 0.5289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07068575918674469,
"step": 65,
"valid_targets_mean": 5731.5,
"valid_targets_min": 2599
},
{
"epoch": 1.4050632911392404,
"grad_norm": 0.27350360851995736,
"learning_rate": 3.634289796670257e-05,
"loss": 0.5091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05845070257782936,
"step": 70,
"valid_targets_mean": 5868.4,
"valid_targets_min": 3084
},
{
"epoch": 1.5063291139240507,
"grad_norm": 0.2251730778907972,
"learning_rate": 3.549888977408359e-05,
"loss": 0.5036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06416966021060944,
"step": 75,
"valid_targets_mean": 7533.8,
"valid_targets_min": 1211
},
{
"epoch": 1.6075949367088609,
"grad_norm": 0.25292755111647425,
"learning_rate": 3.457937254842823e-05,
"loss": 0.5035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06763243675231934,
"step": 80,
"valid_targets_mean": 5904.2,
"valid_targets_min": 4620
},
{
"epoch": 1.7088607594936709,
"grad_norm": 0.2395809902870368,
"learning_rate": 3.3588826085230336e-05,
"loss": 0.5125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06270727515220642,
"step": 85,
"valid_targets_mean": 5431.5,
"valid_targets_min": 1653
},
{
"epoch": 1.810126582278481,
"grad_norm": 0.2403546339603389,
"learning_rate": 3.253207622728921e-05,
"loss": 0.5008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05581539869308472,
"step": 90,
"valid_targets_mean": 5039.5,
"valid_targets_min": 1803
},
{
"epoch": 1.9113924050632911,
"grad_norm": 0.23877919903509898,
"learning_rate": 3.141427135368864e-05,
"loss": 0.5092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06935933232307434,
"step": 95,
"valid_targets_mean": 6473.8,
"valid_targets_min": 1096
},
{
"epoch": 2.0,
"grad_norm": 0.4045083524782313,
"learning_rate": 3.024085729741143e-05,
"loss": 0.4985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16390681266784668,
"step": 100,
"valid_targets_mean": 4693.0,
"valid_targets_min": 3570
},
{
"epoch": 2.1012658227848102,
"grad_norm": 0.24016857209390058,
"learning_rate": 2.9017550813788616e-05,
"loss": 0.4904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.061118703335523605,
"step": 105,
"valid_targets_mean": 5252.2,
"valid_targets_min": 1622
},
{
"epoch": 2.2025316455696204,
"grad_norm": 0.24557410950934505,
"learning_rate": 2.7750311729042062e-05,
"loss": 0.4844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055277056992053986,
"step": 110,
"valid_targets_mean": 6305.8,
"valid_targets_min": 1349
},
{
"epoch": 2.3037974683544302,
"grad_norm": 0.2487370991728573,
"learning_rate": 2.6445313904610227e-05,
"loss": 0.4946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06643018871545792,
"step": 115,
"valid_targets_mean": 6322.1,
"valid_targets_min": 4384
},
{
"epoch": 2.4050632911392404,
"grad_norm": 0.24563068855298614,
"learning_rate": 2.510891515871581e-05,
"loss": 0.4945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0655747652053833,
"step": 120,
"valid_targets_mean": 7007.6,
"valid_targets_min": 4748
},
{
"epoch": 2.5063291139240507,
"grad_norm": 0.22602016251768164,
"learning_rate": 2.37476262917145e-05,
"loss": 0.4871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05510348081588745,
"step": 125,
"valid_targets_mean": 6089.8,
"valid_targets_min": 1814
},
{
"epoch": 2.607594936708861,
"grad_norm": 0.24857564424554462,
"learning_rate": 2.2368079366130028e-05,
"loss": 0.4778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06448166072368622,
"step": 130,
"valid_targets_mean": 6170.4,
"valid_targets_min": 1748
},
{
"epoch": 2.708860759493671,
"grad_norm": 0.24223407823647905,
"learning_rate": 2.097699539591227e-05,
"loss": 0.4962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055263325572013855,
"step": 135,
"valid_targets_mean": 5147.0,
"valid_targets_min": 1780
},
{
"epoch": 2.810126582278481,
"grad_norm": 0.25190029062889885,
"learning_rate": 1.9581151602332865e-05,
"loss": 0.4765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05278221517801285,
"step": 140,
"valid_targets_mean": 4165.2,
"valid_targets_min": 1059
},
{
"epoch": 2.911392405063291,
"grad_norm": 0.23072238058861513,
"learning_rate": 1.8187348396044402e-05,
"loss": 0.487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057958684861660004,
"step": 145,
"valid_targets_mean": 5567.4,
"valid_targets_min": 2038
},
{
"epoch": 3.0,
"grad_norm": 0.35788077181586037,
"learning_rate": 1.6802376246163307e-05,
"loss": 0.4775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17279455065727234,
"step": 150,
"valid_targets_mean": 6114.6,
"valid_targets_min": 4298
},
{
"epoch": 3.1012658227848102,
"grad_norm": 0.22842034105067952,
"learning_rate": 1.5432982597786886e-05,
"loss": 0.4615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057237960398197174,
"step": 155,
"valid_targets_mean": 7781.5,
"valid_targets_min": 3630
},
{
"epoch": 3.2025316455696204,
"grad_norm": 0.25279304012367937,
"learning_rate": 1.4085838999119075e-05,
"loss": 0.4881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06189911440014839,
"step": 160,
"valid_targets_mean": 5685.0,
"valid_targets_min": 1780
},
{
"epoch": 3.3037974683544302,
"grad_norm": 0.24789675995551264,
"learning_rate": 1.2767508598358158e-05,
"loss": 0.4791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07715524733066559,
"step": 165,
"valid_targets_mean": 6161.6,
"valid_targets_min": 2145
},
{
"epoch": 3.4050632911392404,
"grad_norm": 0.23756645723447042,
"learning_rate": 1.1484414168698547e-05,
"loss": 0.4649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06180926039814949,
"step": 170,
"valid_targets_mean": 5851.0,
"valid_targets_min": 4620
},
{
"epoch": 3.5063291139240507,
"grad_norm": 0.2074147259270552,
"learning_rate": 1.0242806817225344e-05,
"loss": 0.4772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0647956132888794,
"step": 175,
"valid_targets_mean": 6451.0,
"valid_targets_min": 4490
},
{
"epoch": 3.607594936708861,
"grad_norm": 0.222985373654853,
"learning_rate": 9.048735530148998e-06,
"loss": 0.4767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07306292653083801,
"step": 180,
"valid_targets_mean": 7238.0,
"valid_targets_min": 4225
},
{
"epoch": 3.708860759493671,
"grad_norm": 0.24693082038587955,
"learning_rate": 7.908017702752504e-06,
"loss": 0.483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.058695971965789795,
"step": 185,
"valid_targets_mean": 5157.0,
"valid_targets_min": 1544
},
{
"epoch": 3.810126582278481,
"grad_norm": 0.21476601473174206,
"learning_rate": 6.826210797626389e-06,
"loss": 0.484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05881170928478241,
"step": 190,
"valid_targets_mean": 6449.1,
"valid_targets_min": 4147
},
{
"epoch": 3.911392405063291,
"grad_norm": 0.23899265053004184,
"learning_rate": 5.8085852692695864e-06,
"loss": 0.472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053444743156433105,
"step": 195,
"valid_targets_mean": 5982.6,
"valid_targets_min": 4017
},
{
"epoch": 4.0,
"grad_norm": 0.38057113948556304,
"learning_rate": 4.8600988869648745e-06,
"loss": 0.4598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14350973069667816,
"step": 200,
"valid_targets_mean": 5090.1,
"valid_targets_min": 1703
},
{
"epoch": 4.10126582278481,
"grad_norm": 0.23532229738444221,
"learning_rate": 3.985372581025333e-06,
"loss": 0.4809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06552933156490326,
"step": 205,
"valid_targets_mean": 6486.4,
"valid_targets_min": 4490
},
{
"epoch": 4.2025316455696204,
"grad_norm": 0.2188409024433163,
"learning_rate": 3.1886679300863156e-06,
"loss": 0.4712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04854092746973038,
"step": 210,
"valid_targets_mean": 5235.5,
"valid_targets_min": 2016
},
{
"epoch": 4.30379746835443,
"grad_norm": 0.23226895996673688,
"learning_rate": 2.473866399122733e-06,
"loss": 0.4698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04958043992519379,
"step": 215,
"valid_targets_mean": 4248.0,
"valid_targets_min": 1703
},
{
"epoch": 4.405063291139241,
"grad_norm": 0.20534990029779562,
"learning_rate": 1.8444504293418286e-06,
"loss": 0.4659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06132490187883377,
"step": 220,
"valid_targets_mean": 5857.9,
"valid_targets_min": 3622
},
{
"epoch": 4.506329113924051,
"grad_norm": 0.21045777391697457,
"learning_rate": 1.3034864720797112e-06,
"loss": 0.4578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04531293734908104,
"step": 225,
"valid_targets_mean": 4599.2,
"valid_targets_min": 1479
},
{
"epoch": 4.6075949367088604,
"grad_norm": 0.25733866620896073,
"learning_rate": 8.536100493586552e-07,
"loss": 0.4614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05553407967090607,
"step": 230,
"valid_targets_mean": 6007.9,
"valid_targets_min": 1799
},
{
"epoch": 4.708860759493671,
"grad_norm": 0.22037233350715793,
"learning_rate": 4.970129138887347e-07,
"loss": 0.4704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05725882947444916,
"step": 235,
"valid_targets_mean": 4865.5,
"valid_targets_min": 1546
},
{
"epoch": 4.810126582278481,
"grad_norm": 0.2174665831033445,
"learning_rate": 2.3543237106894434e-07,
"loss": 0.4778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049080438911914825,
"step": 240,
"valid_targets_mean": 6412.1,
"valid_targets_min": 1393
},
{
"epoch": 4.911392405063291,
"grad_norm": 0.2267785532190801,
"learning_rate": 7.01428150099126e-08,
"loss": 0.4807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.056198298931121826,
"step": 245,
"valid_targets_mean": 5363.9,
"valid_targets_min": 4209
},
{
"epoch": 5.0,
"grad_norm": 0.3766849273372062,
"learning_rate": 1.949519813915224e-09,
"loss": 0.4731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1424449235200882,
"step": 250,
"valid_targets_mean": 4742.4,
"valid_targets_min": 1340
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1424449235200882,
"step": 250,
"total_flos": 7.594294192772219e+17,
"train_loss": 0.5147013063430786,
"train_runtime": 9538.6713,
"train_samples_per_second": 1.656,
"train_steps_per_second": 0.026,
"valid_targets_mean": 4742.4,
"valid_targets_min": 1340
}
],
"logging_steps": 5,
"max_steps": 250,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.594294192772219e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}