| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10126582278481013, |
| "grad_norm": 4.892731114911985, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.7873, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10263531655073166, |
| "step": 5, |
| "valid_targets_mean": 5434.5, |
| "valid_targets_min": 4043 |
| }, |
| { |
| "epoch": 0.20253164556962025, |
| "grad_norm": 1.814904106553782, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 0.7417, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09018491953611374, |
| "step": 10, |
| "valid_targets_mean": 6879.8, |
| "valid_targets_min": 4333 |
| }, |
| { |
| "epoch": 0.3037974683544304, |
| "grad_norm": 0.8841019383762758, |
| "learning_rate": 2.2400000000000002e-05, |
| "loss": 0.6809, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.083181232213974, |
| "step": 15, |
| "valid_targets_mean": 5398.1, |
| "valid_targets_min": 4032 |
| }, |
| { |
| "epoch": 0.4050632911392405, |
| "grad_norm": 0.5370849896096638, |
| "learning_rate": 3.0400000000000004e-05, |
| "loss": 0.6482, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07828044146299362, |
| "step": 20, |
| "valid_targets_mean": 6784.9, |
| "valid_targets_min": 4367 |
| }, |
| { |
| "epoch": 0.5063291139240507, |
| "grad_norm": 0.4107573727646989, |
| "learning_rate": 3.8400000000000005e-05, |
| "loss": 0.6248, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08727987855672836, |
| "step": 25, |
| "valid_targets_mean": 6592.4, |
| "valid_targets_min": 5831 |
| }, |
| { |
| "epoch": 0.6075949367088608, |
| "grad_norm": 0.3884148187532474, |
| "learning_rate": 3.9968815283639625e-05, |
| "loss": 0.583, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06718327105045319, |
| "step": 30, |
| "valid_targets_mean": 5202.8, |
| "valid_targets_min": 3358 |
| }, |
| { |
| "epoch": 0.7088607594936709, |
| "grad_norm": 0.3296344047063898, |
| "learning_rate": 3.9842294026289565e-05, |
| "loss": 0.5819, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08376607298851013, |
| "step": 35, |
| "valid_targets_mean": 6393.1, |
| "valid_targets_min": 4108 |
| }, |
| { |
| "epoch": 0.810126582278481, |
| "grad_norm": 0.28688795849602644, |
| "learning_rate": 3.9619103106983835e-05, |
| "loss": 0.5663, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07219389081001282, |
| "step": 40, |
| "valid_targets_mean": 5679.5, |
| "valid_targets_min": 3016 |
| }, |
| { |
| "epoch": 0.9113924050632911, |
| "grad_norm": 0.2599787531636512, |
| "learning_rate": 3.930032988944623e-05, |
| "loss": 0.5474, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07744254171848297, |
| "step": 45, |
| "valid_targets_mean": 6223.2, |
| "valid_targets_min": 4265 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3334196405708405, |
| "learning_rate": 3.888752740474962e-05, |
| "loss": 0.5276, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.16258563101291656, |
| "step": 50, |
| "valid_targets_mean": 6408.6, |
| "valid_targets_min": 1343 |
| }, |
| { |
| "epoch": 1.1012658227848102, |
| "grad_norm": 0.2530444272354564, |
| "learning_rate": 3.838270678510469e-05, |
| "loss": 0.5328, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053741633892059326, |
| "step": 55, |
| "valid_targets_mean": 4568.9, |
| "valid_targets_min": 1527 |
| }, |
| { |
| "epoch": 1.2025316455696202, |
| "grad_norm": 0.25786402054016816, |
| "learning_rate": 3.778832746582596e-05, |
| "loss": 0.526, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06370914727449417, |
| "step": 60, |
| "valid_targets_mean": 5678.0, |
| "valid_targets_min": 1703 |
| }, |
| { |
| "epoch": 1.3037974683544304, |
| "grad_norm": 0.25779285406561275, |
| "learning_rate": 3.710728520321014e-05, |
| "loss": 0.5289, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07068575918674469, |
| "step": 65, |
| "valid_targets_mean": 5731.5, |
| "valid_targets_min": 2599 |
| }, |
| { |
| "epoch": 1.4050632911392404, |
| "grad_norm": 0.27350360851995736, |
| "learning_rate": 3.634289796670257e-05, |
| "loss": 0.5091, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05845070257782936, |
| "step": 70, |
| "valid_targets_mean": 5868.4, |
| "valid_targets_min": 3084 |
| }, |
| { |
| "epoch": 1.5063291139240507, |
| "grad_norm": 0.2251730778907972, |
| "learning_rate": 3.549888977408359e-05, |
| "loss": 0.5036, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06416966021060944, |
| "step": 75, |
| "valid_targets_mean": 7533.8, |
| "valid_targets_min": 1211 |
| }, |
| { |
| "epoch": 1.6075949367088609, |
| "grad_norm": 0.25292755111647425, |
| "learning_rate": 3.457937254842823e-05, |
| "loss": 0.5035, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06763243675231934, |
| "step": 80, |
| "valid_targets_mean": 5904.2, |
| "valid_targets_min": 4620 |
| }, |
| { |
| "epoch": 1.7088607594936709, |
| "grad_norm": 0.2395809902870368, |
| "learning_rate": 3.3588826085230336e-05, |
| "loss": 0.5125, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06270727515220642, |
| "step": 85, |
| "valid_targets_mean": 5431.5, |
| "valid_targets_min": 1653 |
| }, |
| { |
| "epoch": 1.810126582278481, |
| "grad_norm": 0.2403546339603389, |
| "learning_rate": 3.253207622728921e-05, |
| "loss": 0.5008, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05581539869308472, |
| "step": 90, |
| "valid_targets_mean": 5039.5, |
| "valid_targets_min": 1803 |
| }, |
| { |
| "epoch": 1.9113924050632911, |
| "grad_norm": 0.23877919903509898, |
| "learning_rate": 3.141427135368864e-05, |
| "loss": 0.5092, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06935933232307434, |
| "step": 95, |
| "valid_targets_mean": 6473.8, |
| "valid_targets_min": 1096 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4045083524782313, |
| "learning_rate": 3.024085729741143e-05, |
| "loss": 0.4985, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.16390681266784668, |
| "step": 100, |
| "valid_targets_mean": 4693.0, |
| "valid_targets_min": 3570 |
| }, |
| { |
| "epoch": 2.1012658227848102, |
| "grad_norm": 0.24016857209390058, |
| "learning_rate": 2.9017550813788616e-05, |
| "loss": 0.4904, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.061118703335523605, |
| "step": 105, |
| "valid_targets_mean": 5252.2, |
| "valid_targets_min": 1622 |
| }, |
| { |
| "epoch": 2.2025316455696204, |
| "grad_norm": 0.24557410950934505, |
| "learning_rate": 2.7750311729042062e-05, |
| "loss": 0.4844, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055277056992053986, |
| "step": 110, |
| "valid_targets_mean": 6305.8, |
| "valid_targets_min": 1349 |
| }, |
| { |
| "epoch": 2.3037974683544302, |
| "grad_norm": 0.2487370991728573, |
| "learning_rate": 2.6445313904610227e-05, |
| "loss": 0.4946, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06643018871545792, |
| "step": 115, |
| "valid_targets_mean": 6322.1, |
| "valid_targets_min": 4384 |
| }, |
| { |
| "epoch": 2.4050632911392404, |
| "grad_norm": 0.24563068855298614, |
| "learning_rate": 2.510891515871581e-05, |
| "loss": 0.4945, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0655747652053833, |
| "step": 120, |
| "valid_targets_mean": 7007.6, |
| "valid_targets_min": 4748 |
| }, |
| { |
| "epoch": 2.5063291139240507, |
| "grad_norm": 0.22602016251768164, |
| "learning_rate": 2.37476262917145e-05, |
| "loss": 0.4871, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05510348081588745, |
| "step": 125, |
| "valid_targets_mean": 6089.8, |
| "valid_targets_min": 1814 |
| }, |
| { |
| "epoch": 2.607594936708861, |
| "grad_norm": 0.24857564424554462, |
| "learning_rate": 2.2368079366130028e-05, |
| "loss": 0.4778, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06448166072368622, |
| "step": 130, |
| "valid_targets_mean": 6170.4, |
| "valid_targets_min": 1748 |
| }, |
| { |
| "epoch": 2.708860759493671, |
| "grad_norm": 0.24223407823647905, |
| "learning_rate": 2.097699539591227e-05, |
| "loss": 0.4962, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055263325572013855, |
| "step": 135, |
| "valid_targets_mean": 5147.0, |
| "valid_targets_min": 1780 |
| }, |
| { |
| "epoch": 2.810126582278481, |
| "grad_norm": 0.25190029062889885, |
| "learning_rate": 1.9581151602332865e-05, |
| "loss": 0.4765, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05278221517801285, |
| "step": 140, |
| "valid_targets_mean": 4165.2, |
| "valid_targets_min": 1059 |
| }, |
| { |
| "epoch": 2.911392405063291, |
| "grad_norm": 0.23072238058861513, |
| "learning_rate": 1.8187348396044402e-05, |
| "loss": 0.487, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.057958684861660004, |
| "step": 145, |
| "valid_targets_mean": 5567.4, |
| "valid_targets_min": 2038 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.35788077181586037, |
| "learning_rate": 1.6802376246163307e-05, |
| "loss": 0.4775, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17279455065727234, |
| "step": 150, |
| "valid_targets_mean": 6114.6, |
| "valid_targets_min": 4298 |
| }, |
| { |
| "epoch": 3.1012658227848102, |
| "grad_norm": 0.22842034105067952, |
| "learning_rate": 1.5432982597786886e-05, |
| "loss": 0.4615, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.057237960398197174, |
| "step": 155, |
| "valid_targets_mean": 7781.5, |
| "valid_targets_min": 3630 |
| }, |
| { |
| "epoch": 3.2025316455696204, |
| "grad_norm": 0.25279304012367937, |
| "learning_rate": 1.4085838999119075e-05, |
| "loss": 0.4881, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06189911440014839, |
| "step": 160, |
| "valid_targets_mean": 5685.0, |
| "valid_targets_min": 1780 |
| }, |
| { |
| "epoch": 3.3037974683544302, |
| "grad_norm": 0.24789675995551264, |
| "learning_rate": 1.2767508598358158e-05, |
| "loss": 0.4791, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07715524733066559, |
| "step": 165, |
| "valid_targets_mean": 6161.6, |
| "valid_targets_min": 2145 |
| }, |
| { |
| "epoch": 3.4050632911392404, |
| "grad_norm": 0.23756645723447042, |
| "learning_rate": 1.1484414168698547e-05, |
| "loss": 0.4649, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06180926039814949, |
| "step": 170, |
| "valid_targets_mean": 5851.0, |
| "valid_targets_min": 4620 |
| }, |
| { |
| "epoch": 3.5063291139240507, |
| "grad_norm": 0.2074147259270552, |
| "learning_rate": 1.0242806817225344e-05, |
| "loss": 0.4772, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0647956132888794, |
| "step": 175, |
| "valid_targets_mean": 6451.0, |
| "valid_targets_min": 4490 |
| }, |
| { |
| "epoch": 3.607594936708861, |
| "grad_norm": 0.222985373654853, |
| "learning_rate": 9.048735530148998e-06, |
| "loss": 0.4767, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07306292653083801, |
| "step": 180, |
| "valid_targets_mean": 7238.0, |
| "valid_targets_min": 4225 |
| }, |
| { |
| "epoch": 3.708860759493671, |
| "grad_norm": 0.24693082038587955, |
| "learning_rate": 7.908017702752504e-06, |
| "loss": 0.483, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.058695971965789795, |
| "step": 185, |
| "valid_targets_mean": 5157.0, |
| "valid_targets_min": 1544 |
| }, |
| { |
| "epoch": 3.810126582278481, |
| "grad_norm": 0.21476601473174206, |
| "learning_rate": 6.826210797626389e-06, |
| "loss": 0.484, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05881170928478241, |
| "step": 190, |
| "valid_targets_mean": 6449.1, |
| "valid_targets_min": 4147 |
| }, |
| { |
| "epoch": 3.911392405063291, |
| "grad_norm": 0.23899265053004184, |
| "learning_rate": 5.8085852692695864e-06, |
| "loss": 0.472, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053444743156433105, |
| "step": 195, |
| "valid_targets_mean": 5982.6, |
| "valid_targets_min": 4017 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.38057113948556304, |
| "learning_rate": 4.8600988869648745e-06, |
| "loss": 0.4598, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14350973069667816, |
| "step": 200, |
| "valid_targets_mean": 5090.1, |
| "valid_targets_min": 1703 |
| }, |
| { |
| "epoch": 4.10126582278481, |
| "grad_norm": 0.23532229738444221, |
| "learning_rate": 3.985372581025333e-06, |
| "loss": 0.4809, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06552933156490326, |
| "step": 205, |
| "valid_targets_mean": 6486.4, |
| "valid_targets_min": 4490 |
| }, |
| { |
| "epoch": 4.2025316455696204, |
| "grad_norm": 0.2188409024433163, |
| "learning_rate": 3.1886679300863156e-06, |
| "loss": 0.4712, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04854092746973038, |
| "step": 210, |
| "valid_targets_mean": 5235.5, |
| "valid_targets_min": 2016 |
| }, |
| { |
| "epoch": 4.30379746835443, |
| "grad_norm": 0.23226895996673688, |
| "learning_rate": 2.473866399122733e-06, |
| "loss": 0.4698, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04958043992519379, |
| "step": 215, |
| "valid_targets_mean": 4248.0, |
| "valid_targets_min": 1703 |
| }, |
| { |
| "epoch": 4.405063291139241, |
| "grad_norm": 0.20534990029779562, |
| "learning_rate": 1.8444504293418286e-06, |
| "loss": 0.4659, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06132490187883377, |
| "step": 220, |
| "valid_targets_mean": 5857.9, |
| "valid_targets_min": 3622 |
| }, |
| { |
| "epoch": 4.506329113924051, |
| "grad_norm": 0.21045777391697457, |
| "learning_rate": 1.3034864720797112e-06, |
| "loss": 0.4578, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04531293734908104, |
| "step": 225, |
| "valid_targets_mean": 4599.2, |
| "valid_targets_min": 1479 |
| }, |
| { |
| "epoch": 4.6075949367088604, |
| "grad_norm": 0.25733866620896073, |
| "learning_rate": 8.536100493586552e-07, |
| "loss": 0.4614, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05553407967090607, |
| "step": 230, |
| "valid_targets_mean": 6007.9, |
| "valid_targets_min": 1799 |
| }, |
| { |
| "epoch": 4.708860759493671, |
| "grad_norm": 0.22037233350715793, |
| "learning_rate": 4.970129138887347e-07, |
| "loss": 0.4704, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05725882947444916, |
| "step": 235, |
| "valid_targets_mean": 4865.5, |
| "valid_targets_min": 1546 |
| }, |
| { |
| "epoch": 4.810126582278481, |
| "grad_norm": 0.2174665831033445, |
| "learning_rate": 2.3543237106894434e-07, |
| "loss": 0.4778, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.049080438911914825, |
| "step": 240, |
| "valid_targets_mean": 6412.1, |
| "valid_targets_min": 1393 |
| }, |
| { |
| "epoch": 4.911392405063291, |
| "grad_norm": 0.2267785532190801, |
| "learning_rate": 7.01428150099126e-08, |
| "loss": 0.4807, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.056198298931121826, |
| "step": 245, |
| "valid_targets_mean": 5363.9, |
| "valid_targets_min": 4209 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.3766849273372062, |
| "learning_rate": 1.949519813915224e-09, |
| "loss": 0.4731, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1424449235200882, |
| "step": 250, |
| "valid_targets_mean": 4742.4, |
| "valid_targets_min": 1340 |
| }, |
| { |
| "epoch": 5.0, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1424449235200882, |
| "step": 250, |
| "total_flos": 7.594294192772219e+17, |
| "train_loss": 0.5147013063430786, |
| "train_runtime": 9538.6713, |
| "train_samples_per_second": 1.656, |
| "train_steps_per_second": 0.026, |
| "valid_targets_mean": 4742.4, |
| "valid_targets_min": 1340 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.594294192772219e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|