| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 116.47855530474041, | |
| "global_step": 774000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.988713318284425e-05, | |
| "loss": 6.9537, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 6.797055244445801, | |
| "eval_runtime": 118.7849, | |
| "eval_samples_per_second": 103.456, | |
| "eval_steps_per_second": 6.474, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.9774266365688486e-05, | |
| "loss": 6.6451, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 6.689827919006348, | |
| "eval_runtime": 118.3538, | |
| "eval_samples_per_second": 103.833, | |
| "eval_steps_per_second": 6.497, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 4.966139954853273e-05, | |
| "loss": 6.5518, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_loss": 6.630918979644775, | |
| "eval_runtime": 118.3302, | |
| "eval_samples_per_second": 103.853, | |
| "eval_steps_per_second": 6.499, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.954853273137698e-05, | |
| "loss": 6.4713, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 6.497533798217773, | |
| "eval_runtime": 118.3323, | |
| "eval_samples_per_second": 103.852, | |
| "eval_steps_per_second": 6.499, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 4.9435665914221216e-05, | |
| "loss": 6.0827, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "eval_loss": 5.684892654418945, | |
| "eval_runtime": 118.3572, | |
| "eval_samples_per_second": 103.83, | |
| "eval_steps_per_second": 6.497, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 4.932279909706546e-05, | |
| "loss": 5.0663, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "eval_loss": 4.617100715637207, | |
| "eval_runtime": 118.3432, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 4.920993227990971e-05, | |
| "loss": 4.3025, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "eval_loss": 4.159748077392578, | |
| "eval_runtime": 118.3398, | |
| "eval_samples_per_second": 103.845, | |
| "eval_steps_per_second": 6.498, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 4.909706546275395e-05, | |
| "loss": 3.9214, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "eval_loss": 3.8544375896453857, | |
| "eval_runtime": 118.3517, | |
| "eval_samples_per_second": 103.835, | |
| "eval_steps_per_second": 6.498, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 4.89841986455982e-05, | |
| "loss": 3.6779, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "eval_loss": 3.664363384246826, | |
| "eval_runtime": 118.2895, | |
| "eval_samples_per_second": 103.889, | |
| "eval_steps_per_second": 6.501, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 4.887133182844244e-05, | |
| "loss": 3.502, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_loss": 3.5141005516052246, | |
| "eval_runtime": 118.2983, | |
| "eval_samples_per_second": 103.881, | |
| "eval_steps_per_second": 6.501, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 4.875846501128669e-05, | |
| "loss": 3.366, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "eval_loss": 3.4036142826080322, | |
| "eval_runtime": 118.3098, | |
| "eval_samples_per_second": 103.871, | |
| "eval_steps_per_second": 6.5, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 4.864559819413093e-05, | |
| "loss": 3.2695, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "eval_loss": 3.321831226348877, | |
| "eval_runtime": 118.3723, | |
| "eval_samples_per_second": 103.817, | |
| "eval_steps_per_second": 6.496, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 4.853273137697517e-05, | |
| "loss": 3.1758, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_loss": 3.2432045936584473, | |
| "eval_runtime": 118.355, | |
| "eval_samples_per_second": 103.832, | |
| "eval_steps_per_second": 6.497, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 4.841986455981942e-05, | |
| "loss": 3.1008, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "eval_loss": 3.186511754989624, | |
| "eval_runtime": 118.3602, | |
| "eval_samples_per_second": 103.827, | |
| "eval_steps_per_second": 6.497, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "learning_rate": 4.830699774266366e-05, | |
| "loss": 3.0354, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "eval_loss": 3.115652561187744, | |
| "eval_runtime": 118.3431, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 4.81941309255079e-05, | |
| "loss": 2.9798, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "eval_loss": 3.0752041339874268, | |
| "eval_runtime": 118.2033, | |
| "eval_samples_per_second": 103.965, | |
| "eval_steps_per_second": 6.506, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 11.51, | |
| "learning_rate": 4.808126410835215e-05, | |
| "loss": 2.9252, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 11.51, | |
| "eval_loss": 3.028315305709839, | |
| "eval_runtime": 118.2129, | |
| "eval_samples_per_second": 103.956, | |
| "eval_steps_per_second": 6.505, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "learning_rate": 4.796839729119639e-05, | |
| "loss": 2.881, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "eval_loss": 2.9871439933776855, | |
| "eval_runtime": 118.1982, | |
| "eval_samples_per_second": 103.969, | |
| "eval_steps_per_second": 6.506, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 12.87, | |
| "learning_rate": 4.785553047404063e-05, | |
| "loss": 2.8366, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 12.87, | |
| "eval_loss": 2.9422881603240967, | |
| "eval_runtime": 118.1796, | |
| "eval_samples_per_second": 103.986, | |
| "eval_steps_per_second": 6.507, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 13.54, | |
| "learning_rate": 4.774266365688488e-05, | |
| "loss": 2.7917, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 13.54, | |
| "eval_loss": 2.9027907848358154, | |
| "eval_runtime": 118.1933, | |
| "eval_samples_per_second": 103.974, | |
| "eval_steps_per_second": 6.506, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "learning_rate": 4.762979683972912e-05, | |
| "loss": 2.7592, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "eval_loss": 2.8720462322235107, | |
| "eval_runtime": 118.2133, | |
| "eval_samples_per_second": 103.956, | |
| "eval_steps_per_second": 6.505, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "learning_rate": 4.751693002257336e-05, | |
| "loss": 2.7278, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "eval_loss": 2.8500328063964844, | |
| "eval_runtime": 118.2044, | |
| "eval_samples_per_second": 103.964, | |
| "eval_steps_per_second": 6.506, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 15.58, | |
| "learning_rate": 4.740406320541761e-05, | |
| "loss": 2.693, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 15.58, | |
| "eval_loss": 2.817178249359131, | |
| "eval_runtime": 118.1867, | |
| "eval_samples_per_second": 103.98, | |
| "eval_steps_per_second": 6.507, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "learning_rate": 4.729119638826185e-05, | |
| "loss": 2.6645, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "eval_loss": 2.786304235458374, | |
| "eval_runtime": 118.2219, | |
| "eval_samples_per_second": 103.949, | |
| "eval_steps_per_second": 6.505, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 16.93, | |
| "learning_rate": 4.71783295711061e-05, | |
| "loss": 2.6361, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 16.93, | |
| "eval_loss": 2.770569324493408, | |
| "eval_runtime": 118.2234, | |
| "eval_samples_per_second": 103.947, | |
| "eval_steps_per_second": 6.505, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "learning_rate": 4.706546275395034e-05, | |
| "loss": 2.6083, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "eval_loss": 2.7391059398651123, | |
| "eval_runtime": 118.2576, | |
| "eval_samples_per_second": 103.917, | |
| "eval_steps_per_second": 6.503, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 18.28, | |
| "learning_rate": 4.695259593679459e-05, | |
| "loss": 2.5847, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 18.28, | |
| "eval_loss": 2.718665838241577, | |
| "eval_runtime": 118.2124, | |
| "eval_samples_per_second": 103.957, | |
| "eval_steps_per_second": 6.505, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 18.96, | |
| "learning_rate": 4.6839729119638834e-05, | |
| "loss": 2.5619, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 18.96, | |
| "eval_loss": 2.7032158374786377, | |
| "eval_runtime": 118.2283, | |
| "eval_samples_per_second": 103.943, | |
| "eval_steps_per_second": 6.504, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 19.64, | |
| "learning_rate": 4.672686230248307e-05, | |
| "loss": 2.5368, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 19.64, | |
| "eval_loss": 2.6911468505859375, | |
| "eval_runtime": 118.3184, | |
| "eval_samples_per_second": 103.864, | |
| "eval_steps_per_second": 6.499, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 20.32, | |
| "learning_rate": 4.661399548532732e-05, | |
| "loss": 2.5203, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 20.32, | |
| "eval_loss": 2.666966676712036, | |
| "eval_runtime": 118.3095, | |
| "eval_samples_per_second": 103.872, | |
| "eval_steps_per_second": 6.5, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "learning_rate": 4.6501128668171564e-05, | |
| "loss": 2.4997, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_loss": 2.6472320556640625, | |
| "eval_runtime": 118.1415, | |
| "eval_samples_per_second": 104.019, | |
| "eval_steps_per_second": 6.509, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 21.67, | |
| "learning_rate": 4.63882618510158e-05, | |
| "loss": 2.4755, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 21.67, | |
| "eval_loss": 2.6281678676605225, | |
| "eval_runtime": 118.147, | |
| "eval_samples_per_second": 104.014, | |
| "eval_steps_per_second": 6.509, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 22.35, | |
| "learning_rate": 4.627539503386005e-05, | |
| "loss": 2.4593, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 22.35, | |
| "eval_loss": 2.6077518463134766, | |
| "eval_runtime": 118.4066, | |
| "eval_samples_per_second": 103.786, | |
| "eval_steps_per_second": 6.495, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 23.02, | |
| "learning_rate": 4.616252821670429e-05, | |
| "loss": 2.4468, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 23.02, | |
| "eval_loss": 2.60119366645813, | |
| "eval_runtime": 118.2017, | |
| "eval_samples_per_second": 103.966, | |
| "eval_steps_per_second": 6.506, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 23.7, | |
| "learning_rate": 4.604966139954853e-05, | |
| "loss": 2.4243, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 23.7, | |
| "eval_loss": 2.583709239959717, | |
| "eval_runtime": 118.0992, | |
| "eval_samples_per_second": 104.057, | |
| "eval_steps_per_second": 6.511, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 24.38, | |
| "learning_rate": 4.593679458239278e-05, | |
| "loss": 2.4093, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 24.38, | |
| "eval_loss": 2.5716421604156494, | |
| "eval_runtime": 118.1155, | |
| "eval_samples_per_second": 104.042, | |
| "eval_steps_per_second": 6.511, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 25.06, | |
| "learning_rate": 4.582392776523702e-05, | |
| "loss": 2.396, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 25.06, | |
| "eval_loss": 2.561039686203003, | |
| "eval_runtime": 118.1545, | |
| "eval_samples_per_second": 104.008, | |
| "eval_steps_per_second": 6.508, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 25.73, | |
| "learning_rate": 4.571106094808127e-05, | |
| "loss": 2.3764, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 25.73, | |
| "eval_loss": 2.543470859527588, | |
| "eval_runtime": 118.1796, | |
| "eval_samples_per_second": 103.986, | |
| "eval_steps_per_second": 6.507, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "learning_rate": 4.559819413092551e-05, | |
| "loss": 2.3623, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "eval_loss": 2.5341155529022217, | |
| "eval_runtime": 118.1214, | |
| "eval_samples_per_second": 104.037, | |
| "eval_steps_per_second": 6.51, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "learning_rate": 4.548532731376975e-05, | |
| "loss": 2.3529, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "eval_loss": 2.5200819969177246, | |
| "eval_runtime": 118.324, | |
| "eval_samples_per_second": 103.859, | |
| "eval_steps_per_second": 6.499, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 27.77, | |
| "learning_rate": 4.5372460496614e-05, | |
| "loss": 2.3393, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 27.77, | |
| "eval_loss": 2.5085155963897705, | |
| "eval_runtime": 118.3431, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "learning_rate": 4.525959367945824e-05, | |
| "loss": 2.3247, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "eval_loss": 2.5002756118774414, | |
| "eval_runtime": 118.3427, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 29.12, | |
| "learning_rate": 4.514672686230249e-05, | |
| "loss": 2.3127, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 29.12, | |
| "eval_loss": 2.4838666915893555, | |
| "eval_runtime": 118.3361, | |
| "eval_samples_per_second": 103.848, | |
| "eval_steps_per_second": 6.498, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 29.8, | |
| "learning_rate": 4.5033860045146734e-05, | |
| "loss": 2.3006, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 29.8, | |
| "eval_loss": 2.480976104736328, | |
| "eval_runtime": 118.3346, | |
| "eval_samples_per_second": 103.85, | |
| "eval_steps_per_second": 6.499, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 30.47, | |
| "learning_rate": 4.492099322799097e-05, | |
| "loss": 2.2896, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 30.47, | |
| "eval_loss": 2.4641942977905273, | |
| "eval_runtime": 118.3398, | |
| "eval_samples_per_second": 103.845, | |
| "eval_steps_per_second": 6.498, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 31.15, | |
| "learning_rate": 4.480812641083522e-05, | |
| "loss": 2.2789, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 31.15, | |
| "eval_loss": 2.4657058715820312, | |
| "eval_runtime": 118.3281, | |
| "eval_samples_per_second": 103.855, | |
| "eval_steps_per_second": 6.499, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 31.83, | |
| "learning_rate": 4.4695259593679463e-05, | |
| "loss": 2.2665, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 31.83, | |
| "eval_loss": 2.4447216987609863, | |
| "eval_runtime": 118.3432, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 32.51, | |
| "learning_rate": 4.45823927765237e-05, | |
| "loss": 2.2545, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 32.51, | |
| "eval_loss": 2.4484477043151855, | |
| "eval_runtime": 118.3442, | |
| "eval_samples_per_second": 103.841, | |
| "eval_steps_per_second": 6.498, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 33.18, | |
| "learning_rate": 4.446952595936795e-05, | |
| "loss": 2.2446, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 33.18, | |
| "eval_loss": 2.4324302673339844, | |
| "eval_runtime": 118.3492, | |
| "eval_samples_per_second": 103.837, | |
| "eval_steps_per_second": 6.498, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 33.86, | |
| "learning_rate": 4.435665914221219e-05, | |
| "loss": 2.2352, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 33.86, | |
| "eval_loss": 2.426417827606201, | |
| "eval_runtime": 118.3545, | |
| "eval_samples_per_second": 103.832, | |
| "eval_steps_per_second": 6.497, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 34.54, | |
| "learning_rate": 4.424379232505644e-05, | |
| "loss": 2.2218, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 34.54, | |
| "eval_loss": 2.4175431728363037, | |
| "eval_runtime": 121.4712, | |
| "eval_samples_per_second": 101.168, | |
| "eval_steps_per_second": 6.331, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 4.413092550790068e-05, | |
| "loss": 2.2153, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "eval_loss": 2.4122180938720703, | |
| "eval_runtime": 121.3685, | |
| "eval_samples_per_second": 101.254, | |
| "eval_steps_per_second": 6.336, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 35.89, | |
| "learning_rate": 4.401805869074492e-05, | |
| "loss": 2.206, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 35.89, | |
| "eval_loss": 2.392340660095215, | |
| "eval_runtime": 121.4694, | |
| "eval_samples_per_second": 101.169, | |
| "eval_steps_per_second": 6.331, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 36.57, | |
| "learning_rate": 4.390519187358917e-05, | |
| "loss": 2.1931, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 36.57, | |
| "eval_loss": 2.386526584625244, | |
| "eval_runtime": 121.4241, | |
| "eval_samples_per_second": 101.207, | |
| "eval_steps_per_second": 6.333, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 37.25, | |
| "learning_rate": 4.379232505643341e-05, | |
| "loss": 2.1876, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 37.25, | |
| "eval_loss": 2.383101224899292, | |
| "eval_runtime": 121.3529, | |
| "eval_samples_per_second": 101.267, | |
| "eval_steps_per_second": 6.337, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 37.92, | |
| "learning_rate": 4.367945823927765e-05, | |
| "loss": 2.1817, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 37.92, | |
| "eval_loss": 2.3782711029052734, | |
| "eval_runtime": 121.4029, | |
| "eval_samples_per_second": 101.225, | |
| "eval_steps_per_second": 6.334, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 38.6, | |
| "learning_rate": 4.35665914221219e-05, | |
| "loss": 2.1661, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 38.6, | |
| "eval_loss": 2.3761754035949707, | |
| "eval_runtime": 121.5223, | |
| "eval_samples_per_second": 101.126, | |
| "eval_steps_per_second": 6.328, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 39.28, | |
| "learning_rate": 4.3453724604966136e-05, | |
| "loss": 2.1635, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 39.28, | |
| "eval_loss": 2.365755319595337, | |
| "eval_runtime": 121.4762, | |
| "eval_samples_per_second": 101.164, | |
| "eval_steps_per_second": 6.33, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 39.95, | |
| "learning_rate": 4.334085778781038e-05, | |
| "loss": 2.1533, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 39.95, | |
| "eval_loss": 2.359434127807617, | |
| "eval_runtime": 121.2612, | |
| "eval_samples_per_second": 101.343, | |
| "eval_steps_per_second": 6.342, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 40.63, | |
| "learning_rate": 4.322799097065463e-05, | |
| "loss": 2.1444, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 40.63, | |
| "eval_loss": 2.3534085750579834, | |
| "eval_runtime": 121.4588, | |
| "eval_samples_per_second": 101.178, | |
| "eval_steps_per_second": 6.331, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 41.31, | |
| "learning_rate": 4.311512415349887e-05, | |
| "loss": 2.1389, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 41.31, | |
| "eval_loss": 2.3499608039855957, | |
| "eval_runtime": 121.4347, | |
| "eval_samples_per_second": 101.198, | |
| "eval_steps_per_second": 6.333, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "learning_rate": 4.300225733634312e-05, | |
| "loss": 2.1343, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "eval_loss": 2.33479642868042, | |
| "eval_runtime": 121.4769, | |
| "eval_samples_per_second": 101.163, | |
| "eval_steps_per_second": 6.33, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 42.66, | |
| "learning_rate": 4.2889390519187363e-05, | |
| "loss": 2.1204, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 42.66, | |
| "eval_loss": 2.338609457015991, | |
| "eval_runtime": 119.9199, | |
| "eval_samples_per_second": 102.477, | |
| "eval_steps_per_second": 6.413, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 43.34, | |
| "learning_rate": 4.277652370203161e-05, | |
| "loss": 2.1149, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 43.34, | |
| "eval_loss": 2.3366451263427734, | |
| "eval_runtime": 121.3615, | |
| "eval_samples_per_second": 101.259, | |
| "eval_steps_per_second": 6.336, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 44.02, | |
| "learning_rate": 4.266365688487585e-05, | |
| "loss": 2.1124, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 44.02, | |
| "eval_loss": 2.3272287845611572, | |
| "eval_runtime": 121.4223, | |
| "eval_samples_per_second": 101.209, | |
| "eval_steps_per_second": 6.333, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 44.7, | |
| "learning_rate": 4.255079006772009e-05, | |
| "loss": 2.0996, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 44.7, | |
| "eval_loss": 2.3160288333892822, | |
| "eval_runtime": 121.4331, | |
| "eval_samples_per_second": 101.2, | |
| "eval_steps_per_second": 6.333, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 45.37, | |
| "learning_rate": 4.243792325056434e-05, | |
| "loss": 2.0954, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 45.37, | |
| "eval_loss": 2.3165717124938965, | |
| "eval_runtime": 121.3269, | |
| "eval_samples_per_second": 101.288, | |
| "eval_steps_per_second": 6.338, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 46.05, | |
| "learning_rate": 4.232505643340858e-05, | |
| "loss": 2.0901, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 46.05, | |
| "eval_loss": 2.303679943084717, | |
| "eval_runtime": 121.3845, | |
| "eval_samples_per_second": 101.24, | |
| "eval_steps_per_second": 6.335, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 46.73, | |
| "learning_rate": 4.221218961625282e-05, | |
| "loss": 2.0819, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 46.73, | |
| "eval_loss": 2.2977073192596436, | |
| "eval_runtime": 121.4379, | |
| "eval_samples_per_second": 101.196, | |
| "eval_steps_per_second": 6.332, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 47.4, | |
| "learning_rate": 4.209932279909707e-05, | |
| "loss": 2.0725, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 47.4, | |
| "eval_loss": 2.3036298751831055, | |
| "eval_runtime": 121.4278, | |
| "eval_samples_per_second": 101.204, | |
| "eval_steps_per_second": 6.333, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 48.08, | |
| "learning_rate": 4.198645598194131e-05, | |
| "loss": 2.0729, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 48.08, | |
| "eval_loss": 2.2955193519592285, | |
| "eval_runtime": 121.3513, | |
| "eval_samples_per_second": 101.268, | |
| "eval_steps_per_second": 6.337, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 48.76, | |
| "learning_rate": 4.187358916478555e-05, | |
| "loss": 2.0621, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 48.76, | |
| "eval_loss": 2.284827947616577, | |
| "eval_runtime": 121.3713, | |
| "eval_samples_per_second": 101.251, | |
| "eval_steps_per_second": 6.336, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 49.44, | |
| "learning_rate": 4.17607223476298e-05, | |
| "loss": 2.055, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 49.44, | |
| "eval_loss": 2.2865021228790283, | |
| "eval_runtime": 121.4042, | |
| "eval_samples_per_second": 101.224, | |
| "eval_steps_per_second": 6.334, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 50.11, | |
| "learning_rate": 4.164785553047404e-05, | |
| "loss": 2.0514, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 50.11, | |
| "eval_loss": 2.2722549438476562, | |
| "eval_runtime": 121.3871, | |
| "eval_samples_per_second": 101.238, | |
| "eval_steps_per_second": 6.335, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 50.79, | |
| "learning_rate": 4.153498871331828e-05, | |
| "loss": 2.0427, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 50.79, | |
| "eval_loss": 2.2747364044189453, | |
| "eval_runtime": 121.4348, | |
| "eval_samples_per_second": 101.198, | |
| "eval_steps_per_second": 6.333, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 51.47, | |
| "learning_rate": 4.142212189616253e-05, | |
| "loss": 2.0398, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 51.47, | |
| "eval_loss": 2.275329113006592, | |
| "eval_runtime": 121.4165, | |
| "eval_samples_per_second": 101.214, | |
| "eval_steps_per_second": 6.334, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 52.14, | |
| "learning_rate": 4.130925507900677e-05, | |
| "loss": 2.0373, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 52.14, | |
| "eval_loss": 2.263934850692749, | |
| "eval_runtime": 121.4832, | |
| "eval_samples_per_second": 101.158, | |
| "eval_steps_per_second": 6.33, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 52.82, | |
| "learning_rate": 4.119638826185102e-05, | |
| "loss": 2.0293, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 52.82, | |
| "eval_loss": 2.2591116428375244, | |
| "eval_runtime": 121.453, | |
| "eval_samples_per_second": 101.183, | |
| "eval_steps_per_second": 6.332, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 53.5, | |
| "learning_rate": 4.108352144469526e-05, | |
| "loss": 2.0222, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 53.5, | |
| "eval_loss": 2.251147508621216, | |
| "eval_runtime": 121.2819, | |
| "eval_samples_per_second": 101.326, | |
| "eval_steps_per_second": 6.341, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 54.18, | |
| "learning_rate": 4.097065462753951e-05, | |
| "loss": 2.018, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 54.18, | |
| "eval_loss": 2.2465593814849854, | |
| "eval_runtime": 121.4341, | |
| "eval_samples_per_second": 101.199, | |
| "eval_steps_per_second": 6.333, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 54.85, | |
| "learning_rate": 4.085778781038375e-05, | |
| "loss": 2.0129, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 54.85, | |
| "eval_loss": 2.244495153427124, | |
| "eval_runtime": 121.4224, | |
| "eval_samples_per_second": 101.209, | |
| "eval_steps_per_second": 6.333, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 55.53, | |
| "learning_rate": 4.074492099322799e-05, | |
| "loss": 2.0071, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 55.53, | |
| "eval_loss": 2.244058609008789, | |
| "eval_runtime": 121.4271, | |
| "eval_samples_per_second": 101.205, | |
| "eval_steps_per_second": 6.333, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 56.21, | |
| "learning_rate": 4.063205417607224e-05, | |
| "loss": 2.0026, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 56.21, | |
| "eval_loss": 2.2374625205993652, | |
| "eval_runtime": 121.4591, | |
| "eval_samples_per_second": 101.178, | |
| "eval_steps_per_second": 6.331, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 56.88, | |
| "learning_rate": 4.0519187358916484e-05, | |
| "loss": 1.9989, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 56.88, | |
| "eval_loss": 2.2266647815704346, | |
| "eval_runtime": 121.4333, | |
| "eval_samples_per_second": 101.2, | |
| "eval_steps_per_second": 6.333, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 57.56, | |
| "learning_rate": 4.040632054176072e-05, | |
| "loss": 1.9917, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 57.56, | |
| "eval_loss": 2.2338218688964844, | |
| "eval_runtime": 121.4506, | |
| "eval_samples_per_second": 101.185, | |
| "eval_steps_per_second": 6.332, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 58.24, | |
| "learning_rate": 4.029345372460497e-05, | |
| "loss": 1.9869, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 58.24, | |
| "eval_loss": 2.226421594619751, | |
| "eval_runtime": 121.395, | |
| "eval_samples_per_second": 101.232, | |
| "eval_steps_per_second": 6.335, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 58.92, | |
| "learning_rate": 4.018058690744921e-05, | |
| "loss": 1.9855, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 58.92, | |
| "eval_loss": 2.22316837310791, | |
| "eval_runtime": 121.4361, | |
| "eval_samples_per_second": 101.197, | |
| "eval_steps_per_second": 6.333, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 59.59, | |
| "learning_rate": 4.006772009029345e-05, | |
| "loss": 1.975, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 59.59, | |
| "eval_loss": 2.221580743789673, | |
| "eval_runtime": 121.4217, | |
| "eval_samples_per_second": 101.209, | |
| "eval_steps_per_second": 6.333, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 60.27, | |
| "learning_rate": 3.99548532731377e-05, | |
| "loss": 1.9738, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 60.27, | |
| "eval_loss": 2.2099127769470215, | |
| "eval_runtime": 121.4414, | |
| "eval_samples_per_second": 101.193, | |
| "eval_steps_per_second": 6.332, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 60.95, | |
| "learning_rate": 3.984198645598194e-05, | |
| "loss": 1.9724, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 60.95, | |
| "eval_loss": 2.217116355895996, | |
| "eval_runtime": 121.4225, | |
| "eval_samples_per_second": 101.209, | |
| "eval_steps_per_second": 6.333, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 61.63, | |
| "learning_rate": 3.972911963882618e-05, | |
| "loss": 1.9643, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 61.63, | |
| "eval_loss": 2.2091891765594482, | |
| "eval_runtime": 121.3749, | |
| "eval_samples_per_second": 101.248, | |
| "eval_steps_per_second": 6.336, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 62.3, | |
| "learning_rate": 3.961625282167043e-05, | |
| "loss": 1.9582, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 62.3, | |
| "eval_loss": 2.2050740718841553, | |
| "eval_runtime": 121.3877, | |
| "eval_samples_per_second": 101.238, | |
| "eval_steps_per_second": 6.335, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 62.98, | |
| "learning_rate": 3.950338600451467e-05, | |
| "loss": 1.9596, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 62.98, | |
| "eval_loss": 2.2095320224761963, | |
| "eval_runtime": 121.3734, | |
| "eval_samples_per_second": 101.25, | |
| "eval_steps_per_second": 6.336, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 63.66, | |
| "learning_rate": 3.939051918735892e-05, | |
| "loss": 1.9491, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 63.66, | |
| "eval_loss": 2.201195478439331, | |
| "eval_runtime": 121.4462, | |
| "eval_samples_per_second": 101.189, | |
| "eval_steps_per_second": 6.332, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 64.33, | |
| "learning_rate": 3.927765237020316e-05, | |
| "loss": 1.9493, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 64.33, | |
| "eval_loss": 2.1953182220458984, | |
| "eval_runtime": 121.4109, | |
| "eval_samples_per_second": 101.218, | |
| "eval_steps_per_second": 6.334, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 65.01, | |
| "learning_rate": 3.916478555304741e-05, | |
| "loss": 1.946, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 65.01, | |
| "eval_loss": 2.1867878437042236, | |
| "eval_runtime": 119.4236, | |
| "eval_samples_per_second": 102.903, | |
| "eval_steps_per_second": 6.439, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 65.69, | |
| "learning_rate": 3.9051918735891654e-05, | |
| "loss": 1.9359, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 65.69, | |
| "eval_loss": 2.1980998516082764, | |
| "eval_runtime": 121.4109, | |
| "eval_samples_per_second": 101.218, | |
| "eval_steps_per_second": 6.334, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 66.37, | |
| "learning_rate": 3.893905191873589e-05, | |
| "loss": 1.9333, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 66.37, | |
| "eval_loss": 2.1945624351501465, | |
| "eval_runtime": 121.3328, | |
| "eval_samples_per_second": 101.283, | |
| "eval_steps_per_second": 6.338, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 67.04, | |
| "learning_rate": 3.882618510158014e-05, | |
| "loss": 1.9327, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 67.04, | |
| "eval_loss": 2.1841721534729004, | |
| "eval_runtime": 121.4364, | |
| "eval_samples_per_second": 101.197, | |
| "eval_steps_per_second": 6.333, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 67.72, | |
| "learning_rate": 3.8713318284424384e-05, | |
| "loss": 1.9243, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 67.72, | |
| "eval_loss": 2.189509391784668, | |
| "eval_runtime": 121.4244, | |
| "eval_samples_per_second": 101.207, | |
| "eval_steps_per_second": 6.333, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 68.4, | |
| "learning_rate": 3.860045146726862e-05, | |
| "loss": 1.9241, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 68.4, | |
| "eval_loss": 2.170930862426758, | |
| "eval_runtime": 121.4233, | |
| "eval_samples_per_second": 101.208, | |
| "eval_steps_per_second": 6.333, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 69.07, | |
| "learning_rate": 3.848758465011287e-05, | |
| "loss": 1.9183, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 69.07, | |
| "eval_loss": 2.18388032913208, | |
| "eval_runtime": 121.4294, | |
| "eval_samples_per_second": 101.203, | |
| "eval_steps_per_second": 6.333, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 69.75, | |
| "learning_rate": 3.837471783295711e-05, | |
| "loss": 1.916, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 69.75, | |
| "eval_loss": 2.179508686065674, | |
| "eval_runtime": 121.3882, | |
| "eval_samples_per_second": 101.237, | |
| "eval_steps_per_second": 6.335, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 70.43, | |
| "learning_rate": 3.826185101580136e-05, | |
| "loss": 1.9105, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 70.43, | |
| "eval_loss": 2.1771745681762695, | |
| "eval_runtime": 121.4394, | |
| "eval_samples_per_second": 101.194, | |
| "eval_steps_per_second": 6.332, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 71.11, | |
| "learning_rate": 3.81489841986456e-05, | |
| "loss": 1.9117, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 71.11, | |
| "eval_loss": 2.181852340698242, | |
| "eval_runtime": 121.3465, | |
| "eval_samples_per_second": 101.272, | |
| "eval_steps_per_second": 6.337, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 71.78, | |
| "learning_rate": 3.803611738148984e-05, | |
| "loss": 1.9041, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 71.78, | |
| "eval_loss": 2.1662580966949463, | |
| "eval_runtime": 121.4449, | |
| "eval_samples_per_second": 101.19, | |
| "eval_steps_per_second": 6.332, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 72.46, | |
| "learning_rate": 3.792325056433409e-05, | |
| "loss": 1.9021, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 72.46, | |
| "eval_loss": 2.165590524673462, | |
| "eval_runtime": 121.4569, | |
| "eval_samples_per_second": 101.18, | |
| "eval_steps_per_second": 6.331, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 73.14, | |
| "learning_rate": 3.781038374717833e-05, | |
| "loss": 1.8995, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 73.14, | |
| "eval_loss": 2.1610703468322754, | |
| "eval_runtime": 121.4207, | |
| "eval_samples_per_second": 101.21, | |
| "eval_steps_per_second": 6.333, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 73.81, | |
| "learning_rate": 3.769751693002257e-05, | |
| "loss": 1.896, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 73.81, | |
| "eval_loss": 2.1603007316589355, | |
| "eval_runtime": 121.3989, | |
| "eval_samples_per_second": 101.228, | |
| "eval_steps_per_second": 6.334, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 74.49, | |
| "learning_rate": 3.758465011286682e-05, | |
| "loss": 1.8901, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 74.49, | |
| "eval_loss": 2.158705711364746, | |
| "eval_runtime": 121.4675, | |
| "eval_samples_per_second": 101.171, | |
| "eval_steps_per_second": 6.331, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 75.17, | |
| "learning_rate": 3.747178329571106e-05, | |
| "loss": 1.8887, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 75.17, | |
| "eval_loss": 2.152022123336792, | |
| "eval_runtime": 121.4184, | |
| "eval_samples_per_second": 101.212, | |
| "eval_steps_per_second": 6.333, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 75.85, | |
| "learning_rate": 3.735891647855531e-05, | |
| "loss": 1.8855, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 75.85, | |
| "eval_loss": 2.1545896530151367, | |
| "eval_runtime": 121.4036, | |
| "eval_samples_per_second": 101.224, | |
| "eval_steps_per_second": 6.334, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 76.52, | |
| "learning_rate": 3.7246049661399554e-05, | |
| "loss": 1.8802, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 76.52, | |
| "eval_loss": 2.1495370864868164, | |
| "eval_runtime": 121.4898, | |
| "eval_samples_per_second": 101.153, | |
| "eval_steps_per_second": 6.33, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 77.2, | |
| "learning_rate": 3.71331828442438e-05, | |
| "loss": 1.8783, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 77.2, | |
| "eval_loss": 2.1447861194610596, | |
| "eval_runtime": 121.3997, | |
| "eval_samples_per_second": 101.228, | |
| "eval_steps_per_second": 6.334, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 77.88, | |
| "learning_rate": 3.702031602708804e-05, | |
| "loss": 1.8743, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 77.88, | |
| "eval_loss": 2.1373064517974854, | |
| "eval_runtime": 121.3755, | |
| "eval_samples_per_second": 101.248, | |
| "eval_steps_per_second": 6.336, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 78.56, | |
| "learning_rate": 3.6907449209932284e-05, | |
| "loss": 1.8679, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 78.56, | |
| "eval_loss": 2.1462478637695312, | |
| "eval_runtime": 121.3988, | |
| "eval_samples_per_second": 101.228, | |
| "eval_steps_per_second": 6.334, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 79.23, | |
| "learning_rate": 3.679458239277653e-05, | |
| "loss": 1.8681, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 79.23, | |
| "eval_loss": 2.14402174949646, | |
| "eval_runtime": 121.4163, | |
| "eval_samples_per_second": 101.214, | |
| "eval_steps_per_second": 6.334, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 79.91, | |
| "learning_rate": 3.668171557562077e-05, | |
| "loss": 1.8649, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 79.91, | |
| "eval_loss": 2.1338822841644287, | |
| "eval_runtime": 121.3916, | |
| "eval_samples_per_second": 101.234, | |
| "eval_steps_per_second": 6.335, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 80.59, | |
| "learning_rate": 3.656884875846501e-05, | |
| "loss": 1.8593, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 80.59, | |
| "eval_loss": 2.139404296875, | |
| "eval_runtime": 121.4277, | |
| "eval_samples_per_second": 101.204, | |
| "eval_steps_per_second": 6.333, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 81.26, | |
| "learning_rate": 3.645598194130926e-05, | |
| "loss": 1.8592, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 81.26, | |
| "eval_loss": 2.1354503631591797, | |
| "eval_runtime": 121.402, | |
| "eval_samples_per_second": 101.226, | |
| "eval_steps_per_second": 6.334, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 81.94, | |
| "learning_rate": 3.63431151241535e-05, | |
| "loss": 1.8569, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 81.94, | |
| "eval_loss": 2.135469436645508, | |
| "eval_runtime": 121.3789, | |
| "eval_samples_per_second": 101.245, | |
| "eval_steps_per_second": 6.336, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 82.62, | |
| "learning_rate": 3.623024830699774e-05, | |
| "loss": 1.849, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 82.62, | |
| "eval_loss": 2.1346044540405273, | |
| "eval_runtime": 121.4745, | |
| "eval_samples_per_second": 101.165, | |
| "eval_steps_per_second": 6.331, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 83.3, | |
| "learning_rate": 3.611738148984199e-05, | |
| "loss": 1.8481, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 83.3, | |
| "eval_loss": 2.1314146518707275, | |
| "eval_runtime": 121.3262, | |
| "eval_samples_per_second": 101.289, | |
| "eval_steps_per_second": 6.338, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 83.97, | |
| "learning_rate": 3.600451467268623e-05, | |
| "loss": 1.8499, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 83.97, | |
| "eval_loss": 2.126936197280884, | |
| "eval_runtime": 121.4278, | |
| "eval_samples_per_second": 101.204, | |
| "eval_steps_per_second": 6.333, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 84.65, | |
| "learning_rate": 3.589164785553047e-05, | |
| "loss": 1.8394, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 84.65, | |
| "eval_loss": 2.118168592453003, | |
| "eval_runtime": 121.4516, | |
| "eval_samples_per_second": 101.184, | |
| "eval_steps_per_second": 6.332, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 85.33, | |
| "learning_rate": 3.577878103837472e-05, | |
| "loss": 1.8394, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 85.33, | |
| "eval_loss": 2.1206483840942383, | |
| "eval_runtime": 121.4259, | |
| "eval_samples_per_second": 101.206, | |
| "eval_steps_per_second": 6.333, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "learning_rate": 3.566591422121896e-05, | |
| "loss": 1.8408, | |
| "step": 571500 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 2.1219327449798584, | |
| "eval_runtime": 118.4077, | |
| "eval_samples_per_second": 103.785, | |
| "eval_steps_per_second": 6.495, | |
| "step": 571500 | |
| }, | |
| { | |
| "epoch": 86.68, | |
| "learning_rate": 3.555304740406321e-05, | |
| "loss": 1.832, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 86.68, | |
| "eval_loss": 2.1104817390441895, | |
| "eval_runtime": 118.1444, | |
| "eval_samples_per_second": 104.017, | |
| "eval_steps_per_second": 6.509, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 87.36, | |
| "learning_rate": 3.5440180586907454e-05, | |
| "loss": 1.8305, | |
| "step": 580500 | |
| }, | |
| { | |
| "epoch": 87.36, | |
| "eval_loss": 2.1246631145477295, | |
| "eval_runtime": 118.1028, | |
| "eval_samples_per_second": 104.053, | |
| "eval_steps_per_second": 6.511, | |
| "step": 580500 | |
| }, | |
| { | |
| "epoch": 88.04, | |
| "learning_rate": 3.53273137697517e-05, | |
| "loss": 1.8333, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 88.04, | |
| "eval_loss": 2.1201488971710205, | |
| "eval_runtime": 118.0958, | |
| "eval_samples_per_second": 104.06, | |
| "eval_steps_per_second": 6.512, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 88.71, | |
| "learning_rate": 3.521444695259594e-05, | |
| "loss": 1.8227, | |
| "step": 589500 | |
| }, | |
| { | |
| "epoch": 88.71, | |
| "eval_loss": 2.104985475540161, | |
| "eval_runtime": 118.0975, | |
| "eval_samples_per_second": 104.058, | |
| "eval_steps_per_second": 6.512, | |
| "step": 589500 | |
| }, | |
| { | |
| "epoch": 89.39, | |
| "learning_rate": 3.5101580135440183e-05, | |
| "loss": 1.8216, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 89.39, | |
| "eval_loss": 2.1118545532226562, | |
| "eval_runtime": 118.0937, | |
| "eval_samples_per_second": 104.061, | |
| "eval_steps_per_second": 6.512, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 90.07, | |
| "learning_rate": 3.498871331828443e-05, | |
| "loss": 1.8234, | |
| "step": 598500 | |
| }, | |
| { | |
| "epoch": 90.07, | |
| "eval_loss": 2.109297037124634, | |
| "eval_runtime": 118.1025, | |
| "eval_samples_per_second": 104.054, | |
| "eval_steps_per_second": 6.511, | |
| "step": 598500 | |
| }, | |
| { | |
| "epoch": 90.74, | |
| "learning_rate": 3.487584650112867e-05, | |
| "loss": 1.8162, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 90.74, | |
| "eval_loss": 2.0999834537506104, | |
| "eval_runtime": 118.0968, | |
| "eval_samples_per_second": 104.059, | |
| "eval_steps_per_second": 6.512, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 91.42, | |
| "learning_rate": 3.476297968397291e-05, | |
| "loss": 1.8153, | |
| "step": 607500 | |
| }, | |
| { | |
| "epoch": 91.42, | |
| "eval_loss": 2.110783576965332, | |
| "eval_runtime": 118.1749, | |
| "eval_samples_per_second": 103.99, | |
| "eval_steps_per_second": 6.507, | |
| "step": 607500 | |
| }, | |
| { | |
| "epoch": 92.1, | |
| "learning_rate": 3.465011286681716e-05, | |
| "loss": 1.8153, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 92.1, | |
| "eval_loss": 2.1009647846221924, | |
| "eval_runtime": 118.1986, | |
| "eval_samples_per_second": 103.969, | |
| "eval_steps_per_second": 6.506, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 92.78, | |
| "learning_rate": 3.4537246049661404e-05, | |
| "loss": 1.8095, | |
| "step": 616500 | |
| }, | |
| { | |
| "epoch": 92.78, | |
| "eval_loss": 2.0992209911346436, | |
| "eval_runtime": 118.1834, | |
| "eval_samples_per_second": 103.982, | |
| "eval_steps_per_second": 6.507, | |
| "step": 616500 | |
| }, | |
| { | |
| "epoch": 93.45, | |
| "learning_rate": 3.442437923250564e-05, | |
| "loss": 1.807, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 93.45, | |
| "eval_loss": 2.098292827606201, | |
| "eval_runtime": 118.1816, | |
| "eval_samples_per_second": 103.984, | |
| "eval_steps_per_second": 6.507, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 94.13, | |
| "learning_rate": 3.431151241534989e-05, | |
| "loss": 1.805, | |
| "step": 625500 | |
| }, | |
| { | |
| "epoch": 94.13, | |
| "eval_loss": 2.0988106727600098, | |
| "eval_runtime": 118.1659, | |
| "eval_samples_per_second": 103.998, | |
| "eval_steps_per_second": 6.508, | |
| "step": 625500 | |
| }, | |
| { | |
| "epoch": 94.81, | |
| "learning_rate": 3.4198645598194133e-05, | |
| "loss": 1.8015, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 94.81, | |
| "eval_loss": 2.0965840816497803, | |
| "eval_runtime": 118.1678, | |
| "eval_samples_per_second": 103.996, | |
| "eval_steps_per_second": 6.508, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 95.49, | |
| "learning_rate": 3.408577878103837e-05, | |
| "loss": 1.7964, | |
| "step": 634500 | |
| }, | |
| { | |
| "epoch": 95.49, | |
| "eval_loss": 2.0856127738952637, | |
| "eval_runtime": 118.3087, | |
| "eval_samples_per_second": 103.872, | |
| "eval_steps_per_second": 6.5, | |
| "step": 634500 | |
| }, | |
| { | |
| "epoch": 96.16, | |
| "learning_rate": 3.397291196388262e-05, | |
| "loss": 1.7988, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 96.16, | |
| "eval_loss": 2.0870988368988037, | |
| "eval_runtime": 118.2923, | |
| "eval_samples_per_second": 103.887, | |
| "eval_steps_per_second": 6.501, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 96.84, | |
| "learning_rate": 3.386004514672686e-05, | |
| "loss": 1.794, | |
| "step": 643500 | |
| }, | |
| { | |
| "epoch": 96.84, | |
| "eval_loss": 2.092172861099243, | |
| "eval_runtime": 118.3047, | |
| "eval_samples_per_second": 103.876, | |
| "eval_steps_per_second": 6.5, | |
| "step": 643500 | |
| }, | |
| { | |
| "epoch": 97.52, | |
| "learning_rate": 3.374717832957111e-05, | |
| "loss": 1.7917, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 97.52, | |
| "eval_loss": 2.0861566066741943, | |
| "eval_runtime": 118.2327, | |
| "eval_samples_per_second": 103.939, | |
| "eval_steps_per_second": 6.504, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 98.19, | |
| "learning_rate": 3.3634311512415354e-05, | |
| "loss": 1.79, | |
| "step": 652500 | |
| }, | |
| { | |
| "epoch": 98.19, | |
| "eval_loss": 2.0844566822052, | |
| "eval_runtime": 118.1297, | |
| "eval_samples_per_second": 104.03, | |
| "eval_steps_per_second": 6.51, | |
| "step": 652500 | |
| }, | |
| { | |
| "epoch": 98.87, | |
| "learning_rate": 3.35214446952596e-05, | |
| "loss": 1.788, | |
| "step": 657000 | |
| }, | |
| { | |
| "epoch": 98.87, | |
| "eval_loss": 2.0832607746124268, | |
| "eval_runtime": 118.1145, | |
| "eval_samples_per_second": 104.043, | |
| "eval_steps_per_second": 6.511, | |
| "step": 657000 | |
| }, | |
| { | |
| "epoch": 99.55, | |
| "learning_rate": 3.3408577878103845e-05, | |
| "loss": 1.7833, | |
| "step": 661500 | |
| }, | |
| { | |
| "epoch": 99.55, | |
| "eval_loss": 2.082475185394287, | |
| "eval_runtime": 118.0929, | |
| "eval_samples_per_second": 104.062, | |
| "eval_steps_per_second": 6.512, | |
| "step": 661500 | |
| }, | |
| { | |
| "epoch": 100.23, | |
| "learning_rate": 3.3295711060948083e-05, | |
| "loss": 1.7821, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 100.23, | |
| "eval_loss": 2.0830888748168945, | |
| "eval_runtime": 118.1331, | |
| "eval_samples_per_second": 104.027, | |
| "eval_steps_per_second": 6.51, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 100.9, | |
| "learning_rate": 3.318284424379233e-05, | |
| "loss": 1.7809, | |
| "step": 670500 | |
| }, | |
| { | |
| "epoch": 100.9, | |
| "eval_loss": 2.080984115600586, | |
| "eval_runtime": 118.0711, | |
| "eval_samples_per_second": 104.081, | |
| "eval_steps_per_second": 6.513, | |
| "step": 670500 | |
| }, | |
| { | |
| "epoch": 101.58, | |
| "learning_rate": 3.3069977426636574e-05, | |
| "loss": 1.7757, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 101.58, | |
| "eval_loss": 2.078061819076538, | |
| "eval_runtime": 118.0799, | |
| "eval_samples_per_second": 104.074, | |
| "eval_steps_per_second": 6.513, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 102.26, | |
| "learning_rate": 3.295711060948081e-05, | |
| "loss": 1.7737, | |
| "step": 679500 | |
| }, | |
| { | |
| "epoch": 102.26, | |
| "eval_loss": 2.079832077026367, | |
| "eval_runtime": 118.3274, | |
| "eval_samples_per_second": 103.856, | |
| "eval_steps_per_second": 6.499, | |
| "step": 679500 | |
| }, | |
| { | |
| "epoch": 102.93, | |
| "learning_rate": 3.284424379232506e-05, | |
| "loss": 1.7738, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 102.93, | |
| "eval_loss": 2.0702972412109375, | |
| "eval_runtime": 118.0753, | |
| "eval_samples_per_second": 104.078, | |
| "eval_steps_per_second": 6.513, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 103.61, | |
| "learning_rate": 3.2731376975169304e-05, | |
| "loss": 1.7701, | |
| "step": 688500 | |
| }, | |
| { | |
| "epoch": 103.61, | |
| "eval_loss": 2.0662286281585693, | |
| "eval_runtime": 118.9742, | |
| "eval_samples_per_second": 103.291, | |
| "eval_steps_per_second": 6.464, | |
| "step": 688500 | |
| }, | |
| { | |
| "epoch": 104.29, | |
| "learning_rate": 3.261851015801354e-05, | |
| "loss": 1.7682, | |
| "step": 693000 | |
| }, | |
| { | |
| "epoch": 104.29, | |
| "eval_loss": 2.070204973220825, | |
| "eval_runtime": 118.3434, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 6.498, | |
| "step": 693000 | |
| }, | |
| { | |
| "epoch": 104.97, | |
| "learning_rate": 3.250564334085779e-05, | |
| "loss": 1.7669, | |
| "step": 697500 | |
| }, | |
| { | |
| "epoch": 104.97, | |
| "eval_loss": 2.063176155090332, | |
| "eval_runtime": 118.357, | |
| "eval_samples_per_second": 103.83, | |
| "eval_steps_per_second": 6.497, | |
| "step": 697500 | |
| }, | |
| { | |
| "epoch": 105.64, | |
| "learning_rate": 3.239277652370203e-05, | |
| "loss": 1.7616, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 105.64, | |
| "eval_loss": 2.067533254623413, | |
| "eval_runtime": 118.3349, | |
| "eval_samples_per_second": 103.849, | |
| "eval_steps_per_second": 6.499, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 106.32, | |
| "learning_rate": 3.227990970654628e-05, | |
| "loss": 1.7623, | |
| "step": 706500 | |
| }, | |
| { | |
| "epoch": 106.32, | |
| "eval_loss": 2.0670344829559326, | |
| "eval_runtime": 118.3389, | |
| "eval_samples_per_second": 103.846, | |
| "eval_steps_per_second": 6.498, | |
| "step": 706500 | |
| }, | |
| { | |
| "epoch": 107.0, | |
| "learning_rate": 3.216704288939052e-05, | |
| "loss": 1.7623, | |
| "step": 711000 | |
| }, | |
| { | |
| "epoch": 107.0, | |
| "eval_loss": 2.0591020584106445, | |
| "eval_runtime": 118.3407, | |
| "eval_samples_per_second": 103.844, | |
| "eval_steps_per_second": 6.498, | |
| "step": 711000 | |
| }, | |
| { | |
| "epoch": 107.67, | |
| "learning_rate": 3.205417607223476e-05, | |
| "loss": 1.7553, | |
| "step": 715500 | |
| }, | |
| { | |
| "epoch": 107.67, | |
| "eval_loss": 2.0580272674560547, | |
| "eval_runtime": 118.3269, | |
| "eval_samples_per_second": 103.856, | |
| "eval_steps_per_second": 6.499, | |
| "step": 715500 | |
| }, | |
| { | |
| "epoch": 108.35, | |
| "learning_rate": 3.194130925507901e-05, | |
| "loss": 1.753, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 108.35, | |
| "eval_loss": 2.0603325366973877, | |
| "eval_runtime": 118.3271, | |
| "eval_samples_per_second": 103.856, | |
| "eval_steps_per_second": 6.499, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 109.03, | |
| "learning_rate": 3.1828442437923254e-05, | |
| "loss": 1.7549, | |
| "step": 724500 | |
| }, | |
| { | |
| "epoch": 109.03, | |
| "eval_loss": 2.0661327838897705, | |
| "eval_runtime": 118.3395, | |
| "eval_samples_per_second": 103.845, | |
| "eval_steps_per_second": 6.498, | |
| "step": 724500 | |
| }, | |
| { | |
| "epoch": 109.71, | |
| "learning_rate": 3.17155756207675e-05, | |
| "loss": 1.7474, | |
| "step": 729000 | |
| }, | |
| { | |
| "epoch": 109.71, | |
| "eval_loss": 2.0576255321502686, | |
| "eval_runtime": 118.3567, | |
| "eval_samples_per_second": 103.83, | |
| "eval_steps_per_second": 6.497, | |
| "step": 729000 | |
| }, | |
| { | |
| "epoch": 110.38, | |
| "learning_rate": 3.1602708803611745e-05, | |
| "loss": 1.7504, | |
| "step": 733500 | |
| }, | |
| { | |
| "epoch": 110.38, | |
| "eval_loss": 2.0617053508758545, | |
| "eval_runtime": 118.3259, | |
| "eval_samples_per_second": 103.857, | |
| "eval_steps_per_second": 6.499, | |
| "step": 733500 | |
| }, | |
| { | |
| "epoch": 111.06, | |
| "learning_rate": 3.148984198645598e-05, | |
| "loss": 1.7464, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 111.06, | |
| "eval_loss": 2.049917459487915, | |
| "eval_runtime": 118.3445, | |
| "eval_samples_per_second": 103.841, | |
| "eval_steps_per_second": 6.498, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 111.74, | |
| "learning_rate": 3.137697516930023e-05, | |
| "loss": 1.7432, | |
| "step": 742500 | |
| }, | |
| { | |
| "epoch": 111.74, | |
| "eval_loss": 2.056652069091797, | |
| "eval_runtime": 118.3587, | |
| "eval_samples_per_second": 103.828, | |
| "eval_steps_per_second": 6.497, | |
| "step": 742500 | |
| }, | |
| { | |
| "epoch": 112.42, | |
| "learning_rate": 3.1264108352144474e-05, | |
| "loss": 1.7404, | |
| "step": 747000 | |
| }, | |
| { | |
| "epoch": 112.42, | |
| "eval_loss": 2.0593619346618652, | |
| "eval_runtime": 118.3421, | |
| "eval_samples_per_second": 103.843, | |
| "eval_steps_per_second": 6.498, | |
| "step": 747000 | |
| }, | |
| { | |
| "epoch": 113.09, | |
| "learning_rate": 3.115124153498871e-05, | |
| "loss": 1.7438, | |
| "step": 751500 | |
| }, | |
| { | |
| "epoch": 113.09, | |
| "eval_loss": 2.045955181121826, | |
| "eval_runtime": 118.325, | |
| "eval_samples_per_second": 103.858, | |
| "eval_steps_per_second": 6.499, | |
| "step": 751500 | |
| }, | |
| { | |
| "epoch": 113.77, | |
| "learning_rate": 3.103837471783296e-05, | |
| "loss": 1.7375, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 113.77, | |
| "eval_loss": 2.055434465408325, | |
| "eval_runtime": 118.3498, | |
| "eval_samples_per_second": 103.836, | |
| "eval_steps_per_second": 6.498, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 114.45, | |
| "learning_rate": 3.0925507900677204e-05, | |
| "loss": 1.7336, | |
| "step": 760500 | |
| }, | |
| { | |
| "epoch": 114.45, | |
| "eval_loss": 2.040349006652832, | |
| "eval_runtime": 118.3546, | |
| "eval_samples_per_second": 103.832, | |
| "eval_steps_per_second": 6.497, | |
| "step": 760500 | |
| }, | |
| { | |
| "epoch": 115.12, | |
| "learning_rate": 3.081264108352145e-05, | |
| "loss": 1.7344, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 115.12, | |
| "eval_loss": 2.054170846939087, | |
| "eval_runtime": 118.3514, | |
| "eval_samples_per_second": 103.835, | |
| "eval_steps_per_second": 6.498, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 115.8, | |
| "learning_rate": 3.069977426636569e-05, | |
| "loss": 1.7324, | |
| "step": 769500 | |
| }, | |
| { | |
| "epoch": 115.8, | |
| "eval_loss": 2.042388439178467, | |
| "eval_runtime": 118.3282, | |
| "eval_samples_per_second": 103.855, | |
| "eval_steps_per_second": 6.499, | |
| "step": 769500 | |
| }, | |
| { | |
| "epoch": 116.48, | |
| "learning_rate": 3.058690744920993e-05, | |
| "loss": 1.7255, | |
| "step": 774000 | |
| }, | |
| { | |
| "epoch": 116.48, | |
| "eval_loss": 2.0501296520233154, | |
| "eval_runtime": 118.1548, | |
| "eval_samples_per_second": 104.008, | |
| "eval_steps_per_second": 6.508, | |
| "step": 774000 | |
| } | |
| ], | |
| "max_steps": 1993500, | |
| "num_train_epochs": 300, | |
| "total_flos": 6.520019673893634e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |