{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 565,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 7.92920353982301e-05,
      "loss": 2.8813,
      "step": 5
    },
    {
      "epoch": 0.09,
      "learning_rate": 7.858407079646018e-05,
      "loss": 1.9089,
      "step": 10
    },
    {
      "epoch": 0.09,
      "eval_loss": 0.9185810685157776,
      "eval_runtime": 1.818,
      "eval_samples_per_second": 6.601,
      "eval_steps_per_second": 1.65,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 7.787610619469027e-05,
      "loss": 1.0124,
      "step": 15
    },
    {
      "epoch": 0.18,
      "learning_rate": 7.716814159292036e-05,
      "loss": 0.6625,
      "step": 20
    },
    {
      "epoch": 0.18,
      "eval_loss": 0.5026406645774841,
      "eval_runtime": 1.8132,
      "eval_samples_per_second": 6.618,
      "eval_steps_per_second": 1.655,
      "step": 20
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.646017699115045e-05,
      "loss": 0.7557,
      "step": 25
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.575221238938054e-05,
      "loss": 0.6228,
      "step": 30
    },
    {
      "epoch": 0.27,
      "eval_loss": 0.42135417461395264,
      "eval_runtime": 1.8168,
      "eval_samples_per_second": 6.605,
      "eval_steps_per_second": 1.651,
      "step": 30
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.504424778761063e-05,
      "loss": 0.5134,
      "step": 35
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.433628318584072e-05,
      "loss": 0.6733,
      "step": 40
    },
    {
      "epoch": 0.35,
      "eval_loss": 0.3993542194366455,
      "eval_runtime": 1.8194,
      "eval_samples_per_second": 6.595,
      "eval_steps_per_second": 1.649,
      "step": 40
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.362831858407081e-05,
      "loss": 0.5836,
      "step": 45
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.292035398230088e-05,
      "loss": 0.5581,
      "step": 50
    },
    {
      "epoch": 0.44,
      "eval_loss": 0.33813050389289856,
      "eval_runtime": 1.8219,
      "eval_samples_per_second": 6.586,
      "eval_steps_per_second": 1.647,
      "step": 50
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.221238938053097e-05,
      "loss": 0.4934,
      "step": 55
    },
    {
      "epoch": 0.53,
      "learning_rate": 7.150442477876106e-05,
      "loss": 0.3853,
      "step": 60
    },
    {
      "epoch": 0.53,
      "eval_loss": 0.3289722502231598,
      "eval_runtime": 1.8241,
      "eval_samples_per_second": 6.579,
      "eval_steps_per_second": 1.645,
      "step": 60
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.079646017699116e-05,
      "loss": 0.4525,
      "step": 65
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.008849557522125e-05,
      "loss": 0.4146,
      "step": 70
    },
    {
      "epoch": 0.62,
      "eval_loss": 0.2982443571090698,
      "eval_runtime": 1.8249,
      "eval_samples_per_second": 6.576,
      "eval_steps_per_second": 1.644,
      "step": 70
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.938053097345134e-05,
      "loss": 0.483,
      "step": 75
    },
    {
      "epoch": 0.71,
      "learning_rate": 6.867256637168142e-05,
      "loss": 0.4702,
      "step": 80
    },
    {
      "epoch": 0.71,
      "eval_loss": 0.28516504168510437,
      "eval_runtime": 1.8265,
      "eval_samples_per_second": 6.57,
      "eval_steps_per_second": 1.642,
      "step": 80
    },
    {
      "epoch": 0.75,
      "learning_rate": 6.79646017699115e-05,
      "loss": 0.3644,
      "step": 85
    },
    {
      "epoch": 0.8,
      "learning_rate": 6.72566371681416e-05,
      "loss": 0.2309,
      "step": 90
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.3017539978027344,
      "eval_runtime": 1.8277,
      "eval_samples_per_second": 6.566,
      "eval_steps_per_second": 1.641,
      "step": 90
    },
    {
      "epoch": 0.84,
      "learning_rate": 6.654867256637168e-05,
      "loss": 0.3725,
      "step": 95
    },
    {
      "epoch": 0.88,
      "learning_rate": 6.584070796460177e-05,
      "loss": 0.4707,
      "step": 100
    },
    {
      "epoch": 0.88,
      "eval_loss": 0.2674517333507538,
      "eval_runtime": 1.8288,
      "eval_samples_per_second": 6.562,
      "eval_steps_per_second": 1.64,
      "step": 100
    },
    {
      "epoch": 0.93,
      "learning_rate": 6.513274336283187e-05,
      "loss": 0.2723,
      "step": 105
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.442477876106195e-05,
      "loss": 0.3001,
      "step": 110
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.25267747044563293,
      "eval_runtime": 1.8294,
      "eval_samples_per_second": 6.56,
      "eval_steps_per_second": 1.64,
      "step": 110
    },
    {
      "epoch": 1.02,
      "learning_rate": 6.371681415929204e-05,
      "loss": 0.2697,
      "step": 115
    },
    {
      "epoch": 1.06,
      "learning_rate": 6.300884955752213e-05,
      "loss": 0.4044,
      "step": 120
    },
    {
      "epoch": 1.06,
      "eval_loss": 0.25363221764564514,
      "eval_runtime": 1.8299,
      "eval_samples_per_second": 6.558,
      "eval_steps_per_second": 1.639,
      "step": 120
    },
    {
      "epoch": 1.11,
      "learning_rate": 6.230088495575222e-05,
      "loss": 0.2452,
      "step": 125
    },
    {
      "epoch": 1.15,
      "learning_rate": 6.15929203539823e-05,
      "loss": 0.3605,
      "step": 130
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.24785174429416656,
      "eval_runtime": 1.8306,
      "eval_samples_per_second": 6.555,
      "eval_steps_per_second": 1.639,
      "step": 130
    },
    {
      "epoch": 1.19,
      "learning_rate": 6.0884955752212394e-05,
      "loss": 0.2825,
      "step": 135
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.0176991150442476e-05,
      "loss": 0.2309,
      "step": 140
    },
    {
      "epoch": 1.24,
      "eval_loss": 0.23038876056671143,
      "eval_runtime": 1.8337,
      "eval_samples_per_second": 6.544,
      "eval_steps_per_second": 1.636,
      "step": 140
    },
    {
      "epoch": 1.28,
      "learning_rate": 5.946902654867257e-05,
      "loss": 0.4162,
      "step": 145
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.876106194690266e-05,
      "loss": 0.2481,
      "step": 150
    },
    {
      "epoch": 1.33,
      "eval_loss": 0.2184617966413498,
      "eval_runtime": 1.8365,
      "eval_samples_per_second": 6.534,
      "eval_steps_per_second": 1.634,
      "step": 150
    },
    {
      "epoch": 1.37,
      "learning_rate": 5.805309734513275e-05,
      "loss": 0.1906,
      "step": 155
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.734513274336284e-05,
      "loss": 0.3251,
      "step": 160
    },
    {
      "epoch": 1.42,
      "eval_loss": 0.2109968066215515,
      "eval_runtime": 1.8352,
      "eval_samples_per_second": 6.539,
      "eval_steps_per_second": 1.635,
      "step": 160
    },
    {
      "epoch": 1.46,
      "learning_rate": 5.663716814159293e-05,
      "loss": 0.1837,
      "step": 165
    },
    {
      "epoch": 1.5,
      "learning_rate": 5.592920353982301e-05,
      "loss": 0.227,
      "step": 170
    },
    {
      "epoch": 1.5,
      "eval_loss": 0.21275126934051514,
      "eval_runtime": 1.8352,
      "eval_samples_per_second": 6.539,
      "eval_steps_per_second": 1.635,
      "step": 170
    },
    {
      "epoch": 1.55,
      "learning_rate": 5.52212389380531e-05,
      "loss": 0.2545,
      "step": 175
    },
    {
      "epoch": 1.59,
      "learning_rate": 5.451327433628319e-05,
      "loss": 0.238,
      "step": 180
    },
    {
      "epoch": 1.59,
      "eval_loss": 0.2064710259437561,
      "eval_runtime": 1.8347,
      "eval_samples_per_second": 6.541,
      "eval_steps_per_second": 1.635,
      "step": 180
    },
    {
      "epoch": 1.64,
      "learning_rate": 5.380530973451328e-05,
      "loss": 0.2433,
      "step": 185
    },
    {
      "epoch": 1.68,
      "learning_rate": 5.309734513274337e-05,
      "loss": 0.2171,
      "step": 190
    },
    {
      "epoch": 1.68,
      "eval_loss": 0.2167491912841797,
      "eval_runtime": 1.8363,
      "eval_samples_per_second": 6.535,
      "eval_steps_per_second": 1.634,
      "step": 190
    },
    {
      "epoch": 1.73,
      "learning_rate": 5.2389380530973454e-05,
      "loss": 0.2056,
      "step": 195
    },
    {
      "epoch": 1.77,
      "learning_rate": 5.168141592920354e-05,
      "loss": 0.2844,
      "step": 200
    },
    {
      "epoch": 1.77,
      "eval_loss": 0.20672880113124847,
      "eval_runtime": 1.836,
      "eval_samples_per_second": 6.536,
      "eval_steps_per_second": 1.634,
      "step": 200
    },
    {
      "epoch": 1.81,
      "learning_rate": 5.097345132743363e-05,
      "loss": 0.1808,
      "step": 205
    },
    {
      "epoch": 1.86,
      "learning_rate": 5.026548672566372e-05,
      "loss": 0.2822,
      "step": 210
    },
    {
      "epoch": 1.86,
      "eval_loss": 0.2064841240644455,
      "eval_runtime": 1.8357,
      "eval_samples_per_second": 6.537,
      "eval_steps_per_second": 1.634,
      "step": 210
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.955752212389381e-05,
      "loss": 0.2273,
      "step": 215
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.884955752212389e-05,
      "loss": 0.2111,
      "step": 220
    },
    {
      "epoch": 1.95,
      "eval_loss": 0.20206451416015625,
      "eval_runtime": 1.8356,
      "eval_samples_per_second": 6.537,
      "eval_steps_per_second": 1.634,
      "step": 220
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.814159292035399e-05,
      "loss": 0.2043,
      "step": 225
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.7433628318584076e-05,
      "loss": 0.1915,
      "step": 230
    },
    {
      "epoch": 2.04,
      "eval_loss": 0.21362699568271637,
      "eval_runtime": 1.8362,
      "eval_samples_per_second": 6.535,
      "eval_steps_per_second": 1.634,
      "step": 230
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.6725663716814165e-05,
      "loss": 0.1787,
      "step": 235
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.6017699115044254e-05,
      "loss": 0.122,
      "step": 240
    },
    {
      "epoch": 2.12,
      "eval_loss": 0.22454655170440674,
      "eval_runtime": 1.837,
      "eval_samples_per_second": 6.533,
      "eval_steps_per_second": 1.633,
      "step": 240
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.5309734513274336e-05,
      "loss": 0.1402,
      "step": 245
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.4601769911504425e-05,
      "loss": 0.1845,
      "step": 250
    },
    {
      "epoch": 2.21,
      "eval_loss": 0.20348918437957764,
      "eval_runtime": 1.837,
      "eval_samples_per_second": 6.532,
      "eval_steps_per_second": 1.633,
      "step": 250
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.3893805309734514e-05,
      "loss": 0.1514,
      "step": 255
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.31858407079646e-05,
      "loss": 0.1597,
      "step": 260
    },
    {
      "epoch": 2.3,
      "eval_loss": 0.1980418711900711,
      "eval_runtime": 1.8389,
      "eval_samples_per_second": 6.526,
      "eval_steps_per_second": 1.631,
      "step": 260
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.24778761061947e-05,
      "loss": 0.1923,
      "step": 265
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.176991150442479e-05,
      "loss": 0.1037,
      "step": 270
    },
    {
      "epoch": 2.39,
      "eval_loss": 0.19392161071300507,
      "eval_runtime": 1.8366,
      "eval_samples_per_second": 6.534,
      "eval_steps_per_second": 1.633,
      "step": 270
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.106194690265487e-05,
      "loss": 0.1511,
      "step": 275
    },
    {
      "epoch": 2.48,
      "learning_rate": 4.035398230088496e-05,
      "loss": 0.109,
      "step": 280
    },
    {
      "epoch": 2.48,
      "eval_loss": 0.19458027184009552,
      "eval_runtime": 1.8368,
      "eval_samples_per_second": 6.533,
      "eval_steps_per_second": 1.633,
      "step": 280
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.964601769911505e-05,
      "loss": 0.1201,
      "step": 285
    },
    {
      "epoch": 2.57,
      "learning_rate": 3.893805309734514e-05,
      "loss": 0.1312,
      "step": 290
    },
    {
      "epoch": 2.57,
      "eval_loss": 0.19362002611160278,
      "eval_runtime": 1.8369,
      "eval_samples_per_second": 6.533,
      "eval_steps_per_second": 1.633,
      "step": 290
    },
    {
      "epoch": 2.61,
      "learning_rate": 3.8230088495575226e-05,
      "loss": 0.1315,
      "step": 295
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.7522123893805314e-05,
      "loss": 0.2261,
      "step": 300
    },
    {
      "epoch": 2.65,
      "eval_loss": 0.1917983889579773,
      "eval_runtime": 1.8378,
      "eval_samples_per_second": 6.53,
      "eval_steps_per_second": 1.632,
      "step": 300
    },
    {
      "epoch": 2.7,
      "learning_rate": 3.6814159292035403e-05,
      "loss": 0.1398,
      "step": 305
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.6106194690265486e-05,
      "loss": 0.113,
      "step": 310
    },
    {
      "epoch": 2.74,
      "eval_loss": 0.1863226443529129,
      "eval_runtime": 1.838,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 310
    },
    {
      "epoch": 2.79,
      "learning_rate": 3.539823008849558e-05,
      "loss": 0.1173,
      "step": 315
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.469026548672567e-05,
      "loss": 0.1762,
      "step": 320
    },
    {
      "epoch": 2.83,
      "eval_loss": 0.1789919137954712,
      "eval_runtime": 1.8392,
      "eval_samples_per_second": 6.525,
      "eval_steps_per_second": 1.631,
      "step": 320
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.398230088495575e-05,
      "loss": 0.1439,
      "step": 325
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.327433628318584e-05,
      "loss": 0.1431,
      "step": 330
    },
    {
      "epoch": 2.92,
      "eval_loss": 0.17829616367816925,
      "eval_runtime": 1.8377,
      "eval_samples_per_second": 6.53,
      "eval_steps_per_second": 1.632,
      "step": 330
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.256637168141594e-05,
      "loss": 0.2121,
      "step": 335
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.185840707964602e-05,
      "loss": 0.2109,
      "step": 340
    },
    {
      "epoch": 3.01,
      "eval_loss": 0.1760822981595993,
      "eval_runtime": 1.838,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 340
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.115044247787611e-05,
      "loss": 0.0792,
      "step": 345
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.0442477876106197e-05,
      "loss": 0.0885,
      "step": 350
    },
    {
      "epoch": 3.1,
      "eval_loss": 0.18443678319454193,
      "eval_runtime": 1.8392,
      "eval_samples_per_second": 6.524,
      "eval_steps_per_second": 1.631,
      "step": 350
    },
    {
      "epoch": 3.14,
      "learning_rate": 2.9734513274336286e-05,
      "loss": 0.088,
      "step": 355
    },
    {
      "epoch": 3.19,
      "learning_rate": 2.9026548672566375e-05,
      "loss": 0.0647,
      "step": 360
    },
    {
      "epoch": 3.19,
      "eval_loss": 0.19218747317790985,
      "eval_runtime": 1.8398,
      "eval_samples_per_second": 6.522,
      "eval_steps_per_second": 1.631,
      "step": 360
    },
    {
      "epoch": 3.23,
      "learning_rate": 2.8318584070796464e-05,
      "loss": 0.0756,
      "step": 365
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.761061946902655e-05,
      "loss": 0.126,
      "step": 370
    },
    {
      "epoch": 3.27,
      "eval_loss": 0.190928652882576,
      "eval_runtime": 1.8379,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 370
    },
    {
      "epoch": 3.32,
      "learning_rate": 2.690265486725664e-05,
      "loss": 0.0702,
      "step": 375
    },
    {
      "epoch": 3.36,
      "learning_rate": 2.6194690265486727e-05,
      "loss": 0.0965,
      "step": 380
    },
    {
      "epoch": 3.36,
      "eval_loss": 0.18783879280090332,
      "eval_runtime": 1.838,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 380
    },
    {
      "epoch": 3.41,
      "learning_rate": 2.5486725663716816e-05,
      "loss": 0.085,
      "step": 385
    },
    {
      "epoch": 3.45,
      "learning_rate": 2.4778761061946905e-05,
      "loss": 0.1068,
      "step": 390
    },
    {
      "epoch": 3.45,
      "eval_loss": 0.19145984947681427,
      "eval_runtime": 1.8387,
      "eval_samples_per_second": 6.526,
      "eval_steps_per_second": 1.632,
      "step": 390
    },
    {
      "epoch": 3.5,
      "learning_rate": 2.4070796460176994e-05,
      "loss": 0.0922,
      "step": 395
    },
    {
      "epoch": 3.54,
      "learning_rate": 2.3362831858407083e-05,
      "loss": 0.0973,
      "step": 400
    },
    {
      "epoch": 3.54,
      "eval_loss": 0.18135036528110504,
      "eval_runtime": 1.8379,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 400
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.2654867256637168e-05,
      "loss": 0.0887,
      "step": 405
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.1946902654867257e-05,
      "loss": 0.074,
      "step": 410
    },
    {
      "epoch": 3.63,
      "eval_loss": 0.18348699808120728,
      "eval_runtime": 1.8391,
      "eval_samples_per_second": 6.525,
      "eval_steps_per_second": 1.631,
      "step": 410
    },
    {
      "epoch": 3.67,
      "learning_rate": 2.123893805309735e-05,
      "loss": 0.0987,
      "step": 415
    },
    {
      "epoch": 3.72,
      "learning_rate": 2.0530973451327435e-05,
      "loss": 0.0899,
      "step": 420
    },
    {
      "epoch": 3.72,
      "eval_loss": 0.18212918937206268,
      "eval_runtime": 1.8382,
      "eval_samples_per_second": 6.528,
      "eval_steps_per_second": 1.632,
      "step": 420
    },
    {
      "epoch": 3.76,
      "learning_rate": 1.9823008849557524e-05,
      "loss": 0.0738,
      "step": 425
    },
    {
      "epoch": 3.81,
      "learning_rate": 1.9115044247787613e-05,
      "loss": 0.1126,
      "step": 430
    },
    {
      "epoch": 3.81,
      "eval_loss": 0.18066109716892242,
      "eval_runtime": 1.8399,
      "eval_samples_per_second": 6.522,
      "eval_steps_per_second": 1.63,
      "step": 430
    },
    {
      "epoch": 3.85,
      "learning_rate": 1.8407079646017702e-05,
      "loss": 0.0881,
      "step": 435
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.769911504424779e-05,
      "loss": 0.0969,
      "step": 440
    },
    {
      "epoch": 3.89,
      "eval_loss": 0.1776157021522522,
      "eval_runtime": 1.838,
      "eval_samples_per_second": 6.529,
      "eval_steps_per_second": 1.632,
      "step": 440
    },
    {
      "epoch": 3.94,
      "learning_rate": 1.6991150442477876e-05,
      "loss": 0.0559,
      "step": 445
    },
    {
      "epoch": 3.98,
      "learning_rate": 1.628318584070797e-05,
      "loss": 0.0644,
      "step": 450
    },
    {
      "epoch": 3.98,
      "eval_loss": 0.1763620376586914,
      "eval_runtime": 1.8403,
      "eval_samples_per_second": 6.521,
      "eval_steps_per_second": 1.63,
      "step": 450
    },
    {
      "epoch": 4.03,
      "learning_rate": 1.5575221238938054e-05,
      "loss": 0.0818,
      "step": 455
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.4867256637168143e-05,
      "loss": 0.049,
      "step": 460
    },
    {
      "epoch": 4.07,
      "eval_loss": 0.1785365492105484,
      "eval_runtime": 1.8386,
      "eval_samples_per_second": 6.527,
      "eval_steps_per_second": 1.632,
      "step": 460
    },
    {
      "epoch": 4.12,
      "learning_rate": 1.4159292035398232e-05,
      "loss": 0.0452,
      "step": 465
    },
    {
      "epoch": 4.16,
      "learning_rate": 1.345132743362832e-05,
      "loss": 0.0466,
      "step": 470
    },
    {
      "epoch": 4.16,
      "eval_loss": 0.18220937252044678,
      "eval_runtime": 1.8402,
      "eval_samples_per_second": 6.521,
      "eval_steps_per_second": 1.63,
      "step": 470
    },
    {
      "epoch": 4.2,
      "learning_rate": 1.2743362831858408e-05,
      "loss": 0.0407,
      "step": 475
    },
    {
      "epoch": 4.25,
      "learning_rate": 1.2035398230088497e-05,
      "loss": 0.0545,
      "step": 480
    },
    {
      "epoch": 4.25,
      "eval_loss": 0.1870112270116806,
      "eval_runtime": 1.8384,
      "eval_samples_per_second": 6.528,
      "eval_steps_per_second": 1.632,
      "step": 480
    },
    {
      "epoch": 4.29,
      "learning_rate": 1.1327433628318584e-05,
      "loss": 0.0489,
      "step": 485
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.0619469026548675e-05,
      "loss": 0.0391,
      "step": 490
    },
    {
      "epoch": 4.34,
      "eval_loss": 0.19078491628170013,
      "eval_runtime": 1.8372,
      "eval_samples_per_second": 6.532,
      "eval_steps_per_second": 1.633,
      "step": 490
    },
    {
      "epoch": 4.38,
      "learning_rate": 9.911504424778762e-06,
      "loss": 0.0447,
      "step": 495
    },
    {
      "epoch": 4.42,
      "learning_rate": 9.203539823008851e-06,
      "loss": 0.0614,
      "step": 500
    },
    {
      "epoch": 4.42,
      "eval_loss": 0.19175942242145538,
      "eval_runtime": 1.8389,
      "eval_samples_per_second": 6.526,
      "eval_steps_per_second": 1.631,
      "step": 500
    },
    {
      "epoch": 4.47,
      "learning_rate": 8.495575221238938e-06,
      "loss": 0.055,
      "step": 505
    },
    {
      "epoch": 4.51,
      "learning_rate": 7.787610619469027e-06,
      "loss": 0.0597,
      "step": 510
    },
    {
      "epoch": 4.51,
      "eval_loss": 0.18947459757328033,
      "eval_runtime": 1.8278,
      "eval_samples_per_second": 6.565,
      "eval_steps_per_second": 1.641,
      "step": 510
    },
    {
      "epoch": 4.56,
      "learning_rate": 7.079646017699116e-06,
      "loss": 0.0498,
      "step": 515
    },
    {
      "epoch": 4.6,
      "learning_rate": 6.371681415929204e-06,
      "loss": 0.0461,
      "step": 520
    },
    {
      "epoch": 4.6,
      "eval_loss": 0.18631692230701447,
      "eval_runtime": 1.8309,
      "eval_samples_per_second": 6.554,
      "eval_steps_per_second": 1.639,
      "step": 520
    },
    {
      "epoch": 4.65,
      "learning_rate": 5.663716814159292e-06,
      "loss": 0.0432,
      "step": 525
    },
    {
      "epoch": 4.69,
      "learning_rate": 4.955752212389381e-06,
      "loss": 0.0456,
      "step": 530
    },
    {
      "epoch": 4.69,
      "eval_loss": 0.1867295503616333,
      "eval_runtime": 1.8305,
      "eval_samples_per_second": 6.556,
      "eval_steps_per_second": 1.639,
      "step": 530
    },
    {
      "epoch": 4.73,
      "learning_rate": 4.247787610619469e-06,
      "loss": 0.0455,
      "step": 535
    },
    {
      "epoch": 4.78,
      "learning_rate": 3.539823008849558e-06,
      "loss": 0.0438,
      "step": 540
    },
    {
      "epoch": 4.78,
      "eval_loss": 0.186712846159935,
      "eval_runtime": 1.8333,
      "eval_samples_per_second": 6.545,
      "eval_steps_per_second": 1.636,
      "step": 540
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.831858407079646e-06,
      "loss": 0.0475,
      "step": 545
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.1238938053097345e-06,
      "loss": 0.0394,
      "step": 550
    },
    {
      "epoch": 4.87,
      "eval_loss": 0.1870775818824768,
      "eval_runtime": 1.8346,
      "eval_samples_per_second": 6.541,
      "eval_steps_per_second": 1.635,
      "step": 550
    },
    {
      "epoch": 4.91,
      "learning_rate": 1.415929203539823e-06,
      "loss": 0.0486,
      "step": 555
    },
    {
      "epoch": 4.96,
      "learning_rate": 7.079646017699115e-07,
      "loss": 0.0454,
      "step": 560
    },
    {
      "epoch": 4.96,
      "eval_loss": 0.18716545403003693,
      "eval_runtime": 1.835,
      "eval_samples_per_second": 6.539,
      "eval_steps_per_second": 1.635,
      "step": 560
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 0.0503,
      "step": 565
    },
    {
      "epoch": 5.0,
      "step": 565,
      "total_flos": 2459078098944000.0,
      "train_loss": 0.24255070493812056,
      "train_runtime": 469.174,
      "train_samples_per_second": 1.204,
      "train_steps_per_second": 1.204
    }
  ],
  "max_steps": 565,
  "num_train_epochs": 5,
  "total_flos": 2459078098944000.0,
  "trial_name": null,
  "trial_params": null
}