| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.997807075247146e-05, | |
| "loss": 1.446, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.991232148123761e-05, | |
| "loss": 1.2288, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.980286753286195e-05, | |
| "loss": 1.2506, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.964990092676263e-05, | |
| "loss": 1.1189, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.9453690018345144e-05, | |
| "loss": 1.104, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.9214579028215776e-05, | |
| "loss": 1.2144, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.893298743830168e-05, | |
| "loss": 1.0983, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.860940925593703e-05, | |
| "loss": 1.1491, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.8244412147206284e-05, | |
| "loss": 1.1163, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.783863644106502e-05, | |
| "loss": 1.0851, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.7392794005985326e-05, | |
| "loss": 1.1639, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.690766700109659e-05, | |
| "loss": 1.1499, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.638410650401267e-05, | |
| "loss": 1.0992, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.5823031017752485e-05, | |
| "loss": 1.1705, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.522542485937369e-05, | |
| "loss": 1.0919, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.4592336433146e-05, | |
| "loss": 1.1709, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.3924876391293915e-05, | |
| "loss": 1.2184, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.3224215685535294e-05, | |
| "loss": 1.1752, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.249158351283414e-05, | |
| "loss": 1.1208, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.172826515897146e-05, | |
| "loss": 1.0406, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.093559974371725e-05, | |
| "loss": 1.1003, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.011497787155938e-05, | |
| "loss": 1.0862, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.92678391921108e-05, | |
| "loss": 1.0911, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.8395669874474915e-05, | |
| "loss": 1.2021, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 1.1072, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.6582400877996546e-05, | |
| "loss": 0.7294, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.564448228912682e-05, | |
| "loss": 0.7575, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4687889661302576e-05, | |
| "loss": 0.7256, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.3714301183045385e-05, | |
| "loss": 0.6674, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.272542485937369e-05, | |
| "loss": 0.6673, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.172299551538164e-05, | |
| "loss": 0.6968, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.0708771752766394e-05, | |
| "loss": 0.6963, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.9684532864643122e-05, | |
| "loss": 0.6005, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.8652075714060295e-05, | |
| "loss": 0.6712, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.761321158169134e-05, | |
| "loss": 0.6859, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.656976298823284e-05, | |
| "loss": 0.6128, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5523560497083926e-05, | |
| "loss": 0.6395, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.447643950291608e-05, | |
| "loss": 0.7517, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.3430237011767167e-05, | |
| "loss": 0.6147, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.238678841830867e-05, | |
| "loss": 0.7306, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.1347924285939714e-05, | |
| "loss": 0.7235, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.031546713535688e-05, | |
| "loss": 0.595, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.9291228247233605e-05, | |
| "loss": 0.6347, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.827700448461836e-05, | |
| "loss": 0.6938, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.7274575140626318e-05, | |
| "loss": 0.6408, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.6285698816954624e-05, | |
| "loss": 0.6296, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.5312110338697426e-05, | |
| "loss": 0.6688, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.4355517710873184e-05, | |
| "loss": 0.661, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.3417599122003464e-05, | |
| "loss": 0.6846, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.2500000000000006e-05, | |
| "loss": 0.6501, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.1604330125525079e-05, | |
| "loss": 0.5096, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.0732160807889211e-05, | |
| "loss": 0.4049, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 9.88502212844063e-06, | |
| "loss": 0.4556, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 9.064400256282757e-06, | |
| "loss": 0.4205, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 8.271734841028553e-06, | |
| "loss": 0.3667, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.508416487165862e-06, | |
| "loss": 0.3817, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 6.775784314464717e-06, | |
| "loss": 0.4369, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 6.075123608706093e-06, | |
| "loss": 0.3672, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 5.4076635668540075e-06, | |
| "loss": 0.4072, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 4.7745751406263165e-06, | |
| "loss": 0.416, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 4.176968982247514e-06, | |
| "loss": 0.3504, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.6158934959873353e-06, | |
| "loss": 0.4019, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.092332998903416e-06, | |
| "loss": 0.4057, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.6072059940146775e-06, | |
| "loss": 0.3894, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.1613635589349756e-06, | |
| "loss": 0.3795, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 1.7555878527937164e-06, | |
| "loss": 0.3934, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.3905907440629752e-06, | |
| "loss": 0.423, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.067012561698319e-06, | |
| "loss": 0.4121, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 7.854209717842231e-07, | |
| "loss": 0.4026, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 5.463099816548579e-07, | |
| "loss": 0.4056, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 3.5009907323737825e-07, | |
| "loss": 0.3625, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.9713246713805588e-07, | |
| "loss": 0.4279, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 8.767851876239074e-08, | |
| "loss": 0.3752, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.192924752854042e-08, | |
| "loss": 0.3992, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.3638, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 375, | |
| "total_flos": 6.763019396186112e+16, | |
| "train_loss": 0.7424986867904663, | |
| "train_runtime": 1158.4304, | |
| "train_samples_per_second": 5.179, | |
| "train_steps_per_second": 0.324 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 375, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "total_flos": 6.763019396186112e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |