{
  "best_metric": 3.0555238723754883,
  "best_model_checkpoint": "output/50-cent/checkpoint-786",
  "epoch": 3.0,
  "global_step": 786,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.00013707289568217504,
      "loss": 4.0779,
      "step": 5
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.000136692053734753,
      "loss": 4.1148,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00013605888543050137,
      "loss": 4.0688,
      "step": 15
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000135175737079198,
      "loss": 3.9408,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001340458813329925,
      "loss": 3.722,
      "step": 25
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0001326735050590528,
      "loss": 3.9874,
      "step": 30
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001310636938244372,
      "loss": 3.8949,
      "step": 35
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00012922241305068514,
      "loss": 3.9061,
      "step": 40
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00012715648590796197,
      "loss": 3.8976,
      "step": 45
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00012487356803067445,
      "loss": 3.672,
      "step": 50
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00012238211914825242,
      "loss": 3.6326,
      "step": 55
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00011969137173622343,
      "loss": 3.7851,
      "step": 60
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00011681129680374918,
      "loss": 3.7425,
      "step": 65
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00011375256694440331,
      "loss": 3.7256,
      "step": 70
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00011052651678711246,
      "loss": 3.6549,
      "step": 75
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00010714510099381572,
      "loss": 3.7504,
      "step": 80
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00010362084995948948,
      "loss": 3.7761,
      "step": 85
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.99668233786982e-05,
      "loss": 3.6905,
      "step": 90
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.619656185073828e-05,
      "loss": 3.6569,
      "step": 95
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.232403670270978e-05,
      "loss": 3.6521,
      "step": 100
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.83635982164553e-05,
      "loss": 3.7574,
      "step": 105
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.432992245121922e-05,
      "loss": 3.6063,
      "step": 110
    },
    {
      "epoch": 0.45,
      "learning_rate": 8.023795685908513e-05,
      "loss": 3.6537,
      "step": 115
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.61028648947222e-05,
      "loss": 3.6364,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.193996982469813e-05,
      "loss": 3.7473,
      "step": 125
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.776469794458297e-05,
      "loss": 3.6127,
      "step": 130
    },
    {
      "epoch": 0.52,
      "learning_rate": 6.359252141426174e-05,
      "loss": 3.6751,
      "step": 135
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.943890092328931e-05,
      "loss": 3.6126,
      "step": 140
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.531922839875045e-05,
      "loss": 3.6423,
      "step": 145
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.1248769967930697e-05,
      "loss": 3.4521,
      "step": 150
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.724260938715906e-05,
      "loss": 3.6127,
      "step": 155
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.331559214645638e-05,
      "loss": 3.7675,
      "step": 160
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9482270457119095e-05,
      "loss": 3.6344,
      "step": 165
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.575684932609548e-05,
      "loss": 3.5602,
      "step": 170
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.215313391698519e-05,
      "loss": 3.6583,
      "step": 175
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.8684478392724447e-05,
      "loss": 3.6643,
      "step": 180
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.5363736429528382e-05,
      "loss": 3.4823,
      "step": 185
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.2203213585470068e-05,
      "loss": 3.4787,
      "step": 190
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.9214621700201495e-05,
      "loss": 3.4209,
      "step": 195
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.6409035494796526e-05,
      "loss": 3.3871,
      "step": 200
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3796851532541997e-05,
      "loss": 3.3947,
      "step": 205
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1387749692754382e-05,
      "loss": 3.4866,
      "step": 210
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.190657300387505e-06,
      "loss": 3.5918,
      "step": 215
    },
    {
      "epoch": 0.85,
      "learning_rate": 7.213716044354821e-06,
      "loss": 3.4983,
      "step": 220
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.464251807155671e-06,
      "loss": 3.7741,
      "step": 225
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.948747517607189e-06,
      "loss": 3.7211,
      "step": 230
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.6728191272803667e-06,
      "loss": 3.429,
      "step": 235
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6411947996634035e-06,
      "loss": 3.5074,
      "step": 240
    },
    {
      "epoch": 0.95,
      "learning_rate": 8.57697389170679e-07,
      "loss": 3.7424,
      "step": 245
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.252302749240256e-07,
      "loss": 3.5668,
      "step": 250
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.576660180157781e-08,
      "loss": 3.5697,
      "step": 255
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.57045841217041,
      "eval_runtime": 16.4667,
      "eval_samples_per_second": 22.47,
      "eval_steps_per_second": 2.854,
      "step": 258
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.969441783384187e-09,
      "loss": 3.3342,
      "step": 260
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.3782756328582764,
      "eval_runtime": 16.6767,
      "eval_samples_per_second": 20.508,
      "eval_steps_per_second": 2.578,
      "step": 261
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.949666974663345e-08,
      "loss": 3.4715,
      "step": 265
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.0213613921093164e-07,
      "loss": 3.4357,
      "step": 270
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.717195750166447e-07,
      "loss": 3.3402,
      "step": 275
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.7861845197078197e-06,
      "loss": 3.395,
      "step": 280
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.842581801675534e-06,
      "loss": 3.3568,
      "step": 285
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.137086214086682e-06,
      "loss": 3.6921,
      "step": 290
    },
    {
      "epoch": 1.13,
      "learning_rate": 5.66501036593004e-06,
      "loss": 3.4689,
      "step": 295
    },
    {
      "epoch": 1.15,
      "learning_rate": 7.420821655024756e-06,
      "loss": 3.5794,
      "step": 300
    },
    {
      "epoch": 1.17,
      "learning_rate": 9.39816230153247e-06,
      "loss": 3.4054,
      "step": 305
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.1589872369431459e-05,
      "loss": 3.5207,
      "step": 310
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3988015692592823e-05,
      "loss": 3.352,
      "step": 315
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.658390861157988e-05,
      "loss": 3.5263,
      "step": 320
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.936815141711555e-05,
      "loss": 3.4251,
      "step": 325
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.2330662386360735e-05,
      "loss": 3.5036,
      "step": 330
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.5460714288759305e-05,
      "loss": 3.6056,
      "step": 335
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8746973229261208e-05,
      "loss": 3.4339,
      "step": 340
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.2177539688273746e-05,
      "loss": 3.2688,
      "step": 345
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.5739991609734934e-05,
      "loss": 3.6447,
      "step": 350
    },
    {
      "epoch": 1.36,
      "learning_rate": 3.9421429381287695e-05,
      "loss": 3.4024,
      "step": 355
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.320852254368187e-05,
      "loss": 3.332,
      "step": 360
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.7087558060269536e-05,
      "loss": 3.4638,
      "step": 365
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.1044489971810725e-05,
      "loss": 3.5294,
      "step": 370
    },
    {
      "epoch": 1.44,
      "learning_rate": 5.506499025678891e-05,
      "loss": 3.4554,
      "step": 375
    },
    {
      "epoch": 1.46,
      "learning_rate": 5.9134500713072235e-05,
      "loss": 3.2164,
      "step": 380
    },
    {
      "epoch": 1.48,
      "learning_rate": 6.323828567305678e-05,
      "loss": 3.5425,
      "step": 385
    },
    {
      "epoch": 1.49,
      "learning_rate": 6.736148536141151e-05,
      "loss": 3.3742,
      "step": 390
    },
    {
      "epoch": 1.51,
      "learning_rate": 7.148916970221591e-05,
      "loss": 3.4436,
      "step": 395
    },
    {
      "epoch": 1.53,
      "learning_rate": 7.560639238065579e-05,
      "loss": 3.4376,
      "step": 400
    },
    {
      "epoch": 1.55,
      "learning_rate": 7.969824496351964e-05,
      "loss": 3.5896,
      "step": 405
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.374991088252677e-05,
      "loss": 3.4729,
      "step": 410
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.774671908501242e-05,
      "loss": 3.4136,
      "step": 415
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.1674197157702e-05,
      "loss": 3.475,
      "step": 420
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.551812373121417e-05,
      "loss": 3.3729,
      "step": 425
    },
    {
      "epoch": 1.65,
      "learning_rate": 9.926457997553504e-05,
      "loss": 3.5851,
      "step": 430
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00010290000000000001,
      "loss": 3.4967,
      "step": 435
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.0001064112199752845,
      "loss": 3.3427,
      "step": 440
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0001097855257995339,
      "loss": 3.5208,
      "step": 445
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011301069913603334,
      "loss": 3.4049,
      "step": 450
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.00011607506165571554,
      "loss": 3.452,
      "step": 455
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.00011896751732430487,
      "loss": 3.5148,
      "step": 460
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.00012167759258097654,
      "loss": 3.3147,
      "step": 465
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.00012419547426304373,
      "loss": 3.38,
      "step": 470
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.00012651204513934757,
      "loss": 3.3329,
      "step": 475
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.00012861891692368509,
      "loss": 3.5188,
      "step": 480
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.00013050846064873163,
      "loss": 3.4628,
      "step": 485
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.0001321738342904763,
      "loss": 3.5377,
      "step": 490
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.00013360900754314024,
      "loss": 3.4683,
      "step": 495
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.00013480878365487042,
      "loss": 3.4532,
      "step": 500
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.00013576881824513962,
      "loss": 3.5582,
      "step": 505
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.00013648563503571674,
      "loss": 3.5528,
      "step": 510
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.00013695663843824482,
      "loss": 3.4235,
      "step": 515
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.00013718012295284757,
      "loss": 3.5739,
      "step": 520
    },
    {
      "epoch": 2.0,
      "eval_loss": 3.3637726306915283,
      "eval_runtime": 16.4835,
      "eval_samples_per_second": 20.748,
      "eval_steps_per_second": 2.609,
      "step": 522
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.00013603195463831566,
      "loss": 3.4382,
      "step": 525
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00013512806981200165,
      "loss": 3.3096,
      "step": 530
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.00013397182122930294,
      "loss": 3.2608,
      "step": 535
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.00013256759493713883,
      "loss": 3.3047,
      "step": 540
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.00013092071764681933,
      "loss": 3.3901,
      "step": 545
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00012903743652800486,
      "loss": 3.3488,
      "step": 550
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.00012692489551105156,
      "loss": 3.3604,
      "step": 555
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.0001245911081876368,
      "loss": 3.2288,
      "step": 560
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.00012204492741246097,
      "loss": 3.3826,
      "step": 565
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.0001192960117213372,
      "loss": 3.3225,
      "step": 570
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0001163547886930568,
      "loss": 3.39,
      "step": 575
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.00011323241539401106,
      "loss": 3.2293,
      "step": 580
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.00010994073605561706,
      "loss": 3.24,
      "step": 585
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010649223714509067,
      "loss": 3.3242,
      "step": 590
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.00010289999999999998,
      "loss": 3.4037,
      "step": 595
    },
    {
      "epoch": 2.35,
      "learning_rate": 9.917765120627052e-05,
      "loss": 3.4103,
      "step": 600
    },
    {
      "epoch": 2.37,
      "learning_rate": 9.53393109078778e-05,
      "loss": 3.2591,
      "step": 605
    },
    {
      "epoch": 2.39,
      "learning_rate": 9.139953924430467e-05,
      "loss": 3.2654,
      "step": 610
    },
    {
      "epoch": 2.41,
      "learning_rate": 8.737328111894491e-05,
      "loss": 3.1585,
      "step": 615
    },
    {
      "epoch": 2.43,
      "learning_rate": 8.327580950796576e-05,
      "loss": 3.257,
      "step": 620
    },
    {
      "epoch": 2.45,
      "learning_rate": 7.912266752467782e-05,
      "loss": 3.2541,
      "step": 625
    },
    {
      "epoch": 2.47,
      "learning_rate": 7.492960945918252e-05,
      "loss": 3.2145,
      "step": 630
    },
    {
      "epoch": 2.49,
      "learning_rate": 7.071254101695329e-05,
      "loss": 3.3335,
      "step": 635
    },
    {
      "epoch": 2.51,
      "learning_rate": 6.648745898304675e-05,
      "loss": 3.3844,
      "step": 640
    },
    {
      "epoch": 2.53,
      "learning_rate": 6.227039054081752e-05,
      "loss": 3.3846,
      "step": 645
    },
    {
      "epoch": 2.55,
      "learning_rate": 5.807733247532229e-05,
      "loss": 3.3111,
      "step": 650
    },
    {
      "epoch": 2.57,
      "learning_rate": 5.392419049203428e-05,
      "loss": 3.1908,
      "step": 655
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.9826718881055135e-05,
      "loss": 3.3639,
      "step": 660
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.580046075569537e-05,
      "loss": 3.0676,
      "step": 665
    },
    {
      "epoch": 2.63,
      "learning_rate": 4.1860689092122226e-05,
      "loss": 3.2607,
      "step": 670
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.8022348793729525e-05,
      "loss": 3.249,
      "step": 675
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.4300000000000054e-05,
      "loss": 3.2803,
      "step": 680
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.0707762854909365e-05,
      "loss": 3.292,
      "step": 685
    },
    {
      "epoch": 2.71,
      "learning_rate": 2.7259263944382986e-05,
      "loss": 3.3944,
      "step": 690
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.3967584605988973e-05,
      "loss": 3.3062,
      "step": 695
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.0845211306943224e-05,
      "loss": 3.2958,
      "step": 700
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.7903988278662788e-05,
      "loss": 3.0765,
      "step": 705
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.515507258753902e-05,
      "loss": 3.2299,
      "step": 710
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.2608891812363205e-05,
      "loss": 3.3755,
      "step": 715
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.0275104488948488e-05,
      "loss": 3.1482,
      "step": 720
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.162563471995208e-06,
      "loss": 3.1407,
      "step": 725
    },
    {
      "epoch": 2.86,
      "learning_rate": 6.279282353180702e-06,
      "loss": 3.3813,
      "step": 730
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.6324050628612214e-06,
      "loss": 3.0962,
      "step": 735
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.228178770697088e-06,
      "loss": 3.2479,
      "step": 740
    },
    {
      "epoch": 2.92,
      "learning_rate": 2.0719301879983714e-06,
      "loss": 3.0949,
      "step": 745
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.1680453616843376e-06,
      "loss": 3.2467,
      "step": 750
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.199530365052086e-07,
      "loss": 3.1761,
      "step": 755
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.3011164863877445e-07,
      "loss": 3.1651,
      "step": 760
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0,
      "loss": 3.2256,
      "step": 765
    },
    {
      "epoch": 3.0,
      "eval_loss": 3.1829922199249268,
      "eval_runtime": 18.683,
      "eval_samples_per_second": 20.875,
      "eval_steps_per_second": 2.623,
      "step": 765
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.2586318832182472e-06,
      "loss": 3.1497,
      "step": 770
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.958637034415764e-07,
      "loss": 3.0369,
      "step": 775
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.7746243957364735e-07,
      "loss": 3.0894,
      "step": 780
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.93157996134388e-09,
      "loss": 3.108,
      "step": 785
    },
    {
      "epoch": 3.0,
      "eval_loss": 3.0555238723754883,
      "eval_runtime": 7.2147,
      "eval_samples_per_second": 46.433,
      "eval_steps_per_second": 5.821,
      "step": 786
    }
  ],
  "max_steps": 1048,
  "num_train_epochs": 4,
  "total_flos": 819411812352000.0,
  "trial_name": null,
  "trial_params": null
}
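
A minimal sketch for inspecting this log, assuming the file is the usual trainer_state.json written next to the best checkpoint (e.g. output/50-cent/checkpoint-786/trainer_state.json) and that matplotlib is available; it simply plots the recorded training loss and eval loss against the step counter.

# Sketch: plot the loss curves stored in the log_history above.
# Assumption (not stated in the log itself): the file path below.
import json

import matplotlib.pyplot as plt

with open("output/50-cent/checkpoint-786/trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries either a training "loss" or an "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), marker="o", linestyle="--", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.title(f"best eval loss {state['best_metric']:.4f} ({state['best_model_checkpoint']})")
plt.show()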