{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 60.0,
  "global_step": 3360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.09, "learning_rate": 0.00013451892828543385, "loss": 3.9488, "step": 5},
    {"epoch": 0.18, "learning_rate": 0.00012668528006706028, "loss": 3.8298, "step": 10},
    {"epoch": 0.27, "learning_rate": 0.00011431137524750748, "loss": 3.7557, "step": 15},
    {"epoch": 0.36, "learning_rate": 9.836442450346448e-05, "loss": 3.5311, "step": 20},
    {"epoch": 0.45, "learning_rate": 8.009092691870492e-05, "loss": 3.3526, "step": 25},
    {"epoch": 0.54, "learning_rate": 6.0919236939313083e-05, "loss": 3.5934, "step": 30},
    {"epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 3.3983, "step": 35},
    {"epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 3.6609, "step": 40},
    {"epoch": 0.8, "learning_rate": 1.2652524389394753e-05, "loss": 3.4898, "step": 45},
    {"epoch": 0.89, "learning_rate": 3.849603540845984e-06, "loss": 3.5749, "step": 50},
    {"epoch": 0.98, "learning_rate": 1.0792048977778093e-07, "loss": 3.4588, "step": 55},
    {"epoch": 1.07, "learning_rate": 1.7199452243268996e-06, "loss": 3.2536, "step": 60},
    {"epoch": 1.16, "learning_rate": 8.559673257059505e-06, "loss": 3.2439, "step": 65},
    {"epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 3.2968, "step": 70},
    {"epoch": 1.34, "learning_rate": 3.541688434458052e-05, "loss": 3.4346, "step": 75},
    {"epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 3.3563, "step": 80},
    {"epoch": 1.52, "learning_rate": 7.244643268047132e-05, "loss": 3.2696, "step": 85},
    {"epoch": 1.61, "learning_rate": 9.125714365012444e-05, "loss": 3.4046, "step": 90},
    {"epoch": 1.7, "learning_rate": 0.00010829685091793463, "loss": 3.4708, "step": 95},
    {"epoch": 1.79, "learning_rate": 0.00012223363969730684, "loss": 3.2387, "step": 100},
    {"epoch": 1.88, "learning_rate": 0.00013197813593027427, "loss": 3.3163, "step": 105},
    {"epoch": 1.96, "learning_rate": 0.00013676865759867644, "loss": 3.2581, "step": 110},
    {"epoch": 2.05, "learning_rate": 0.000136230751870351, "loss": 3.1941, "step": 115},
    {"epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 3.0392, "step": 120},
    {"epoch": 2.23, "learning_rate": 0.00011975105251098516, "loss": 3.0188, "step": 125},
    {"epoch": 2.32, "learning_rate": 0.00010509740044895205, "loss": 3.1013, "step": 130},
    {"epoch": 2.41, "learning_rate": 8.759091608374473e-05, "loss": 3.0585, "step": 135},
    {"epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 3.1302, "step": 140},
    {"epoch": 2.59, "learning_rate": 4.9609083916255386e-05, "loss": 3.2358, "step": 145},
    {"epoch": 2.68, "learning_rate": 3.210259955104798e-05, "loss": 3.119, "step": 150},
    {"epoch": 2.77, "learning_rate": 1.744894748901483e-05, "loss": 2.9946, "step": 155},
    {"epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 3.0184, "step": 160},
    {"epoch": 2.95, "learning_rate": 9.692481296490106e-07, "loss": 2.9798, "step": 165},
    {"epoch": 3.04, "learning_rate": 4.313424013235498e-07, "loss": 3.1282, "step": 170},
    {"epoch": 3.12, "learning_rate": 5.22186406972573e-06, "loss": 3.0772, "step": 175},
    {"epoch": 3.21, "learning_rate": 1.496636030269314e-05, "loss": 2.8216, "step": 180},
    {"epoch": 3.3, "learning_rate": 2.890314908206528e-05, "loss": 2.7665, "step": 185},
    {"epoch": 3.39, "learning_rate": 4.594285634987545e-05, "loss": 3.0073, "step": 190},
    {"epoch": 3.48, "learning_rate": 6.475356731952864e-05, "loss": 3.0372, "step": 195},
    {"epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 2.807, "step": 200},
    {"epoch": 3.66, "learning_rate": 0.0001017831156554194, "loss": 3.1058, "step": 205},
    {"epoch": 3.75, "learning_rate": 0.0001171075251893971, "loss": 2.961, "step": 210},
    {"epoch": 3.84, "learning_rate": 0.0001286403267429405, "loss": 3.1032, "step": 215},
    {"epoch": 3.93, "learning_rate": 0.0001354800547756731, "loss": 2.7667, "step": 220},
    {"epoch": 4.02, "learning_rate": 0.00013709207951022223, "loss": 3.0024, "step": 225},
    {"epoch": 4.11, "learning_rate": 0.00013335039645915404, "loss": 2.8538, "step": 230},
    {"epoch": 4.2, "learning_rate": 0.00012454747561060531, "loss": 2.8202, "step": 235},
    {"epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 2.6845, "step": 240},
    {"epoch": 4.38, "learning_rate": 9.485208346024522e-05, "loss": 2.6865, "step": 245},
    {"epoch": 4.46, "learning_rate": 7.62807630606869e-05, "loss": 2.8686, "step": 250},
    {"epoch": 4.55, "learning_rate": 5.710907308129509e-05, "loss": 2.9936, "step": 255},
    {"epoch": 4.64, "learning_rate": 3.883557549653544e-05, "loss": 2.5979, "step": 260},
    {"epoch": 4.73, "learning_rate": 2.2888624752492583e-05, "loss": 2.7179, "step": 265},
    {"epoch": 4.82, "learning_rate": 1.0514719932939762e-05, "loss": 2.9387, "step": 270},
    {"epoch": 4.91, "learning_rate": 2.681071714566175e-06, "loss": 2.6822, "step": 275},
    {"epoch": 5.0, "learning_rate": 0.0, "loss": 2.7684, "step": 280},
    {"epoch": 5.09, "learning_rate": 2.6810717145661523e-06, "loss": 2.6722, "step": 285},
    {"epoch": 5.18, "learning_rate": 1.0514719932939649e-05, "loss": 2.597, "step": 290},
    {"epoch": 5.27, "learning_rate": 2.2888624752492607e-05, "loss": 2.7343, "step": 295},
    {"epoch": 5.36, "learning_rate": 3.8835575496535365e-05, "loss": 2.567, "step": 300},
    {"epoch": 5.45, "learning_rate": 5.7109073081294886e-05, "loss": 2.6375, "step": 305},
    {"epoch": 5.54, "learning_rate": 7.628076306068694e-05, "loss": 2.734, "step": 310},
    {"epoch": 5.62, "learning_rate": 9.485208346024515e-05, "loss": 2.6448, "step": 315},
    {"epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 2.6255, "step": 320},
    {"epoch": 5.8, "learning_rate": 0.0001245474756106052, "loss": 2.6455, "step": 325},
    {"epoch": 5.89, "learning_rate": 0.00013335039645915407, "loss": 2.5969, "step": 330},
    {"epoch": 5.98, "learning_rate": 0.00013709207951022223, "loss": 2.6923, "step": 335},
    {"epoch": 6.07, "learning_rate": 0.00013548005477567314, "loss": 2.3761, "step": 340},
    {"epoch": 6.16, "learning_rate": 0.00012864032674294047, "loss": 2.4563, "step": 345},
    {"epoch": 6.25, "learning_rate": 0.00011710752518939715, "loss": 2.4791, "step": 350},
    {"epoch": 6.34, "learning_rate": 0.00010178311565541947, "loss": 2.446, "step": 355},
    {"epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 2.5515, "step": 360},
    {"epoch": 6.52, "learning_rate": 6.475356731952872e-05, "loss": 2.5469, "step": 365},
    {"epoch": 6.61, "learning_rate": 4.594285634987565e-05, "loss": 2.6391, "step": 370},
    {"epoch": 6.7, "learning_rate": 2.890314908206545e-05, "loss": 2.36, "step": 375},
    {"epoch": 6.79, "learning_rate": 1.496636030269327e-05, "loss": 2.4806, "step": 380},
    {"epoch": 6.88, "learning_rate": 5.221864069725715e-06, "loss": 2.6083, "step": 385},
    {"epoch": 6.96, "learning_rate": 4.3134240132355735e-07, "loss": 2.6457, "step": 390},
    {"epoch": 7.05, "learning_rate": 9.692481296490106e-07, "loss": 2.4165, "step": 395},
    {"epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 2.301, "step": 400},
    {"epoch": 7.23, "learning_rate": 1.744894748901478e-05, "loss": 2.4478, "step": 405},
    {"epoch": 7.32, "learning_rate": 3.2102599551047805e-05, "loss": 2.3692, "step": 410},
    {"epoch": 7.41, "learning_rate": 4.960908391625518e-05, "loss": 2.3269, "step": 415},
    {"epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 2.1706, "step": 420},
    {"epoch": 7.59, "learning_rate": 8.759091608374469e-05, "loss": 2.3618, "step": 425},
    {"epoch": 7.68, "learning_rate": 0.00010509740044895209, "loss": 2.284, "step": 430},
    {"epoch": 7.77, "learning_rate": 0.00011975105251098514, "loss": 2.3587, "step": 435},
    {"epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 2.4467, "step": 440},
    {"epoch": 7.95, "learning_rate": 0.000136230751870351, "loss": 2.5326, "step": 445},
    {"epoch": 8.04, "learning_rate": 0.00013676865759867642, "loss": 2.3045, "step": 450},
    {"epoch": 8.12, "learning_rate": 0.00013197813593027432, "loss": 2.1819, "step": 455},
    {"epoch": 8.21, "learning_rate": 0.00012223363969730697, "loss": 2.2893, "step": 460},
    {"epoch": 8.3, "learning_rate": 0.00010829685091793466, "loss": 2.3117, "step": 465},
    {"epoch": 8.39, "learning_rate": 9.12571436501247e-05, "loss": 2.272, "step": 470},
    {"epoch": 8.48, "learning_rate": 7.24464326804714e-05, "loss": 2.3461, "step": 475},
    {"epoch": 8.57, "learning_rate": 5.33350639305969e-05, "loss": 2.1348, "step": 480},
    {"epoch": 8.66, "learning_rate": 3.541688434458043e-05, "loss": 2.2985, "step": 485},
    {"epoch": 8.75, "learning_rate": 2.0092474810602934e-05, "loss": 2.077, "step": 490},
    {"epoch": 8.84, "learning_rate": 8.559673257059573e-06, "loss": 2.2565, "step": 495},
    {"epoch": 8.93, "learning_rate": 1.719945224326892e-06, "loss": 2.1992, "step": 500},
    {"epoch": 9.02, "learning_rate": 1.0792048977777332e-07, "loss": 2.1455, "step": 505},
    {"epoch": 9.11, "learning_rate": 3.849603540845977e-06, "loss": 2.1314, "step": 510},
    {"epoch": 9.2, "learning_rate": 1.2652524389394722e-05, "loss": 1.9046, "step": 515},
    {"epoch": 9.29, "learning_rate": 2.582859959249101e-05, "loss": 2.0235, "step": 520},
    {"epoch": 9.38, "learning_rate": 4.234791653975475e-05, "loss": 2.0746, "step": 525},
    {"epoch": 9.46, "learning_rate": 6.091923693931295e-05, "loss": 2.0545, "step": 530},
    {"epoch": 9.55, "learning_rate": 8.0090926918705e-05, "loss": 2.0609, "step": 535},
    {"epoch": 9.64, "learning_rate": 9.83644245034643e-05, "loss": 2.1458, "step": 540},
    {"epoch": 9.73, "learning_rate": 0.00011431137524750748, "loss": 1.9214, "step": 545},
    {"epoch": 9.82, "learning_rate": 0.00012668528006706028, "loss": 2.2293, "step": 550},
    {"epoch": 9.91, "learning_rate": 0.00013451892828543387, "loss": 2.2592, "step": 555},
    {"epoch": 10.0, "learning_rate": 0.0001372, "loss": 2.1707, "step": 560},
    {"epoch": 10.09, "learning_rate": 0.00013451892828543393, "loss": 1.7839, "step": 565},
    {"epoch": 10.18, "learning_rate": 0.0001266852800670604, "loss": 1.931, "step": 570},
    {"epoch": 10.27, "learning_rate": 0.00011431137524750779, "loss": 1.981, "step": 575},
    {"epoch": 10.36, "learning_rate": 9.836442450346467e-05, "loss": 1.9186, "step": 580},
    {"epoch": 10.45, "learning_rate": 8.009092691870492e-05, "loss": 1.9439, "step": 585},
    {"epoch": 10.54, "learning_rate": 6.0919236939312867e-05, "loss": 1.7981, "step": 590},
    {"epoch": 10.62, "learning_rate": 4.23479165397549e-05, "loss": 1.8503, "step": 595},
    {"epoch": 10.71, "learning_rate": 2.5828599592491143e-05, "loss": 2.1472, "step": 600},
    {"epoch": 10.8, "learning_rate": 1.265252438939482e-05, "loss": 1.8879, "step": 605},
    {"epoch": 10.89, "learning_rate": 3.849603540846114e-06, "loss": 2.0268, "step": 610},
    {"epoch": 10.98, "learning_rate": 1.0792048977779616e-07, "loss": 2.0845, "step": 615},
    {"epoch": 11.07, "learning_rate": 1.7199452243269073e-06, "loss": 2.1026, "step": 620},
    {"epoch": 11.16, "learning_rate": 8.559673257059612e-06, "loss": 1.8359, "step": 625},
    {"epoch": 11.25, "learning_rate": 2.0092474810602812e-05, "loss": 1.6721, "step": 630},
    {"epoch": 11.34, "learning_rate": 3.541688434458027e-05, "loss": 1.6882, "step": 635},
    {"epoch": 11.43, "learning_rate": 5.333506393059674e-05, "loss": 1.7527, "step": 640},
    {"epoch": 11.52, "learning_rate": 7.244643268047099e-05, "loss": 1.7121, "step": 645},
    {"epoch": 11.61, "learning_rate": 9.125714365012432e-05, "loss": 1.8074, "step": 650},
    {"epoch": 11.7, "learning_rate": 0.00010829685091793471, "loss": 1.8594, "step": 655},
    {"epoch": 11.79, "learning_rate": 0.000122233639697307, "loss": 1.8073, "step": 660},
    {"epoch": 11.88, "learning_rate": 0.00013197813593027427, "loss": 1.8472, "step": 665},
    {"epoch": 11.96, "learning_rate": 0.00013676865759867642, "loss": 1.8812, "step": 670},
    {"epoch": 12.05, "learning_rate": 0.000136230751870351, "loss": 1.5812, "step": 675},
    {"epoch": 12.14, "learning_rate": 0.0001304064643381061, "loss": 1.6567, "step": 680},
    {"epoch": 12.23, "learning_rate": 0.00011975105251098525, "loss": 1.6487, "step": 685},
    {"epoch": 12.32, "learning_rate": 0.000105097400448952, "loss": 1.698, "step": 690},
    {"epoch": 12.41, "learning_rate": 8.759091608374439e-05, "loss": 1.7092, "step": 695},
    {"epoch": 12.5, "learning_rate": 6.859999999999999e-05, "loss": 1.5692, "step": 700},
    {"epoch": 12.59, "learning_rate": 4.960908391625558e-05, "loss": 1.7959, "step": 705},
    {"epoch": 12.68, "learning_rate": 3.210259955104795e-05, "loss": 1.769, "step": 710},
    {"epoch": 12.77, "learning_rate": 1.7448947489015055e-05, "loss": 1.672, "step": 715},
    {"epoch": 12.86, "learning_rate": 6.793535661894092e-06, "loss": 1.6729, "step": 720},
    {"epoch": 12.95, "learning_rate": 9.692481296490868e-07, "loss": 1.6378, "step": 725},
    {"epoch": 13.04, "learning_rate": 4.3134240132354215e-07, "loss": 1.5157, "step": 730},
    {"epoch": 13.12, "learning_rate": 5.221864069725745e-06, "loss": 1.5695, "step": 735},
    {"epoch": 13.21, "learning_rate": 1.496636030269301e-05, "loss": 1.4221, "step": 740},
    {"epoch": 13.3, "learning_rate": 2.890314908206531e-05, "loss": 1.5325, "step": 745},
    {"epoch": 13.39, "learning_rate": 4.5942856349875256e-05, "loss": 1.5675, "step": 750},
    {"epoch": 13.48, "learning_rate": 6.475356731952856e-05, "loss": 1.4491, "step": 755},
    {"epoch": 13.57, "learning_rate": 8.386493606940281e-05, "loss": 1.4567, "step": 760},
    {"epoch": 13.66, "learning_rate": 0.00010178311565541931, "loss": 1.4152, "step": 765},
    {"epoch": 13.75, "learning_rate": 0.00011710752518939722, "loss": 1.6904, "step": 770},
    {"epoch": 13.84, "learning_rate": 0.00012864032674294042, "loss": 1.7226, "step": 775},
    {"epoch": 13.93, "learning_rate": 0.0001354800547756731, "loss": 1.6131, "step": 780},
    {"epoch": 14.02, "learning_rate": 0.00013709207951022223, "loss": 1.4874, "step": 785},
    {"epoch": 14.11, "learning_rate": 0.00013335039645915404, "loss": 1.321, "step": 790},
    {"epoch": 14.2, "learning_rate": 0.00012454747561060542, "loss": 1.5378, "step": 795},
    {"epoch": 14.29, "learning_rate": 0.00011137140040750922, "loss": 1.5117, "step": 800},
    {"epoch": 14.38, "learning_rate": 9.485208346024507e-05, "loss": 1.3334, "step": 805},
    {"epoch": 14.46, "learning_rate": 7.62807630606871e-05, "loss": 1.418, "step": 810},
    {"epoch": 14.55, "learning_rate": 5.710907308129505e-05, "loss": 1.4658, "step": 815},
    {"epoch": 14.64, "learning_rate": 3.883557549653573e-05, "loss": 1.6222, "step": 820},
    {"epoch": 14.73, "learning_rate": 2.2888624752492553e-05, "loss": 1.4192, "step": 825},
    {"epoch": 14.82, "learning_rate": 1.0514719932939869e-05, "loss": 1.346, "step": 830},
    {"epoch": 14.91, "learning_rate": 2.681071714566198e-06, "loss": 1.3806, "step": 835},
    {"epoch": 15.0, "learning_rate": 0.0, "loss": 1.384, "step": 840},
    {"epoch": 15.09, "learning_rate": 2.6810717145661294e-06, "loss": 1.2256, "step": 845},
    {"epoch": 15.18, "learning_rate": 1.0514719932939732e-05, "loss": 1.3474, "step": 850},
    {"epoch": 15.27, "learning_rate": 2.2888624752492363e-05, "loss": 1.3215, "step": 855},
    {"epoch": 15.36, "learning_rate": 3.88355754965355e-05, "loss": 1.2651, "step": 860},
    {"epoch": 15.45, "learning_rate": 5.710907308129481e-05, "loss": 1.3224, "step": 865},
    {"epoch": 15.54, "learning_rate": 7.628076306068686e-05, "loss": 1.1728, "step": 870},
    {"epoch": 15.62, "learning_rate": 9.485208346024484e-05, "loss": 1.3578, "step": 875},
    {"epoch": 15.71, "learning_rate": 0.00011137140040750902, "loss": 1.1825, "step": 880},
    {"epoch": 15.8, "learning_rate": 0.0001245474756106053, "loss": 1.3246, "step": 885},
    {"epoch": 15.89, "learning_rate": 0.00013335039645915393, "loss": 1.332, "step": 890},
    {"epoch": 15.98, "learning_rate": 0.00013709207951022223, "loss": 1.388, "step": 895},
    {"epoch": 16.07, "learning_rate": 0.00013548005477567304, "loss": 1.2832, "step": 900},
    {"epoch": 16.16, "learning_rate": 0.00012864032674294074, "loss": 1.2149, "step": 905},
    {"epoch": 16.25, "learning_rate": 0.00011710752518939739, "loss": 1.0402, "step": 910},
    {"epoch": 16.34, "learning_rate": 0.00010178311565541954, "loss": 1.2199, "step": 915},
    {"epoch": 16.43, "learning_rate": 8.386493606940354e-05, "loss": 1.2836, "step": 920},
    {"epoch": 16.52, "learning_rate": 6.47535673195288e-05, "loss": 1.2179, "step": 925},
    {"epoch": 16.61, "learning_rate": 4.59428563498755e-05, "loss": 1.2299, "step": 930},
    {"epoch": 16.7, "learning_rate": 2.8903149082065114e-05, "loss": 1.072, "step": 935},
    {"epoch": 16.79, "learning_rate": 1.4966360302693468e-05, "loss": 1.2192, "step": 940},
    {"epoch": 16.88, "learning_rate": 5.221864069725844e-06, "loss": 1.2081, "step": 945},
    {"epoch": 16.96, "learning_rate": 4.3134240132356497e-07, "loss": 1.3213, "step": 950},
    {"epoch": 17.05, "learning_rate": 9.692481296489572e-07, "loss": 1.0468, "step": 955},
    {"epoch": 17.14, "learning_rate": 6.793535661893986e-06, "loss": 1.1071, "step": 960},
    {"epoch": 17.23, "learning_rate": 1.7448947489014885e-05, "loss": 1.0509, "step": 965},
    {"epoch": 17.32, "learning_rate": 3.210259955104815e-05, "loss": 0.96, "step": 970},
    {"epoch": 17.41, "learning_rate": 4.9609083916254864e-05, "loss": 1.1523, "step": 975},
    {"epoch": 17.5, "learning_rate": 6.859999999999973e-05, "loss": 0.9922, "step": 980},
    {"epoch": 17.59, "learning_rate": 8.759091608374462e-05, "loss": 1.1601, "step": 985},
    {"epoch": 17.68, "learning_rate": 0.0001050974004489518, "loss": 0.9979, "step": 990},
    {"epoch": 17.77, "learning_rate": 0.00011975105251098509, "loss": 1.1048, "step": 995},
    {"epoch": 17.86, "learning_rate": 0.00013040646433810598, "loss": 1.0999, "step": 1000},
    {"epoch": 17.95, "learning_rate": 0.00013623075187035104, "loss": 1.2753, "step": 1005},
    {"epoch": 18.04, "learning_rate": 0.0001367686575986765, "loss": 1.0046, "step": 1010},
    {"epoch": 18.12, "learning_rate": 0.00013197813593027435, "loss": 0.9683, "step": 1015},
    {"epoch": 18.21, "learning_rate": 0.00012223363969730686, "loss": 1.0153, "step": 1020},
    {"epoch": 18.3, "learning_rate": 0.00010829685091793493, "loss": 0.9562, "step": 1025},
    {"epoch": 18.39, "learning_rate": 9.125714365012455e-05, "loss": 1.0145, "step": 1030},
    {"epoch": 18.48, "learning_rate": 7.244643268047124e-05, "loss": 1.0414, "step": 1035},
    {"epoch": 18.57, "learning_rate": 5.333506393059651e-05, "loss": 1.0547, "step": 1040},
    {"epoch": 18.66, "learning_rate": 3.541688434458093e-05, "loss": 1.038, "step": 1045},
    {"epoch": 18.75, "learning_rate": 2.0092474810602995e-05, "loss": 0.9887, "step": 1050},
    {"epoch": 18.84, "learning_rate": 8.559673257059497e-06, "loss": 1.0939, "step": 1055},
    {"epoch": 18.93, "learning_rate": 1.7199452243269606e-06, "loss": 1.0271, "step": 1060},
    {"epoch": 19.02, "learning_rate": 1.0792048977778093e-07, "loss": 0.8933, "step": 1065},
    {"epoch": 19.11, "learning_rate": 3.84960354084603e-06, "loss": 0.9411, "step": 1070},
    {"epoch": 19.2, "learning_rate": 1.2652524389394958e-05, "loss": 0.8984, "step": 1075},
    {"epoch": 19.29, "learning_rate": 2.5828599592490564e-05, "loss": 0.8203, "step": 1080},
    {"epoch": 19.38, "learning_rate": 4.234791653975466e-05, "loss": 0.8312, "step": 1085},
    {"epoch": 19.46, "learning_rate": 6.09192369393131e-05, "loss": 0.7923, "step": 1090},
    {"epoch": 19.55, "learning_rate": 8.009092691870466e-05, "loss": 0.8649, "step": 1095},
    {"epoch": 19.64, "learning_rate": 9.836442450346445e-05, "loss": 0.939, "step": 1100},
    {"epoch": 19.73, "learning_rate": 0.0001143113752475076, "loss": 0.8501, "step": 1105},
    {"epoch": 19.82, "learning_rate": 0.0001266852800670605, "loss": 0.9123, "step": 1110},
    {"epoch": 19.91, "learning_rate": 0.0001345189282854337, "loss": 0.9598, "step": 1115},
    {"epoch": 20.0, "learning_rate": 0.0001372, "loss": 0.9292, "step": 1120},
    {"epoch": 20.09, "learning_rate": 0.00013451892828543382, "loss": 0.8035, "step": 1125},
    {"epoch": 20.18, "learning_rate": 0.00012668528006706069, "loss": 0.792, "step": 1130},
    {"epoch": 20.27, "learning_rate": 0.00011431137524750785, "loss": 0.8557, "step": 1135},
    {"epoch": 20.36, "learning_rate": 9.836442450346476e-05, "loss": 0.8628, "step": 1140},
    {"epoch": 20.45, "learning_rate": 8.0090926918705e-05, "loss": 0.8219, "step": 1145},
    {"epoch": 20.54, "learning_rate": 6.091923693931392e-05, "loss": 0.8509, "step": 1150},
    {"epoch": 20.62, "learning_rate": 4.234791653975543e-05, "loss": 0.8817, "step": 1155},
    {"epoch": 20.71, "learning_rate": 2.5828599592491204e-05, "loss": 0.8186, "step": 1160},
    {"epoch": 20.8, "learning_rate": 1.2652524389394875e-05, "loss": 0.8184, "step": 1165},
    {"epoch": 20.89, "learning_rate": 3.849603540845984e-06, "loss": 0.8315, "step": 1170},
    {"epoch": 20.98, "learning_rate": 1.0792048977777332e-07, "loss": 0.8831, "step": 1175},
    {"epoch": 21.07, "learning_rate": 1.7199452243269987e-06, "loss": 0.737, "step": 1180},
    {"epoch": 21.16, "learning_rate": 8.559673257059337e-06, "loss": 0.776, "step": 1185},
    {"epoch": 21.25, "learning_rate": 2.009247481060276e-05, "loss": 0.6944, "step": 1190},
    {"epoch": 21.34, "learning_rate": 3.541688434458063e-05, "loss": 0.6937, "step": 1195},
    {"epoch": 21.43, "learning_rate": 5.333506393059618e-05, "loss": 0.7423, "step": 1200},
    {"epoch": 21.52, "learning_rate": 7.24464326804709e-05, "loss": 0.7534, "step": 1205},
    {"epoch": 21.61, "learning_rate": 9.125714365012422e-05, "loss": 0.6734, "step": 1210},
    {"epoch": 21.7, "learning_rate": 0.00010829685091793466, "loss": 0.7151, "step": 1215},
    {"epoch": 21.79, "learning_rate": 0.00012223363969730635, "loss": 0.8225, "step": 1220},
    {"epoch": 21.88, "learning_rate": 0.00013197813593027405, "loss": 0.6728, "step": 1225},
    {"epoch": 21.96, "learning_rate": 0.0001367686575986764, "loss": 0.7781, "step": 1230},
    {"epoch": 22.05, "learning_rate": 0.00013623075187035101, "loss": 0.724, "step": 1235},
    {"epoch": 22.14, "learning_rate": 0.00013040646433810593, "loss": 0.5813, "step": 1240},
    {"epoch": 22.23, "learning_rate": 0.00011975105251098498, "loss": 0.7065, "step": 1245},
    {"epoch": 22.32, "learning_rate": 0.00010509740044895168, "loss": 0.6825, "step": 1250},
    {"epoch": 22.41, "learning_rate": 8.759091608374493e-05, "loss": 0.7155, "step": 1255},
    {"epoch": 22.5, "learning_rate": 6.860000000000005e-05, "loss": 0.7597, "step": 1260},
    {"epoch": 22.59, "learning_rate": 4.9609083916255196e-05, "loss": 0.749, "step": 1265},
    {"epoch": 22.68, "learning_rate": 3.2102599551048435e-05, "loss": 0.7066, "step": 1270},
    {"epoch": 22.77, "learning_rate": 1.7448947489015106e-05, "loss": 0.6666, "step": 1275},
    {"epoch": 22.86, "learning_rate": 6.7935356618941304e-06, "loss": 0.6573, "step": 1280},
    {"epoch": 22.95, "learning_rate": 9.692481296490182e-07, "loss": 0.7112, "step": 1285},
    {"epoch": 23.04, "learning_rate": 4.313424013234736e-07, "loss": 0.5373, "step": 1290},
    {"epoch": 23.12, "learning_rate": 5.221864069725524e-06, "loss": 0.5479, "step": 1295},
    {"epoch": 23.21, "learning_rate": 1.4966360302692958e-05, "loss": 0.5882, "step": 1300},
    {"epoch": 23.3, "learning_rate": 2.8903149082065243e-05, "loss": 0.5555, "step": 1305},
    {"epoch": 23.39, "learning_rate": 4.5942856349875636e-05, "loss": 0.5804, "step": 1310},
    {"epoch": 23.48, "learning_rate": 6.475356731952897e-05, "loss": 0.5679, "step": 1315},
    {"epoch": 23.57, "learning_rate": 8.386493606940368e-05, "loss": 0.6024, "step": 1320},
    {"epoch": 23.66, "learning_rate": 0.00010178311565541925, "loss": 0.552, "step": 1325},
    {"epoch": 23.75, "learning_rate": 0.00011710752518939715, "loss": 0.6404, "step": 1330},
    {"epoch": 23.84, "learning_rate": 0.0001286403267429406, "loss": 0.581, "step": 1335},
    {"epoch": 23.93, "learning_rate": 0.00013548005477567298, "loss": 0.6956, "step": 1340},
    {"epoch": 24.02, "learning_rate": 0.00013709207951022223, "loss": 0.6156, "step": 1345},
    {"epoch": 24.11, "learning_rate": 0.00013335039645915407, "loss": 0.5472, "step": 1350},
    {"epoch": 24.2, "learning_rate": 0.0001245474756106052, "loss": 0.5691, "step": 1355},
    {"epoch": 24.29, "learning_rate": 0.00011137140040750965, "loss": 0.5632, "step": 1360},
    {"epoch": 24.38, "learning_rate": 9.485208346024561e-05, "loss": 0.6406, "step": 1365},
    {"epoch": 24.46, "learning_rate": 7.628076306068718e-05, "loss": 0.5618, "step": 1370},
    {"epoch": 24.55, "learning_rate": 5.710907308129514e-05, "loss": 0.5619, "step": 1375},
    {"epoch": 24.64, "learning_rate": 3.8835575496535365e-05, "loss": 0.5453, "step": 1380},
    {"epoch": 24.73, "learning_rate": 2.288862475249225e-05, "loss": 0.6172, "step": 1385},
    {"epoch": 24.82, "learning_rate": 1.0514719932939396e-05, "loss": 0.5714, "step": 1390},
    {"epoch": 24.91, "learning_rate": 2.681071714566221e-06, "loss": 0.5045, "step": 1395},
    {"epoch": 25.0, "learning_rate": 0.0, "loss": 0.5848, "step": 1400},
    {"epoch": 25.09, "learning_rate": 2.681071714566236e-06, "loss": 0.4364, "step": 1405},
    {"epoch": 25.18, "learning_rate": 1.0514719932939435e-05, "loss": 0.4791, "step": 1410},
    {"epoch": 25.27, "learning_rate": 2.2888624752492302e-05, "loss": 0.4854, "step": 1415},
    {"epoch": 25.36, "learning_rate": 3.8835575496535426e-05, "loss": 0.3908, "step": 1420},
    {"epoch": 25.45, "learning_rate": 5.7109073081295205e-05, "loss": 0.4823, "step": 1425},
    {"epoch": 25.54, "learning_rate": 7.628076306068627e-05, "loss": 0.478, "step": 1430},
    {"epoch": 25.62, "learning_rate": 9.485208346024477e-05, "loss": 0.4768, "step": 1435},
    {"epoch": 25.71, "learning_rate": 0.00011137140040750896, "loss": 0.4795, "step": 1440},
    {"epoch": 25.8, "learning_rate": 0.0001245474756106047, "loss": 0.4712, "step": 1445},
    {"epoch": 25.89, "learning_rate": 0.00013335039645915377, "loss": 0.5162, "step": 1450},
    {"epoch": 25.98, "learning_rate": 0.0001370920795102222, "loss": 0.6066, "step": 1455},
    {"epoch": 26.07, "learning_rate": 0.0001354800547756732, "loss": 0.4381, "step": 1460},
    {"epoch": 26.16, "learning_rate": 0.00012864032674294058, "loss": 0.4356, "step": 1465},
    {"epoch": 26.25, "learning_rate": 0.0001171075251893971, "loss": 0.4588, "step": 1470},
    {"epoch": 26.34, "learning_rate": 0.00010178311565541919, "loss": 0.4378, "step": 1475},
    {"epoch": 26.43, "learning_rate": 8.386493606940363e-05, "loss": 0.4972, "step": 1480},
    {"epoch": 26.52, "learning_rate": 6.47535673195289e-05, "loss": 0.5317, "step": 1485},
    {"epoch": 26.61, "learning_rate": 4.5942856349875575e-05, "loss": 0.4449, "step": 1490},
    {"epoch": 26.7, "learning_rate": 2.8903149082065182e-05, "loss": 0.5294, "step": 1495},
    {"epoch": 26.79, "learning_rate": 1.496636030269352e-05, "loss": 0.4735, "step": 1500},
    {"epoch": 26.88, "learning_rate": 5.221864069725874e-06, "loss": 0.4522, "step": 1505},
    {"epoch": 26.96, "learning_rate": 4.313424013235802e-07, "loss": 0.4696, "step": 1510},
    {"epoch": 27.05, "learning_rate": 9.69248129648866e-07, "loss": 0.3814, "step": 1515},
    {"epoch": 27.14, "learning_rate": 6.793535661893734e-06, "loss": 0.3927, "step": 1520},
    {"epoch": 27.23, "learning_rate": 1.7448947489014506e-05, "loss": 0.4072, "step": 1525},
    {"epoch": 27.32, "learning_rate": 3.210259955104767e-05, "loss": 0.4177, "step": 1530},
    {"epoch": 27.41, "learning_rate": 4.9609083916255264e-05, "loss": 0.3721, "step": 1535},
    {"epoch": 27.5, "learning_rate": 6.860000000000014e-05, "loss": 0.3918, "step": 1540},
    {"epoch": 27.59, "learning_rate": 8.7590916083745e-05, "loss": 0.391, "step": 1545},
    {"epoch": 27.68, "learning_rate": 0.00010509740044895174, "loss": 0.368, "step": 1550},
    {"epoch": 27.77, "learning_rate": 0.00011975105251098503, "loss": 0.4225, "step": 1555},
    {"epoch": 27.86, "learning_rate": 0.00013040646433810595, "loss": 0.3877, "step": 1560},
    {"epoch": 27.95, "learning_rate": 0.00013623075187035101, "loss": 0.4493, "step": 1565},
    {"epoch": 28.04, "learning_rate": 0.00013676865759867652, "loss": 0.4004, "step": 1570},
    {"epoch": 28.12, "learning_rate": 0.0001319781359302744, "loss": 0.3739, "step": 1575},
    {"epoch": 28.21, "learning_rate": 0.00012223363969730692, "loss": 0.3801, "step": 1580},
    {"epoch": 28.3, "learning_rate": 0.00010829685091793539, "loss": 0.4067, "step": 1585},
    {"epoch": 28.39, "learning_rate": 9.125714365012509e-05, "loss": 0.3734, "step": 1590},
    {"epoch": 28.48, "learning_rate": 7.244643268047182e-05, "loss": 0.3794, "step": 1595},
    {"epoch": 28.57, "learning_rate": 5.3335063930597066e-05, "loss": 0.4073, "step": 1600},
    {"epoch": 28.66, "learning_rate": 3.541688434458058e-05, "loss": 0.4003, "step": 1605},
    {"epoch": 28.75, "learning_rate": 2.0092474810602707e-05, "loss": 0.3867, "step": 1610},
    {"epoch": 28.84, "learning_rate": 8.559673257059307e-06, "loss": 0.368, "step": 1615},
    {"epoch": 28.93, "learning_rate": 1.7199452243269835e-06, "loss": 0.3861, "step": 1620},
    {"epoch": 29.02, "learning_rate": 1.0792048977777332e-07, "loss": 0.3113, "step": 1625},
    {"epoch": 29.11, "learning_rate": 3.849603540846007e-06, "loss": 0.2884, "step": 1630},
    {"epoch": 29.2, "learning_rate": 1.2652524389394912e-05, "loss": 0.341, "step": 1635},
    {"epoch": 29.29, "learning_rate": 2.5828599592490496e-05, "loss": 0.2955, "step": 1640},
    {"epoch": 29.38, "learning_rate": 4.234791653975459e-05, "loss": 0.282, "step": 1645},
    {"epoch": 29.46, "learning_rate": 6.0919236939313016e-05, "loss": 0.2748, "step": 1650},
    {"epoch": 29.55, "learning_rate": 8.009092691870409e-05, "loss": 0.3101, "step": 1655},
    {"epoch": 29.64, "learning_rate": 9.836442450346394e-05, "loss": 0.334, "step": 1660},
    {"epoch": 29.73, "learning_rate": 0.00011431137524750716, "loss": 0.3008, "step": 1665},
    {"epoch": 29.82, "learning_rate": 0.0001266852800670602, "loss": 0.3448, "step": 1670},
    {"epoch": 29.91, "learning_rate": 0.00013451892828543358, "loss": 0.3626, "step": 1675},
    {"epoch": 30.0, "learning_rate": 0.0001372, "loss": 0.3804, "step": 1680},
    {"epoch": 30.09, "learning_rate": 0.0001345189282854337, "loss": 0.2925, "step": 1685},
    {"epoch": 30.18, "learning_rate": 0.00012668528006706047, "loss": 0.3217, "step": 1690},
    {"epoch": 30.27, "learning_rate": 0.00011431137524750754, "loss": 0.3431, "step": 1695},
    {"epoch": 30.36, "learning_rate": 9.83644245034644e-05, "loss": 0.2936, "step": 1700},
    {"epoch": 30.45, "learning_rate": 8.009092691870459e-05, "loss": 0.3465, "step": 1705},
    {"epoch": 30.54, "learning_rate": 6.091923693931352e-05, "loss": 0.3303, "step": 1710},
    {"epoch": 30.62, "learning_rate": 4.234791653975505e-05, "loss": 0.3262, "step": 1715},
    {"epoch": 30.71, "learning_rate": 2.582859959249089e-05, "loss": 0.3351, "step": 1720},
    {"epoch": 30.8, "learning_rate": 1.2652524389395202e-05, "loss": 0.3175, "step": 1725},
    {"epoch": 30.89, "learning_rate": 3.849603540846175e-06, "loss": 0.2689, "step": 1730},
    {"epoch": 30.98, "learning_rate": 1.0792048977780377e-07, "loss": 0.3047, "step": 1735},
    {"epoch": 31.07, "learning_rate": 1.7199452243268694e-06, "loss": 0.2869, "step": 1740},
    {"epoch": 31.16, "learning_rate": 8.559673257059063e-06, "loss": 0.2604, "step": 1745},
    {"epoch": 31.25, "learning_rate": 2.0092474810602348e-05, "loss": 0.2769, "step": 1750},
    {"epoch": 31.34, "learning_rate": 3.541688434458014e-05, "loss": 0.2263, "step": 1755},
    {"epoch": 31.43, "learning_rate": 5.333506393059658e-05, "loss": 0.2346, "step": 1760},
    {"epoch": 31.52, "learning_rate": 7.244643268047132e-05, "loss": 0.2531, "step": 1765},
    {"epoch": 31.61, "learning_rate": 9.125714365012463e-05, "loss": 0.26, "step": 1770},
    {"epoch": 31.7, "learning_rate": 0.00010829685091793499, "loss": 0.2807, "step": 1775},
    {"epoch": 31.79, "learning_rate": 0.00012223363969730662, "loss": 0.2235, "step": 1780},
    {"epoch": 31.88, "learning_rate": 0.0001319781359302742, "loss": 0.2793, "step": 1785},
    {"epoch": 31.96, "learning_rate": 0.00013676865759867644, "loss": 0.2978, "step": 1790},
    {"epoch": 32.05, "learning_rate": 0.00013623075187035093, "loss": 0.274, "step": 1795},
    {"epoch": 32.14, "learning_rate": 0.00013040646433810576, "loss": 0.2887, "step": 1800},
    {"epoch": 32.23, "learning_rate": 0.00011975105251098601, "loss": 0.2648, "step": 1805},
    {"epoch": 32.32, "learning_rate": 0.00010509740044895298, "loss": 0.2468, "step": 1810},
    {"epoch": 32.41, "learning_rate": 8.759091608374549e-05, "loss": 0.2918, "step": 1815},
    {"epoch": 32.5, "learning_rate": 6.860000000000064e-05, "loss": 0.2799, "step": 1820},
    {"epoch": 32.59, "learning_rate": 4.9609083916255745e-05, "loss": 0.2562, "step": 1825},
    {"epoch": 32.68, "learning_rate": 3.2102599551048096e-05, "loss": 0.2663, "step": 1830},
    {"epoch": 32.77, "learning_rate": 1.744894748901484e-05, "loss": 0.2774, "step": 1835},
    {"epoch": 32.86, "learning_rate": 6.793535661894382e-06, "loss": 0.2487, "step": 1840},
    {"epoch": 32.95, "learning_rate": 9.692481296491097e-07, "loss": 0.2686, "step": 1845},
    {"epoch": 33.04, "learning_rate": 4.313424013235193e-07, "loss": 0.2175, "step": 1850},
    {"epoch": 33.12, "learning_rate": 5.221864069725684e-06, "loss": 0.218, "step": 1855},
    {"epoch": 33.21, "learning_rate": 1.4966360302693209e-05, "loss": 0.216, "step": 1860},
    {"epoch": 33.3, "learning_rate": 2.890314908206557e-05, "loss": 0.2596, "step": 1865},
    {"epoch": 33.39, "learning_rate": 4.5942856349876015e-05, "loss": 0.2206, "step": 1870},
    {"epoch": 33.48, "learning_rate": 6.475356731952742e-05, "loss": 0.1793, "step": 1875},
    {"epoch": 33.57, "learning_rate": 8.386493606940219e-05, "loss": 0.2108, "step": 1880},
    {"epoch": 33.66, "learning_rate": 0.00010178311565541875, "loss": 0.2287, "step": 1885},
    {"epoch": 33.75, "learning_rate": 0.00011710752518939675, "loss": 0.1961, "step": 1890},
    {"epoch": 33.84, "learning_rate": 0.00012864032674294034, "loss": 0.2072, "step": 1895},
    {"epoch": 33.93, "learning_rate": 0.00013548005477567306, "loss": 0.2405, "step": 1900},
    {"epoch": 34.02, "learning_rate": 0.00013709207951022223, "loss": 0.2522, "step": 1905},
    {"epoch": 34.11, "learning_rate": 0.00013335039645915423, "loss": 0.2533, "step": 1910},
    {"epoch": 34.2, "learning_rate": 0.00012454747561060553, "loss": 0.2087, "step": 1915},
    {"epoch": 34.29, "learning_rate": 0.00011137140040750936, "loss": 0.2236, "step": 1920},
    {"epoch": 34.38, "learning_rate": 9.485208346024522e-05, "loss": 0.2418, "step": 1925},
    {"epoch": 34.46, "learning_rate": 7.628076306068678e-05, "loss": 0.2321, "step": 1930},
    {"epoch": 34.55, "learning_rate": 5.710907308129474e-05, "loss": 0.2204, "step": 1935},
    {"epoch": 34.64, "learning_rate": 3.8835575496535006e-05, "loss": 0.253, "step": 1940},
    {"epoch": 34.73, "learning_rate": 2.2888624752493407e-05, "loss": 0.1896, "step": 1945},
    {"epoch": 34.82, "learning_rate": 1.051471993294022e-05, "loss": 0.2327, "step": 1950},
    {"epoch": 34.91, "learning_rate": 2.6810717145663806e-06, "loss": 0.1881, "step": 1955},
    {"epoch": 35.0, "learning_rate": 0.0, "loss": 0.2131, "step": 1960},
    {"epoch": 35.09, "learning_rate": 2.6810717145660837e-06, "loss": 0.2035, "step": 1965},
    {"epoch": 35.18, "learning_rate": 1.0514719932939649e-05, "loss": 0.1686, "step": 1970},
    {"epoch": 35.27, "learning_rate": 2.28886247524926e-05, "loss": 0.1647, "step": 1975},
    {"epoch": 35.36, "learning_rate": 3.883557549653492e-05, "loss": 0.1727, "step": 1980},
    {"epoch": 35.45, "learning_rate": 5.710907308129464e-05, "loss": 0.1767, "step": 1985},
    {"epoch": 35.54, "learning_rate": 7.62807630606867e-05, "loss": 0.1769, "step": 1990},
    {"epoch": 35.62, "learning_rate": 9.485208346024514e-05, "loss": 0.1662, "step": 1995},
    {"epoch": 35.71, "learning_rate": 0.00011137140040750926, "loss": 0.1953, "step": 2000},
    {"epoch": 35.8, "learning_rate": 0.00012454747561060548, "loss": 0.186, "step": 2005},
    {"epoch": 35.89, "learning_rate": 0.0001333503964591542, "loss": 0.2162, "step": 2010},
    {"epoch": 35.98, "learning_rate": 0.00013709207951022217, "loss": 0.215, "step": 2015},
    {"epoch": 36.07, "learning_rate": 0.0001354800547756733, "loss": 0.2198, "step": 2020},
    {"epoch": 36.16, "learning_rate": 0.00012864032674294085, "loss": 0.189, "step": 2025},
    {"epoch": 36.25, "learning_rate": 0.00011710752518939751, "loss": 0.2029, "step": 2030},
    {"epoch": 36.34, "learning_rate": 0.0001017831156554197, "loss": 0.1957, "step": 2035},
    {"epoch": 36.43, "learning_rate": 8.386493606940322e-05, "loss": 0.2062, "step": 2040},
    {"epoch": 36.52, "learning_rate": 6.47535673195285e-05, "loss": 0.2046, "step": 2045},
    {"epoch": 36.61, "learning_rate": 4.594285634987612e-05, "loss": 0.1769, "step": 2050},
    {"epoch": 36.7, "learning_rate": 2.8903149082065656e-05, "loss": 0.193, "step": 2055},
    {"epoch": 36.79, "learning_rate": 1.496636030269327e-05, "loss": 0.1815, "step": 2060},
    {"epoch": 36.88, "learning_rate": 5.2218640697257225e-06, "loss": 0.1812, "step": 2065},
    {"epoch": 36.96, "learning_rate": 4.3134240132353453e-07, "loss": 0.177, "step": 2070},
    {"epoch": 37.05, "learning_rate": 9.692481296490944e-07, "loss": 0.1544, "step": 2075},
    {"epoch": 37.14, "learning_rate": 6.793535661894336e-06, "loss": 0.1606, "step": 2080},
    {"epoch": 37.23, "learning_rate": 1.7448947489014123e-05, "loss": 0.1656, "step": 2085},
    {"epoch": 37.32, "learning_rate": 3.210259955104718e-05, "loss": 0.1505, "step": 2090},
    {"epoch": 37.41, "learning_rate": 4.9609083916254715e-05, "loss": 0.1481, "step": 2095},
    {"epoch": 37.5, "learning_rate": 6.859999999999957e-05, "loss": 0.1534, "step": 2100},
    {"epoch": 37.59, "learning_rate": 8.759091608374445e-05, "loss": 0.1499, "step": 2105},
    {"epoch": 37.68, "learning_rate": 0.00010509740044895207, "loss": 0.1818, "step": 2110},
    {"epoch": 37.77, "learning_rate": 0.0001197510525109853, "loss": 0.1553, "step": 2115},
    {"epoch": 37.86, "learning_rate": 0.0001304064643381057, "loss": 0.1696, "step": 2120},
    {"epoch": 37.95, "learning_rate": 0.0001362307518703509, "loss": 0.1848, "step": 2125},
    {"epoch": 38.04, "learning_rate": 0.00013676865759867644, "loss": 0.1581, "step": 2130},
    {"epoch": 38.12, "learning_rate": 0.00013197813593027424, "loss": 0.1639, "step": 2135},
    {"epoch": 38.21, "learning_rate": 0.00012223363969730668, "loss": 0.1736, "step": 2140},
    {"epoch": 38.3, "learning_rate": 0.00010829685091793427, "loss": 0.1716, "step": 2145},
    {"epoch": 38.39, "learning_rate": 9.125714365012379e-05, "loss": 0.1616, "step": 2150},
    {"epoch": 38.48, "learning_rate": 7.244643268047237e-05, "loss": 0.1674, "step": 2155},
    {"epoch": 38.57, "learning_rate": 5.333506393059762e-05, "loss": 0.181, "step": 2160},
    {"epoch": 38.66, "learning_rate": 3.541688434458107e-05, "loss": 0.1737, "step": 2165},
    {"epoch": 38.75, "learning_rate": 2.009247481060311e-05, "loss": 0.1692, "step": 2170},
    {"epoch": 38.84, "learning_rate": 8.559673257059581e-06, "loss": 0.1796, "step": 2175},
    {"epoch": 38.93, "learning_rate": 1.719945224326892e-06, "loss": 0.149, "step": 2180},
    {"epoch": 39.02, "learning_rate": 1.0792048977779616e-07, "loss": 0.1672, "step": 2185},
    {"epoch": 39.11, "learning_rate": 3.849603540845817e-06, "loss": 0.1322, "step": 2190},
    {"epoch": 39.2, "learning_rate": 1.2652524389394578e-05, "loss": 0.134, "step": 2195},
    {"epoch": 39.29, "learning_rate": 2.5828599592490815e-05, "loss": 0.1382, "step": 2200},
    {"epoch": 39.38, "learning_rate": 4.234791653975496e-05, "loss": 0.131, "step": 2205},
    {"epoch": 39.46, "learning_rate": 6.0919236939313415e-05, "loss": 0.1396, "step": 2210},
    {"epoch": 39.55, "learning_rate": 8.009092691870546e-05, "loss": 0.1475, "step": 2215},
    {"epoch": 39.64, "learning_rate": 9.836442450346518e-05, "loss": 0.1374, "step": 2220},
    {"epoch": 39.73, "learning_rate": 0.00011431137524750674, "loss": 0.1528, "step": 2225},
    {"epoch": 39.82, "learning_rate": 0.0001266852800670599, "loss": 0.1468, "step": 2230},
    {"epoch": 39.91, "learning_rate": 0.00013451892828543368, "loss": 0.1589, "step": 2235},
    {"epoch": 40.0, "learning_rate": 0.0001372, "loss": 0.153, "step": 2240},
    {"epoch": 40.09, "learning_rate": 0.00013451892828543387, "loss": 0.1516, "step": 2245},
    {"epoch": 40.18, "learning_rate": 0.00012668528006706025, "loss": 0.1432, "step": 2250},
    {"epoch": 40.27, "learning_rate": 0.00011431137524750726, "loss": 0.1714, "step": 2255},
    {"epoch": 40.36, "learning_rate": 9.836442450346578e-05, "loss": 0.1401, "step": 2260},
    {"epoch": 40.45, "learning_rate": 8.009092691870612e-05, "loss": 0.1529, "step": 2265},
    {"epoch": 40.54, "learning_rate": 6.0919236939314086e-05, "loss": 0.1557, "step": 2270},
    {"epoch": 40.62, "learning_rate": 4.234791653975558e-05, "loss": 0.1562, "step": 2275},
    {"epoch": 40.71, "learning_rate": 2.582859959249134e-05, "loss": 0.1449, "step": 2280},
    {"epoch": 40.8, "learning_rate": 1.2652524389394402e-05, "loss": 0.1595, "step": 2285},
    {"epoch": 40.89, "learning_rate": 3.849603540846038e-06, "loss": 0.1353, "step": 2290},
    {"epoch": 40.98, "learning_rate": 1.0792048977783424e-07, "loss": 0.1344, "step": 2295},
    {"epoch": 41.07, "learning_rate": 1.7199452243265265e-06, "loss": 0.1224, "step": 2300},
    {"epoch": 41.16, "learning_rate": 8.559673257059253e-06, "loss": 0.1157, "step": 2305},
    {"epoch": 41.25, "learning_rate": 2.0092474810601945e-05, "loss": 0.1231, "step": 2310},
    {"epoch": 41.34, "learning_rate": 3.541688434458049e-05, "loss": 0.1233, "step": 2315},
    {"epoch": 41.43, "learning_rate": 5.3335063930596016e-05, "loss": 0.1129, "step": 2320},
    {"epoch": 41.52, "learning_rate": 7.244643268047171e-05, "loss": 0.1297, "step": 2325},
    {"epoch": 41.61, "learning_rate": 9.125714365012407e-05, "loss": 0.1117, "step": 2330},
    {"epoch": 41.7, "learning_rate": 0.00010829685091793371, "loss": 0.117, "step": 2335},
    {"epoch": 41.79, "learning_rate": 0.00012223363969730686, "loss": 0.1357, "step": 2340},
    {"epoch": 41.88, "learning_rate": 0.00013197813593027397, "loss": 0.1191, "step": 2345},
    {"epoch": 41.96, "learning_rate": 0.00013676865759867647, "loss": 0.1514, "step": 2350},
    {"epoch": 42.05, "learning_rate": 0.00013623075187035104, "loss": 0.1221, "step": 2355},
    {"epoch": 42.14, "learning_rate": 0.00013040646433810557, "loss": 0.1356, "step": 2360},
    {"epoch": 42.23, "learning_rate": 0.0001197510525109864, "loss": 0.1469, "step": 2365},
    {"epoch": 42.32, "learning_rate": 0.00010509740044895266, "loss": 0.1364, "step": 2370},
    {"epoch": 42.41, "learning_rate": 8.759091608374603e-05, "loss": 0.146, "step": 2375},
    {"epoch": 42.5, "learning_rate": 6.860000000000023e-05, "loss": 0.1439, "step": 2380},
    {"epoch": 42.59, "learning_rate": 4.960908391625629e-05, "loss": 0.1339, "step": 2385},
    {"epoch": 42.68, "learning_rate": 3.210259955104775e-05, "loss": 0.1282, "step": 2390},
    {"epoch": 42.77, "learning_rate": 1.744894748901522e-05, "loss": 0.1341, "step": 2395},
    {"epoch": 42.86, "learning_rate": 6.793535661894625e-06, "loss": 0.1195, "step": 2400},
| { | |
| "epoch": 42.95, | |
| "learning_rate": 9.69248129649041e-07, | |
| "loss": 0.128, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 43.04, | |
| "learning_rate": 4.3134240132345835e-07, | |
| "loss": 0.1227, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 43.12, | |
| "learning_rate": 5.221864069725836e-06, | |
| "loss": 0.1054, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 43.21, | |
| "learning_rate": 1.4966360302692852e-05, | |
| "loss": 0.113, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 43.3, | |
| "learning_rate": 2.8903149082065897e-05, | |
| "loss": 0.1135, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 43.39, | |
| "learning_rate": 4.594285634987549e-05, | |
| "loss": 0.0998, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 43.48, | |
| "learning_rate": 6.475356731952782e-05, | |
| "loss": 0.1042, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 43.57, | |
| "learning_rate": 8.386493606940162e-05, | |
| "loss": 0.1103, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 43.66, | |
| "learning_rate": 0.0001017831156554191, | |
| "loss": 0.0937, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 43.75, | |
| "learning_rate": 0.00011710752518939633, | |
| "loss": 0.1037, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 43.84, | |
| "learning_rate": 0.0001286403267429405, | |
| "loss": 0.1108, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 43.93, | |
| "learning_rate": 0.00013548005477567295, | |
| "loss": 0.1088, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 44.02, | |
| "learning_rate": 0.0001370920795102222, | |
| "loss": 0.132, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 44.11, | |
| "learning_rate": 0.00013335039645915412, | |
| "loss": 0.1115, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 44.2, | |
| "learning_rate": 0.00012454747561060588, | |
| "loss": 0.1322, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 44.29, | |
| "learning_rate": 0.00011137140040750903, | |
| "loss": 0.1218, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 44.38, | |
| "learning_rate": 9.485208346024576e-05, | |
| "loss": 0.1266, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 44.46, | |
| "learning_rate": 7.628076306068638e-05, | |
| "loss": 0.1337, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 44.55, | |
| "learning_rate": 5.7109073081295306e-05, | |
| "loss": 0.1423, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 44.64, | |
| "learning_rate": 3.883557549653465e-05, | |
| "loss": 0.1254, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 44.73, | |
| "learning_rate": 2.2888624752493833e-05, | |
| "loss": 0.1102, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 44.82, | |
| "learning_rate": 1.0514719932940006e-05, | |
| "loss": 0.1057, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 44.91, | |
| "learning_rate": 2.6810717145665407e-06, | |
| "loss": 0.1127, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.1, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 45.09, | |
| "learning_rate": 2.6810717145659236e-06, | |
| "loss": 0.0923, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 45.18, | |
| "learning_rate": 1.0514719932939862e-05, | |
| "loss": 0.1096, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 45.27, | |
| "learning_rate": 2.288862475249218e-05, | |
| "loss": 0.1037, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 45.36, | |
| "learning_rate": 3.88355754965344e-05, | |
| "loss": 0.0967, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 45.45, | |
| "learning_rate": 5.7109073081295035e-05, | |
| "loss": 0.0873, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 45.54, | |
| "learning_rate": 7.628076306068611e-05, | |
| "loss": 0.1035, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "learning_rate": 9.485208346024552e-05, | |
| "loss": 0.0923, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 45.71, | |
| "learning_rate": 0.00011137140040750883, | |
| "loss": 0.0814, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 45.8, | |
| "learning_rate": 0.0001245474756106057, | |
| "loss": 0.0889, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 45.89, | |
| "learning_rate": 0.00013335039645915404, | |
| "loss": 0.1126, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 45.98, | |
| "learning_rate": 0.00013709207951022217, | |
| "loss": 0.1126, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 46.07, | |
| "learning_rate": 0.00013548005477567344, | |
| "loss": 0.115, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 46.16, | |
| "learning_rate": 0.00012864032674294066, | |
| "loss": 0.1107, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 46.25, | |
| "learning_rate": 0.00011710752518939791, | |
| "loss": 0.1185, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 46.34, | |
| "learning_rate": 0.00010178311565541935, | |
| "loss": 0.0988, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 46.43, | |
| "learning_rate": 8.386493606940379e-05, | |
| "loss": 0.106, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 46.52, | |
| "learning_rate": 6.475356731952809e-05, | |
| "loss": 0.1046, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 46.61, | |
| "learning_rate": 4.594285634987574e-05, | |
| "loss": 0.1119, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 46.7, | |
| "learning_rate": 2.890314908206612e-05, | |
| "loss": 0.1081, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 46.79, | |
| "learning_rate": 1.496636030269302e-05, | |
| "loss": 0.0968, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 46.88, | |
| "learning_rate": 5.221864069725935e-06, | |
| "loss": 0.1116, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 46.96, | |
| "learning_rate": 4.3134240132348884e-07, | |
| "loss": 0.1083, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 47.05, | |
| "learning_rate": 9.692481296489953e-07, | |
| "loss": 0.0932, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 47.14, | |
| "learning_rate": 6.793535661894512e-06, | |
| "loss": 0.0854, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 47.23, | |
| "learning_rate": 1.7448947489013744e-05, | |
| "loss": 0.0843, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 47.32, | |
| "learning_rate": 3.210259955104752e-05, | |
| "loss": 0.0891, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 47.41, | |
| "learning_rate": 4.960908391625416e-05, | |
| "loss": 0.0791, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 47.5, | |
| "learning_rate": 6.859999999999997e-05, | |
| "loss": 0.09, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 47.59, | |
| "learning_rate": 8.75909160837439e-05, | |
| "loss": 0.0897, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 47.68, | |
| "learning_rate": 0.00010509740044895241, | |
| "loss": 0.0856, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 47.77, | |
| "learning_rate": 0.00011975105251098491, | |
| "loss": 0.0932, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 47.86, | |
| "learning_rate": 0.00013040646433810547, | |
| "loss": 0.0998, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 47.95, | |
| "learning_rate": 0.000136230751870351, | |
| "loss": 0.1009, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 48.04, | |
| "learning_rate": 0.00013676865759867652, | |
| "loss": 0.0975, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 48.12, | |
| "learning_rate": 0.00013197813593027408, | |
| "loss": 0.1072, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 48.21, | |
| "learning_rate": 0.00012223363969730703, | |
| "loss": 0.105, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 48.3, | |
| "learning_rate": 0.00010829685091793393, | |
| "loss": 0.102, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 48.39, | |
| "learning_rate": 9.125714365012433e-05, | |
| "loss": 0.0928, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 48.48, | |
| "learning_rate": 7.244643268047198e-05, | |
| "loss": 0.117, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 48.57, | |
| "learning_rate": 5.333506393059818e-05, | |
| "loss": 0.0985, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 48.66, | |
| "learning_rate": 3.541688434458072e-05, | |
| "loss": 0.1006, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 48.75, | |
| "learning_rate": 2.0092474810603514e-05, | |
| "loss": 0.0912, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 48.84, | |
| "learning_rate": 8.559673257059383e-06, | |
| "loss": 0.0824, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 48.93, | |
| "learning_rate": 1.7199452243270216e-06, | |
| "loss": 0.0914, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 49.02, | |
| "learning_rate": 1.07920489777819e-07, | |
| "loss": 0.103, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 49.11, | |
| "learning_rate": 3.849603540845946e-06, | |
| "loss": 0.0818, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 49.2, | |
| "learning_rate": 1.265252438939425e-05, | |
| "loss": 0.083, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 49.29, | |
| "learning_rate": 2.5828599592491126e-05, | |
| "loss": 0.0802, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 49.38, | |
| "learning_rate": 4.2347916539754424e-05, | |
| "loss": 0.0776, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 49.46, | |
| "learning_rate": 6.091923693931382e-05, | |
| "loss": 0.0897, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 49.55, | |
| "learning_rate": 8.00909269187049e-05, | |
| "loss": 0.0723, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 49.64, | |
| "learning_rate": 9.836442450346554e-05, | |
| "loss": 0.0913, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 49.73, | |
| "learning_rate": 0.00011431137524750631, | |
| "loss": 0.0869, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 49.82, | |
| "learning_rate": 0.00012668528006706012, | |
| "loss": 0.0839, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "learning_rate": 0.00013451892828543352, | |
| "loss": 0.0867, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 0.0001372, | |
| "loss": 0.0961, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 50.09, | |
| "learning_rate": 0.00013451892828543404, | |
| "loss": 0.0873, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 50.18, | |
| "learning_rate": 0.00012668528006706004, | |
| "loss": 0.0834, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 50.27, | |
| "learning_rate": 0.00011431137524750768, | |
| "loss": 0.0971, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 50.36, | |
| "learning_rate": 9.836442450346542e-05, | |
| "loss": 0.0968, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 50.45, | |
| "learning_rate": 8.009092691870668e-05, | |
| "loss": 0.0985, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 50.54, | |
| "learning_rate": 6.0919236939313686e-05, | |
| "loss": 0.092, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 50.62, | |
| "learning_rate": 4.2347916539756105e-05, | |
| "loss": 0.0898, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 50.71, | |
| "learning_rate": 2.582859959249102e-05, | |
| "loss": 0.0918, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 50.8, | |
| "learning_rate": 1.2652524389395302e-05, | |
| "loss": 0.0936, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 50.89, | |
| "learning_rate": 3.849603540845908e-06, | |
| "loss": 0.0775, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 50.98, | |
| "learning_rate": 1.0792048977781139e-07, | |
| "loss": 0.086, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 51.07, | |
| "learning_rate": 1.719945224326618e-06, | |
| "loss": 0.071, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 51.16, | |
| "learning_rate": 8.559673257059451e-06, | |
| "loss": 0.0789, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 51.25, | |
| "learning_rate": 2.0092474810602236e-05, | |
| "loss": 0.0844, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 51.34, | |
| "learning_rate": 3.541688434458084e-05, | |
| "loss": 0.0717, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 51.43, | |
| "learning_rate": 5.333506393059641e-05, | |
| "loss": 0.0714, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 51.52, | |
| "learning_rate": 7.244643268047212e-05, | |
| "loss": 0.0735, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 51.61, | |
| "learning_rate": 9.125714365012261e-05, | |
| "loss": 0.0773, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 51.7, | |
| "learning_rate": 0.00010829685091793405, | |
| "loss": 0.0718, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 51.79, | |
| "learning_rate": 0.0001222336396973059, | |
| "loss": 0.0782, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 51.88, | |
| "learning_rate": 0.00013197813593027413, | |
| "loss": 0.0685, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 51.96, | |
| "learning_rate": 0.00013676865759867633, | |
| "loss": 0.0792, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 52.05, | |
| "learning_rate": 0.000136230751870351, | |
| "loss": 0.076, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 52.14, | |
| "learning_rate": 0.00013040646433810625, | |
| "loss": 0.0802, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 52.23, | |
| "learning_rate": 0.00011975105251098613, | |
| "loss": 0.0753, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 52.32, | |
| "learning_rate": 0.0001050974004489523, | |
| "loss": 0.0935, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 52.41, | |
| "learning_rate": 8.759091608374565e-05, | |
| "loss": 0.0924, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 52.5, | |
| "learning_rate": 6.859999999999984e-05, | |
| "loss": 0.0846, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 52.59, | |
| "learning_rate": 4.96090839162559e-05, | |
| "loss": 0.0729, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 52.68, | |
| "learning_rate": 3.210259955104741e-05, | |
| "loss": 0.0807, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 52.77, | |
| "learning_rate": 1.7448947489014953e-05, | |
| "loss": 0.0955, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 52.86, | |
| "learning_rate": 6.793535661894451e-06, | |
| "loss": 0.0765, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 52.95, | |
| "learning_rate": 9.692481296493e-07, | |
| "loss": 0.0808, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 53.04, | |
| "learning_rate": 4.3134240132350404e-07, | |
| "loss": 0.0749, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 53.12, | |
| "learning_rate": 5.221864069725242e-06, | |
| "loss": 0.0756, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 53.21, | |
| "learning_rate": 1.4966360302693102e-05, | |
| "loss": 0.0691, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 53.3, | |
| "learning_rate": 2.8903149082064643e-05, | |
| "loss": 0.0656, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 53.39, | |
| "learning_rate": 4.5942856349875866e-05, | |
| "loss": 0.0701, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 53.48, | |
| "learning_rate": 6.475356731952822e-05, | |
| "loss": 0.0626, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 53.57, | |
| "learning_rate": 8.386493606940203e-05, | |
| "loss": 0.066, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 53.66, | |
| "learning_rate": 0.00010178311565541946, | |
| "loss": 0.0758, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 53.75, | |
| "learning_rate": 0.00011710752518939663, | |
| "loss": 0.0757, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 53.84, | |
| "learning_rate": 0.00012864032674294072, | |
| "loss": 0.0673, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 53.93, | |
| "learning_rate": 0.00013548005477567304, | |
| "loss": 0.0775, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 54.02, | |
| "learning_rate": 0.00013709207951022217, | |
| "loss": 0.0768, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 54.11, | |
| "learning_rate": 0.00013335039645915464, | |
| "loss": 0.0761, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 54.2, | |
| "learning_rate": 0.00012454747561060564, | |
| "loss": 0.0757, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 54.29, | |
| "learning_rate": 0.00011137140040751024, | |
| "loss": 0.0755, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 54.38, | |
| "learning_rate": 9.48520834602454e-05, | |
| "loss": 0.0803, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 54.46, | |
| "learning_rate": 7.628076306068793e-05, | |
| "loss": 0.0809, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 54.55, | |
| "learning_rate": 5.71090730812949e-05, | |
| "loss": 0.0821, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 54.64, | |
| "learning_rate": 3.8835575496536036e-05, | |
| "loss": 0.0792, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 54.73, | |
| "learning_rate": 2.288862475249353e-05, | |
| "loss": 0.073, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 54.82, | |
| "learning_rate": 1.0514719932939786e-05, | |
| "loss": 0.0754, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 54.91, | |
| "learning_rate": 2.6810717145664263e-06, | |
| "loss": 0.0837, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0772, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 55.09, | |
| "learning_rate": 2.681071714566038e-06, | |
| "loss": 0.0661, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 55.18, | |
| "learning_rate": 1.0514719932940075e-05, | |
| "loss": 0.0746, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 55.27, | |
| "learning_rate": 2.2888624752492478e-05, | |
| "loss": 0.0666, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 55.36, | |
| "learning_rate": 3.883557549653476e-05, | |
| "loss": 0.0662, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 55.45, | |
| "learning_rate": 5.710907308129351e-05, | |
| "loss": 0.0673, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 55.54, | |
| "learning_rate": 7.628076306068652e-05, | |
| "loss": 0.0648, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 55.62, | |
| "learning_rate": 9.485208346024408e-05, | |
| "loss": 0.0616, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 55.71, | |
| "learning_rate": 0.00011137140040750914, | |
| "loss": 0.066, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 55.8, | |
| "learning_rate": 0.00012454747561060483, | |
| "loss": 0.0587, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 55.89, | |
| "learning_rate": 0.00013335039645915415, | |
| "loss": 0.0635, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 55.98, | |
| "learning_rate": 0.0001370920795102222, | |
| "loss": 0.0683, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 56.07, | |
| "learning_rate": 0.00013548005477567333, | |
| "loss": 0.0734, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 56.16, | |
| "learning_rate": 0.00012864032674294044, | |
| "loss": 0.0734, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 56.25, | |
| "learning_rate": 0.00011710752518939762, | |
| "loss": 0.0714, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 56.34, | |
| "learning_rate": 0.00010178311565541898, | |
| "loss": 0.0763, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 56.43, | |
| "learning_rate": 8.38649360694034e-05, | |
| "loss": 0.076, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 56.52, | |
| "learning_rate": 6.47535673195277e-05, | |
| "loss": 0.065, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 56.61, | |
| "learning_rate": 4.594285634987719e-05, | |
| "loss": 0.0772, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 56.7, | |
| "learning_rate": 2.8903149082065792e-05, | |
| "loss": 0.0719, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 56.79, | |
| "learning_rate": 1.4966360302693987e-05, | |
| "loss": 0.0661, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 56.88, | |
| "learning_rate": 5.2218640697257835e-06, | |
| "loss": 0.0754, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 56.96, | |
| "learning_rate": 4.313424013236564e-07, | |
| "loss": 0.0747, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 57.05, | |
| "learning_rate": 9.69248129649064e-07, | |
| "loss": 0.067, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 57.14, | |
| "learning_rate": 6.793535661893841e-06, | |
| "loss": 0.0616, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 57.23, | |
| "learning_rate": 1.744894748901401e-05, | |
| "loss": 0.0606, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 57.32, | |
| "learning_rate": 3.2102599551047865e-05, | |
| "loss": 0.0688, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 57.41, | |
| "learning_rate": 4.9609083916254546e-05, | |
| "loss": 0.0625, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 57.5, | |
| "learning_rate": 6.860000000000037e-05, | |
| "loss": 0.0574, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 57.59, | |
| "learning_rate": 8.75909160837443e-05, | |
| "loss": 0.0555, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 57.68, | |
| "learning_rate": 0.00010509740044895277, | |
| "loss": 0.0597, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 57.77, | |
| "learning_rate": 0.00011975105251098518, | |
| "loss": 0.0566, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 57.86, | |
| "learning_rate": 0.00013040646433810563, | |
| "loss": 0.0627, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 57.95, | |
| "learning_rate": 0.00013623075187035074, | |
| "loss": 0.0736, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 58.04, | |
| "learning_rate": 0.00013676865759867647, | |
| "loss": 0.07, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 58.12, | |
| "learning_rate": 0.00013197813593027467, | |
| "loss": 0.0627, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 58.21, | |
| "learning_rate": 0.00012223363969730676, | |
| "loss": 0.0709, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 58.3, | |
| "learning_rate": 0.0001082968509179352, | |
| "loss": 0.0699, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 58.39, | |
| "learning_rate": 9.125714365012395e-05, | |
| "loss": 0.0791, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 58.48, | |
| "learning_rate": 7.244643268047157e-05, | |
| "loss": 0.0732, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 58.57, | |
| "learning_rate": 5.333506393059779e-05, | |
| "loss": 0.0646, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 58.66, | |
| "learning_rate": 3.541688434458037e-05, | |
| "loss": 0.0675, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 58.75, | |
| "learning_rate": 2.0092474810603233e-05, | |
| "loss": 0.0586, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 58.84, | |
| "learning_rate": 8.559673257059193e-06, | |
| "loss": 0.0643, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 58.93, | |
| "learning_rate": 1.7199452243269301e-06, | |
| "loss": 0.0693, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 59.02, | |
| "learning_rate": 1.0792048977784186e-07, | |
| "loss": 0.0652, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 59.11, | |
| "learning_rate": 3.849603540845436e-06, | |
| "loss": 0.058, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 59.2, | |
| "learning_rate": 1.2652524389394478e-05, | |
| "loss": 0.0575, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 59.29, | |
| "learning_rate": 2.5828599592489917e-05, | |
| "loss": 0.0572, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 59.38, | |
| "learning_rate": 4.234791653975481e-05, | |
| "loss": 0.0532, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 59.46, | |
| "learning_rate": 6.0919236939312284e-05, | |
| "loss": 0.057, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 59.55, | |
| "learning_rate": 8.00909269187053e-05, | |
| "loss": 0.0541, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 59.64, | |
| "learning_rate": 9.836442450346415e-05, | |
| "loss": 0.0572, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 59.73, | |
| "learning_rate": 0.00011431137524750662, | |
| "loss": 0.0581, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 59.82, | |
| "learning_rate": 0.00012668528006705928, | |
| "loss": 0.0531, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 59.91, | |
| "learning_rate": 0.00013451892828543363, | |
| "loss": 0.0639, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 0.0001372, | |
| "loss": 0.0604, | |
| "step": 3360 | |
| } | |
  ],
  "max_steps": 3360,
  "num_train_epochs": 60,
  "total_flos": 3472571105280000.0,
  "trial_name": null,
  "trial_params": null
}
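
The log_history array above is a flat list of (epoch, learning_rate, loss, step) records in the layout the Hugging Face Trainer writes to trainer_state.json, so the whole run can be inspected with a few lines of Python. A minimal sketch, assuming the state is saved locally as trainer_state.json and that matplotlib is installed (both assumptions, not part of this log):

import json

import matplotlib.pyplot as plt  # assumption: matplotlib is available

# Assumption: the state shown above is saved as trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss; in other trainer states,
# evaluation entries may lack this key.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, [e["loss"] for e in logs])
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, [e["learning_rate"] for e in logs])
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()

Plotted this way, the learning_rate column traces the cyclic pattern visible in the raw numbers (peaks of 1.372e-4 at epochs 40, 50, and 60, zeros at epochs 45 and 55, i.e. a 560-step period), consistent with a cosine schedule with restarts.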