| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999547121960056, | |
| "eval_steps": 500, | |
| "global_step": 5520, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.2849, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 1.3474, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 1.3589, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019954719225730847, | |
| "loss": 1.3078, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019679487013963564, | |
| "loss": 1.2688, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00019161084574320696, | |
| "loss": 1.3346, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00018412535328311814, | |
| "loss": 1.2897, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001745264449675755, | |
| "loss": 1.2717, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00016305526670845226, | |
| "loss": 1.2734, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 1.2862, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00013568862215918717, | |
| "loss": 1.2708, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00012048066680651908, | |
| "loss": 1.2435, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00010475819158237425, | |
| "loss": 1.2824, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.891618000989891e-05, | |
| "loss": 1.2778, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.335261863099651e-05, | |
| "loss": 1.2511, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.845849869981137e-05, | |
| "loss": 1.2728, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.4607993613388976e-05, | |
| "loss": 1.2866, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.21490588442868e-05, | |
| "loss": 1.2377, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.139469052572127e-05, | |
| "loss": 1.2673, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2615062293021507e-05, | |
| "loss": 1.2778, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.030737921409169e-06, | |
| "loss": 1.2666, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.804347826086957e-05, | |
| "loss": 1.2379, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.985507246376812e-05, | |
| "loss": 1.2613, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 1.2182, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 1.238, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.528985507246377e-05, | |
| "loss": 1.2098, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.710144927536232e-05, | |
| "loss": 1.2186, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.891304347826087e-05, | |
| "loss": 1.2178, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.072463768115943e-05, | |
| "loss": 1.2473, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.2536231884057975e-05, | |
| "loss": 1.2573, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.4347826086956524e-05, | |
| "loss": 1.2302, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.615942028985508e-05, | |
| "loss": 1.2523, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.797101449275363e-05, | |
| "loss": 1.3071, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.9782608695652175e-05, | |
| "loss": 1.2152, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.159420289855072e-05, | |
| "loss": 1.2359, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.340579710144928e-05, | |
| "loss": 1.2098, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.521739130434783e-05, | |
| "loss": 1.2444, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.702898550724638e-05, | |
| "loss": 1.2492, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.884057971014493e-05, | |
| "loss": 1.1941, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.065217391304349e-05, | |
| "loss": 1.2096, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.246376811594203e-05, | |
| "loss": 1.2363, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.427536231884058e-05, | |
| "loss": 1.2613, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.608695652173914e-05, | |
| "loss": 1.2459, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.789855072463769e-05, | |
| "loss": 1.2951, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.971014492753623e-05, | |
| "loss": 1.2114, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.152173913043478e-05, | |
| "loss": 1.2315, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 1.2311, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.514492753623189e-05, | |
| "loss": 1.2155, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 1.2456, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.876811594202898e-05, | |
| "loss": 1.2576, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.057971014492754e-05, | |
| "loss": 1.1968, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.239130434782609e-05, | |
| "loss": 1.361, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.420289855072463e-05, | |
| "loss": 1.2729, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.60144927536232e-05, | |
| "loss": 1.2613, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.782608695652174e-05, | |
| "loss": 1.2513, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.96376811594203e-05, | |
| "loss": 1.2838, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00010144927536231885, | |
| "loss": 1.216, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00010326086956521738, | |
| "loss": 1.2206, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00010507246376811595, | |
| "loss": 1.2322, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001068840579710145, | |
| "loss": 1.2467, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00010869565217391305, | |
| "loss": 1.2177, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001105072463768116, | |
| "loss": 1.2339, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00011231884057971016, | |
| "loss": 1.2207, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001141304347826087, | |
| "loss": 1.2125, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00011594202898550725, | |
| "loss": 1.2331, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001177536231884058, | |
| "loss": 1.2279, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00011956521739130435, | |
| "loss": 1.2772, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001213768115942029, | |
| "loss": 1.2357, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00012318840579710145, | |
| "loss": 1.2611, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000125, | |
| "loss": 1.2308, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00012681159420289856, | |
| "loss": 1.2168, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001286231884057971, | |
| "loss": 1.1828, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013043478260869567, | |
| "loss": 1.2666, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013224637681159422, | |
| "loss": 1.2298, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013405797101449275, | |
| "loss": 1.2249, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001358695652173913, | |
| "loss": 1.2445, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013768115942028986, | |
| "loss": 1.2371, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013949275362318842, | |
| "loss": 1.2005, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00014130434782608697, | |
| "loss": 1.2625, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001431159420289855, | |
| "loss": 1.226, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00014492753623188405, | |
| "loss": 1.2484, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00014673913043478264, | |
| "loss": 1.2482, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00014855072463768116, | |
| "loss": 1.209, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00015036231884057972, | |
| "loss": 1.2102, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00015217391304347827, | |
| "loss": 1.2347, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001539855072463768, | |
| "loss": 1.2134, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00015579710144927538, | |
| "loss": 1.2436, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001576086956521739, | |
| "loss": 1.2616, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00015942028985507247, | |
| "loss": 1.2392, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00016123188405797102, | |
| "loss": 1.2507, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00016304347826086955, | |
| "loss": 1.2315, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00016485507246376813, | |
| "loss": 1.2151, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 1.168, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00016847826086956522, | |
| "loss": 1.2123, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017028985507246377, | |
| "loss": 1.2129, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017210144927536233, | |
| "loss": 1.201, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017391304347826088, | |
| "loss": 1.1735, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017572463768115944, | |
| "loss": 1.2197, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017753623188405796, | |
| "loss": 1.2151, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017934782608695652, | |
| "loss": 1.2171, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018115942028985507, | |
| "loss": 1.2377, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018297101449275363, | |
| "loss": 1.2137, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018478260869565218, | |
| "loss": 1.2363, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018659420289855074, | |
| "loss": 1.2076, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018840579710144927, | |
| "loss": 1.2246, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019021739130434782, | |
| "loss": 1.2535, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001920289855072464, | |
| "loss": 1.2652, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019384057971014493, | |
| "loss": 1.2043, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001956521739130435, | |
| "loss": 1.2072, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019746376811594204, | |
| "loss": 1.2167, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001992753623188406, | |
| "loss": 1.2739, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019999982005120014, | |
| "loss": 1.2288, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019999872036643513, | |
| "loss": 1.2974, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019999662097944096, | |
| "loss": 1.2527, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019999352191120556, | |
| "loss": 1.2544, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019998942319271077, | |
| "loss": 1.2211, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019998432486493205, | |
| "loss": 1.2673, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019997822697883822, | |
| "loss": 1.2358, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001999711295953907, | |
| "loss": 1.1941, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001999630327855431, | |
| "loss": 1.2176, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019995393663024054, | |
| "loss": 1.2103, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019994384122041853, | |
| "loss": 1.186, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019993274665700244, | |
| "loss": 1.2305, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001999206530509063, | |
| "loss": 1.2734, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019990756052303173, | |
| "loss": 1.2792, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019989346920426667, | |
| "loss": 1.2577, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001998783792354841, | |
| "loss": 1.2192, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001998622907675408, | |
| "loss": 1.2149, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019984520396127553, | |
| "loss": 1.2541, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001998271189875077, | |
| "loss": 1.1841, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001998080360270355, | |
| "loss": 1.2418, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001997879552706341, | |
| "loss": 1.1846, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019976687691905393, | |
| "loss": 1.2417, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019974480118301838, | |
| "loss": 1.2281, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001997217282832219, | |
| "loss": 1.2, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019969765845032775, | |
| "loss": 1.2256, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001996725919249657, | |
| "loss": 1.2207, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019964652895772947, | |
| "loss": 1.2042, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019961946980917456, | |
| "loss": 1.2317, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001995914147498153, | |
| "loss": 1.2431, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019956236406012232, | |
| "loss": 1.2345, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019953231803051974, | |
| "loss": 1.2213, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019950127696138225, | |
| "loss": 1.2243, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019946924116303206, | |
| "loss": 1.2061, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019943621095573586, | |
| "loss": 1.2563, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019940218666970161, | |
| "loss": 1.2556, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0001993671686450752, | |
| "loss": 1.353, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019933115723193707, | |
| "loss": 1.4143, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019929415279029873, | |
| "loss": 1.2836, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019925615569009916, | |
| "loss": 1.3538, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019921716631120107, | |
| "loss": 1.3211, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019917718504338714, | |
| "loss": 1.2648, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001991362122863561, | |
| "loss": 1.2779, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019909424844971873, | |
| "loss": 1.2609, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001990512939529939, | |
| "loss": 1.2179, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019900734922560407, | |
| "loss": 1.2384, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001989624147068713, | |
| "loss": 1.2196, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019891649084601278, | |
| "loss": 1.2582, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019886957810213619, | |
| "loss": 1.2313, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001988216769442353, | |
| "loss": 1.2451, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019877278785118517, | |
| "loss": 1.2274, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019872291131173742, | |
| "loss": 1.2455, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001986720478245153, | |
| "loss": 1.215, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001986201978980087, | |
| "loss": 1.2123, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001985673620505692, | |
| "loss": 1.2027, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019851354081040467, | |
| "loss": 1.2324, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001984587347155741, | |
| "loss": 1.245, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019840294431398226, | |
| "loss": 1.2135, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001983461701633742, | |
| "loss": 1.1822, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019828841283132964, | |
| "loss": 1.2285, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001982296728952573, | |
| "loss": 1.237, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019816995094238912, | |
| "loss": 1.2098, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019810924756977444, | |
| "loss": 1.1901, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000198047563384274, | |
| "loss": 1.2362, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019798489900255389, | |
| "loss": 1.2439, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019792125505107931, | |
| "loss": 1.2188, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019785663216610844, | |
| "loss": 1.2794, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019779103099368595, | |
| "loss": 1.2214, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001977244521896366, | |
| "loss": 1.2287, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001976568964195587, | |
| "loss": 1.2682, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019758836435881746, | |
| "loss": 1.2253, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019751885669253816, | |
| "loss": 1.2026, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019744837411559942, | |
| "loss": 1.2285, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001973769173326261, | |
| "loss": 1.2431, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019730448705798239, | |
| "loss": 1.2365, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019723108401576466, | |
| "loss": 1.2019, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019715670893979414, | |
| "loss": 1.2133, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019708136257360966, | |
| "loss": 1.2687, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019700504567046013, | |
| "loss": 1.2272, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019692775899329707, | |
| "loss": 1.2396, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019684950331476706, | |
| "loss": 1.2365, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019677027941720384, | |
| "loss": 1.1987, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019669008809262062, | |
| "loss": 1.2087, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019660893014270212, | |
| "loss": 1.2339, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019652680637879654, | |
| "loss": 1.2094, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001964437176219075, | |
| "loss": 1.2445, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019635966470268583, | |
| "loss": 1.2229, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019627464846142111, | |
| "loss": 1.194, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001961886697480335, | |
| "loss": 1.2036, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019610172942206516, | |
| "loss": 1.2269, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001960138283526715, | |
| "loss": 1.2169, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019592496741861282, | |
| "loss": 1.2136, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019583514750824512, | |
| "loss": 1.2027, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019574436951951162, | |
| "loss": 1.2836, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001956526343599335, | |
| "loss": 1.1955, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019555994294660086, | |
| "loss": 1.2464, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019546629620616375, | |
| "loss": 1.2026, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001953716950748227, | |
| "loss": 1.2482, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001952761404983194, | |
| "loss": 1.2133, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019517963343192732, | |
| "loss": 1.2042, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001950821748404421, | |
| "loss": 1.2268, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019498376569817194, | |
| "loss": 1.2147, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001948844069889278, | |
| "loss": 1.2038, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019478409970601358, | |
| "loss": 1.2155, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001946828448522163, | |
| "loss": 1.2446, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019458064343979596, | |
| "loss": 1.3413, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019447749649047542, | |
| "loss": 3.8626, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001943734050354302, | |
| "loss": 6.3609, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019426837011527823, | |
| "loss": 5.2091, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001941623927800694, | |
| "loss": 1.9048, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019405547408927502, | |
| "loss": 1.3353, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019394761511177733, | |
| "loss": 1.586, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001938388169258587, | |
| "loss": 1.6837, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019372908061919097, | |
| "loss": 1.4677, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019361840728882447, | |
| "loss": 1.4636, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019350679804117711, | |
| "loss": 1.8272, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019339425399202327, | |
| "loss": 2.1575, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019328077626648278, | |
| "loss": 1.8073, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019316636599900946, | |
| "loss": 1.5317, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019305102433337998, | |
| "loss": 1.4068, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019293475242268223, | |
| "loss": 1.9292, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019281755142930407, | |
| "loss": 2.3396, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019269942252492133, | |
| "loss": 1.7311, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001925803668904865, | |
| "loss": 1.5029, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019246038571621657, | |
| "loss": 1.5402, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001923394802015814, | |
| "loss": 1.518, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019221765155529158, | |
| "loss": 1.4632, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019209490099528643, | |
| "loss": 1.3406, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019197122974872163, | |
| "loss": 1.3142, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001918466390519573, | |
| "loss": 1.3173, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019172113015054532, | |
| "loss": 1.2899, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019159470429921702, | |
| "loss": 1.2821, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019146736276187066, | |
| "loss": 1.3015, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019133910681155868, | |
| "loss": 1.2785, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019120993773047513, | |
| "loss": 1.2912, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019107985680994266, | |
| "loss": 1.2846, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019094886535039982, | |
| "loss": 1.2638, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001908169646613879, | |
| "loss": 1.2445, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019068415606153787, | |
| "loss": 1.262, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019055044087855726, | |
| "loss": 1.2625, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019041582044921688, | |
| "loss": 1.2291, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019028029611933739, | |
| "loss": 1.2873, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019014386924377582, | |
| "loss": 1.2883, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019000654118641211, | |
| "loss": 1.3117, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001898683133201356, | |
| "loss": 1.2681, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018972918702683092, | |
| "loss": 1.238, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001895891636973646, | |
| "loss": 1.2715, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018944824473157086, | |
| "loss": 1.2795, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018930643153823777, | |
| "loss": 1.255, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018916372553509314, | |
| "loss": 1.2555, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001890201281487903, | |
| "loss": 1.2326, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018887564081489393, | |
| "loss": 1.2387, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001887302649778656, | |
| "loss": 1.2303, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018858400209104933, | |
| "loss": 1.2334, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018843685361665723, | |
| "loss": 1.241, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018828882102575473, | |
| "loss": 1.2329, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001881399057982458, | |
| "loss": 1.244, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001879901094228584, | |
| "loss": 1.1944, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018783943339712938, | |
| "loss": 1.2184, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001876878792273896, | |
| "loss": 1.2478, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018753544842874887, | |
| "loss": 1.2352, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018738214252508073, | |
| "loss": 1.2322, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001872279630490074, | |
| "loss": 1.2599, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018707291154188425, | |
| "loss": 1.2495, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018691698955378445, | |
| "loss": 1.2369, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001867601986434836, | |
| "loss": 1.2732, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018660254037844388, | |
| "loss": 1.3559, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018644401633479874, | |
| "loss": 1.9588, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018628462809733683, | |
| "loss": 2.5337, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018612437725948631, | |
| "loss": 3.5829, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018596326542329888, | |
| "loss": 2.7139, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018580129419943373, | |
| "loss": 1.935, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018563846520714154, | |
| "loss": 1.5826, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018547478007424823, | |
| "loss": 1.4232, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018531024043713868, | |
| "loss": 1.4101, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018514484794074026, | |
| "loss": 1.4018, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001849786042385067, | |
| "loss": 1.3299, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018481151099240123, | |
| "loss": 1.306, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018464356987288013, | |
| "loss": 1.3258, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018447478255887595, | |
| "loss": 1.3129, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018430515073778093, | |
| "loss": 1.3296, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001841346761054298, | |
| "loss": 1.2667, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018396336036608307, | |
| "loss": 1.2641, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001837912052324099, | |
| "loss": 1.2361, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001836182124254711, | |
| "loss": 1.3251, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018344438367470168, | |
| "loss": 1.3017, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001832697207178938, | |
| "loss": 1.2847, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018309422530117924, | |
| "loss": 1.3278, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001829178991790121, | |
| "loss": 1.2803, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018274074411415105, | |
| "loss": 1.3346, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018256276187764197, | |
| "loss": 1.2782, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018238395424879992, | |
| "loss": 1.3485, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018220432301519168, | |
| "loss": 1.3187, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001820238699726177, | |
| "loss": 1.2878, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018184259692509406, | |
| "loss": 1.3199, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018166050568483474, | |
| "loss": 1.2897, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001814775980722332, | |
| "loss": 1.3277, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001812938759158443, | |
| "loss": 1.3135, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000181109341052366, | |
| "loss": 1.3059, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018092399532662113, | |
| "loss": 1.342, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018073784059153872, | |
| "loss": 1.3264, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018055087870813558, | |
| "loss": 1.3069, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018036311154549784, | |
| "loss": 1.3145, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018017454098076194, | |
| "loss": 1.313, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00017998516889909614, | |
| "loss": 1.2827, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017979499719368168, | |
| "loss": 1.2697, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001796040277656936, | |
| "loss": 1.3068, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001794122625242819, | |
| "loss": 1.2713, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017921970338655266, | |
| "loss": 1.2812, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017902635227754838, | |
| "loss": 1.2795, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017883221113022916, | |
| "loss": 1.2631, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017863728188545326, | |
| "loss": 1.2359, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017844156649195759, | |
| "loss": 1.272, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017824506690633832, | |
| "loss": 1.2845, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017804778509303138, | |
| "loss": 1.2616, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017784972302429264, | |
| "loss": 1.2332, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001776508826801784, | |
| "loss": 1.2521, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017745126604852538, | |
| "loss": 1.2938, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001772508751249311, | |
| "loss": 1.2933, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017704971191273368, | |
| "loss": 1.2702, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017684777842299205, | |
| "loss": 1.2851, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017664507667446554, | |
| "loss": 1.2907, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017644160869359404, | |
| "loss": 1.2637, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001762373765144775, | |
| "loss": 1.2571, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017603238217885569, | |
| "loss": 1.2194, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017582662773608777, | |
| "loss": 1.2755, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017562011524313185, | |
| "loss": 1.2936, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001754128467645243, | |
| "loss": 1.2564, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001752048243723593, | |
| "loss": 1.2177, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017499605014626788, | |
| "loss": 1.3018, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017478652617339738, | |
| "loss": 1.217, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017457625454839039, | |
| "loss": 1.2899, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017436523737336402, | |
| "loss": 1.2608, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017415347675788856, | |
| "loss": 1.2849, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017394097481896676, | |
| "loss": 1.3029, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001737277336810124, | |
| "loss": 1.2907, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001735137554758292, | |
| "loss": 1.308, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001732990423425894, | |
| "loss": 1.3432, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017308359642781242, | |
| "loss": 1.3239, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017286741988534348, | |
| "loss": 1.2793, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0001726505148763319, | |
| "loss": 1.342, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0001724328835692097, | |
| "loss": 1.2451, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0001722145281396697, | |
| "loss": 1.2804, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017199545077064394, | |
| "loss": 1.3129, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017177565365228178, | |
| "loss": 1.2582, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017155513898192806, | |
| "loss": 1.2644, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017133390896410106, | |
| "loss": 1.2875, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0001711119658104705, | |
| "loss": 1.3237, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017088931173983539, | |
| "loss": 1.3011, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00017066594897810196, | |
| "loss": 1.3125, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00017044187975826124, | |
| "loss": 1.35, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00017021710632036694, | |
| "loss": 1.4059, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00016999163091151287, | |
| "loss": 1.4471, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00016976545578581057, | |
| "loss": 1.3584, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00016953858320436672, | |
| "loss": 1.3308, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001693110154352606, | |
| "loss": 1.3613, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001690827547535214, | |
| "loss": 1.3879, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00016885380344110545, | |
| "loss": 1.3412, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001686241637868734, | |
| "loss": 1.3521, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00016839383808656732, | |
| "loss": 1.3658, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016816282864278793, | |
| "loss": 1.3324, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016793113776497127, | |
| "loss": 1.3101, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001676987677693659, | |
| "loss": 1.3923, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001674657209790095, | |
| "loss": 1.3177, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016723199972370594, | |
| "loss": 1.2952, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016699760634000165, | |
| "loss": 1.3095, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016676254317116252, | |
| "loss": 1.3412, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016652681256715032, | |
| "loss": 1.2945, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016629041688459941, | |
| "loss": 1.3498, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001660533584867928, | |
| "loss": 1.38, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016581563974363902, | |
| "loss": 1.3754, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016557726303164803, | |
| "loss": 1.357, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016533823073390757, | |
| "loss": 1.3054, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016509854524005948, | |
| "loss": 1.3338, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001648582089462756, | |
| "loss": 1.2822, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016461722425523402, | |
| "loss": 1.3557, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016437559357609488, | |
| "loss": 1.3282, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016413331932447638, | |
| "loss": 1.3206, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016389040392243056, | |
| "loss": 1.2873, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016364684979841925, | |
| "loss": 1.3082, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016340265938728958, | |
| "loss": 1.3155, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016315783513024977, | |
| "loss": 1.3231, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001629123794748447, | |
| "loss": 1.3236, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016266629487493144, | |
| "loss": 1.3079, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001624195837906547, | |
| "loss": 1.3281, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001621722486884222, | |
| "loss": 1.3313, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001619242920408802, | |
| "loss": 1.3004, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001616757163268885, | |
| "loss": 1.3087, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016142652403149582, | |
| "loss": 1.3004, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016117671764591504, | |
| "loss": 1.3005, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001609262996674981, | |
| "loss": 1.3026, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016067527259971113, | |
| "loss": 1.3331, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016042363895210946, | |
| "loss": 1.2832, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00016017140124031245, | |
| "loss": 1.3191, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001599185619859784, | |
| "loss": 1.3237, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015966512371677928, | |
| "loss": 1.2876, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001594110889663756, | |
| "loss": 1.307, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015915646027439087, | |
| "loss": 1.3195, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015890124018638638, | |
| "loss": 1.2279, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015864543125383574, | |
| "loss": 1.2658, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015838903603409925, | |
| "loss": 1.3215, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001581320570903984, | |
| "loss": 1.2436, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015787449699179035, | |
| "loss": 1.3196, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.000157616358313142, | |
| "loss": 1.3022, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001573576436351046, | |
| "loss": 1.3023, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015709835554408765, | |
| "loss": 1.2972, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015683849663223308, | |
| "loss": 1.2841, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015657806949738947, | |
| "loss": 1.2875, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015631707674308606, | |
| "loss": 1.2808, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001560555209785066, | |
| "loss": 1.2274, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015579340481846336, | |
| "loss": 1.2724, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015553073088337094, | |
| "loss": 1.2879, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015526750179922013, | |
| "loss": 1.3452, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015500372019755168, | |
| "loss": 1.3314, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015473938871542986, | |
| "loss": 1.3538, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00015447450999541616, | |
| "loss": 1.2747, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015420908668554298, | |
| "loss": 1.289, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.000153943121439287, | |
| "loss": 1.2945, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001536766169155428, | |
| "loss": 1.2736, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015340957577859605, | |
| "loss": 1.3336, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015314200069809712, | |
| "loss": 1.2652, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015287389434903435, | |
| "loss": 1.2656, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015260525941170712, | |
| "loss": 1.3415, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001523360985716993, | |
| "loss": 1.3093, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015206641451985222, | |
| "loss": 1.3094, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015179620995223783, | |
| "loss": 1.3243, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015152548757013182, | |
| "loss": 1.3247, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015125425007998653, | |
| "loss": 1.3668, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015098250019340387, | |
| "loss": 1.3491, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015071024062710824, | |
| "loss": 1.3402, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015043747410291945, | |
| "loss": 1.358, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015016420334772543, | |
| "loss": 1.32, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014989043109345498, | |
| "loss": 1.3746, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014961616007705042, | |
| "loss": 1.3256, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014934139304044033, | |
| "loss": 1.355, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014906613273051202, | |
| "loss": 1.2912, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014879038189908415, | |
| "loss": 1.3153, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014851414330287928, | |
| "loss": 1.3717, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014823741970349606, | |
| "loss": 1.4614, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014796021386738193, | |
| "loss": 1.3926, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001476825285658053, | |
| "loss": 1.3294, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014740436657482777, | |
| "loss": 1.3613, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014712573067527664, | |
| "loss": 1.2937, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014684662365271675, | |
| "loss": 1.2975, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.000146567048297423, | |
| "loss": 1.3285, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014628700740435221, | |
| "loss": 1.3075, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014600650377311522, | |
| "loss": 1.3492, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.000145725540207949, | |
| "loss": 1.2762, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014544411951768852, | |
| "loss": 1.2766, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001451622445157387, | |
| "loss": 1.2881, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014487991802004623, | |
| "loss": 1.3359, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014459714285307152, | |
| "loss": 1.3266, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014431392184176042, | |
| "loss": 1.237, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014403025781751594, | |
| "loss": 1.3363, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014374615361616985, | |
| "loss": 1.3084, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014346161207795462, | |
| "loss": 1.3463, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014317663604747477, | |
| "loss": 1.3328, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014289122837367834, | |
| "loss": 1.3092, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014260539190982886, | |
| "loss": 1.3619, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014231912951347632, | |
| "loss": 1.7222, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014203244404642894, | |
| "loss": 2.15, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001417453383747244, | |
| "loss": 1.6064, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014145781536860122, | |
| "loss": 1.5031, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014116987790247003, | |
| "loss": 1.409, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014088152885488502, | |
| "loss": 1.4162, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001405927711085149, | |
| "loss": 1.3969, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014030360755011424, | |
| "loss": 1.4638, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014001404107049454, | |
| "loss": 1.4287, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001397240745644954, | |
| "loss": 1.3563, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00013943371093095558, | |
| "loss": 1.5156, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00013914295307268396, | |
| "loss": 1.4148, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001388518038964304, | |
| "loss": 1.4221, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001385602663128571, | |
| "loss": 1.3772, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.000138268343236509, | |
| "loss": 1.3444, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013797603758578496, | |
| "loss": 1.3119, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013768335228290845, | |
| "loss": 1.3686, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013739029025389846, | |
| "loss": 1.3505, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013709685442854012, | |
| "loss": 1.3769, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013680304774035538, | |
| "loss": 1.3505, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013650887312657392, | |
| "loss": 1.362, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013621433352810353, | |
| "loss": 1.3773, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001359194318895008, | |
| "loss": 1.325, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013562417115894172, | |
| "loss": 1.3583, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013532855428819213, | |
| "loss": 1.3345, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013503258423257835, | |
| "loss": 1.3346, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001347362639509574, | |
| "loss": 1.2946, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001344395964056878, | |
| "loss": 1.3796, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013414258456259943, | |
| "loss": 1.3362, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001338452313909644, | |
| "loss": 1.2915, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013354753986346692, | |
| "loss": 1.3044, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013324951295617398, | |
| "loss": 1.3056, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013295115364850534, | |
| "loss": 1.3265, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00013265246492320383, | |
| "loss": 1.3577, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013235344976630546, | |
| "loss": 1.3357, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013205411116710972, | |
| "loss": 1.293, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013175445211814952, | |
| "loss": 1.3595, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013145447561516138, | |
| "loss": 1.3376, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001311541846570555, | |
| "loss": 1.2918, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013085358224588565, | |
| "loss": 1.3296, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013055267138681936, | |
| "loss": 1.2756, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001302514550881076, | |
| "loss": 1.3264, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.000129949936361055, | |
| "loss": 1.2966, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001296481182199896, | |
| "loss": 1.2693, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00012934600368223265, | |
| "loss": 1.3288, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012904359576806858, | |
| "loss": 1.258, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012874089750071477, | |
| "loss": 1.2904, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001284379119062912, | |
| "loss": 1.2678, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012813464201379043, | |
| "loss": 1.3021, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001278310908550471, | |
| "loss": 1.2584, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012752726146470774, | |
| "loss": 1.2773, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012722315688020047, | |
| "loss": 1.2542, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001269187801417044, | |
| "loss": 1.2914, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012661413429211957, | |
| "loss": 1.2748, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001263092223770363, | |
| "loss": 1.2961, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001260040474447048, | |
| "loss": 1.2932, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001256986125460047, | |
| "loss": 1.2617, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001253929207344146, | |
| "loss": 1.3035, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012508697506598144, | |
| "loss": 1.3183, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012478077859929, | |
| "loss": 1.2684, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001244743343954324, | |
| "loss": 1.3013, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012416764551797732, | |
| "loss": 1.2886, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012386071503293962, | |
| "loss": 1.297, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001235535460087494, | |
| "loss": 1.2574, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012324614151622154, | |
| "loss": 1.259, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012293850462852496, | |
| "loss": 1.2833, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012263063842115184, | |
| "loss": 1.3061, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012232254597188688, | |
| "loss": 1.2917, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012201423036077657, | |
| "loss": 1.2163, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001217056946700984, | |
| "loss": 1.2623, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012139694198433004, | |
| "loss": 1.3024, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012108797539011847, | |
| "loss": 1.3067, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012077879797624909, | |
| "loss": 1.2481, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012046941283361502, | |
| "loss": 1.2721, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012015982305518594, | |
| "loss": 1.237, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001198500317359774, | |
| "loss": 1.2805, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00011954004197301978, | |
| "loss": 1.2614, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00011922985686532726, | |
| "loss": 1.3051, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011891947951386701, | |
| "loss": 1.2543, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011860891302152798, | |
| "loss": 1.3179, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011829816049309009, | |
| "loss": 1.2996, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011798722503519304, | |
| "loss": 1.2597, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001176761097563053, | |
| "loss": 1.2888, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011736481776669306, | |
| "loss": 1.3059, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011705335217838909, | |
| "loss": 1.2971, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011674171610516165, | |
| "loss": 1.3073, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011642991266248338, | |
| "loss": 1.2934, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011611794496750019, | |
| "loss": 1.2918, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011580581613899992, | |
| "loss": 1.2676, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011549352929738142, | |
| "loss": 1.2998, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001151810875646231, | |
| "loss": 1.2429, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011486849406425188, | |
| "loss": 1.2739, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011455575192131204, | |
| "loss": 1.2622, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011424286426233368, | |
| "loss": 1.2449, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011392983421530175, | |
| "loss": 1.2877, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011361666490962468, | |
| "loss": 1.3152, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.000113303359476103, | |
| "loss": 1.2471, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011298992104689825, | |
| "loss": 1.2751, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011267635275550148, | |
| "loss": 1.3161, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011236265773670196, | |
| "loss": 1.2622, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011204883912655597, | |
| "loss": 1.2995, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011173490006235528, | |
| "loss": 1.2573, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011142084368259585, | |
| "loss": 1.2543, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011110667312694653, | |
| "loss": 1.2807, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011079239153621752, | |
| "loss": 1.2458, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001104780020523291, | |
| "loss": 1.2905, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011016350781828019, | |
| "loss": 1.2601, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00010984891197811687, | |
| "loss": 1.2885, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00010953421767690104, | |
| "loss": 1.2934, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00010921942806067886, | |
| "loss": 1.2722, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00010890454627644944, | |
| "loss": 1.2924, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010858957547213327, | |
| "loss": 1.3087, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010827451879654076, | |
| "loss": 1.2663, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010795937939934088, | |
| "loss": 1.2771, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010764416043102952, | |
| "loss": 1.2424, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.000107328865042898, | |
| "loss": 1.2307, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010701349638700173, | |
| "loss": 1.2394, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010669805761612854, | |
| "loss": 1.2664, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010638255188376717, | |
| "loss": 1.2833, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010606698234407586, | |
| "loss": 1.2873, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001057513521518507, | |
| "loss": 1.2672, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010543566446249408, | |
| "loss": 1.252, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010511992243198334, | |
| "loss": 1.2806, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010480412921683888, | |
| "loss": 1.3012, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.000104488287974093, | |
| "loss": 1.2897, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010417240186125805, | |
| "loss": 1.2662, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010385647403629488, | |
| "loss": 1.2983, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010354050765758147, | |
| "loss": 1.3074, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010322450588388117, | |
| "loss": 1.2433, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010290847187431113, | |
| "loss": 1.2837, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010259240878831091, | |
| "loss": 1.2503, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010227631978561056, | |
| "loss": 1.2471, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010196020802619941, | |
| "loss": 1.2549, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010164407667029417, | |
| "loss": 1.3157, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010132792887830744, | |
| "loss": 1.2674, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010101176781081625, | |
| "loss": 1.2781, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010069559662853027, | |
| "loss": 1.2948, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010037941849226032, | |
| "loss": 1.3088, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010006323656288669, | |
| "loss": 1.277, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.974705400132764e-05, | |
| "loss": 1.2629, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.943087396850773e-05, | |
| "loss": 1.2973, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.911469962532627e-05, | |
| "loss": 1.3478, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.879853413262563e-05, | |
| "loss": 1.2582, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.848238065115975e-05, | |
| "loss": 1.247, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.816624234156249e-05, | |
| "loss": 1.2527, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.785012236431598e-05, | |
| "loss": 1.211, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.753402387971917e-05, | |
| "loss": 1.2773, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.721795004785605e-05, | |
| "loss": 1.2478, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.69019040285642e-05, | |
| "loss": 1.2596, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.658588898140322e-05, | |
| "loss": 1.2535, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.626990806562291e-05, | |
| "loss": 1.2332, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.595396444013205e-05, | |
| "loss": 1.2232, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.563806126346642e-05, | |
| "loss": 1.2822, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.532220169375761e-05, | |
| "loss": 1.2272, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.500638888870113e-05, | |
| "loss": 1.2345, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.469062600552509e-05, | |
| "loss": 1.3178, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.43749162009584e-05, | |
| "loss": 1.248, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.405926263119945e-05, | |
| "loss": 1.2272, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.374366845188442e-05, | |
| "loss": 1.265, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.342813681805565e-05, | |
| "loss": 1.2711, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.311267088413035e-05, | |
| "loss": 1.2777, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.27972738038688e-05, | |
| "loss": 1.3019, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.248194873034301e-05, | |
| "loss": 1.2909, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.216669881590515e-05, | |
| "loss": 1.2529, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.18515272121559e-05, | |
| "loss": 1.3024, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.153643706991318e-05, | |
| "loss": 1.2295, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.122143153918045e-05, | |
| "loss": 1.2647, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.09065137691153e-05, | |
| "loss": 1.2777, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.059168690799804e-05, | |
| "loss": 1.2287, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.027695410320004e-05, | |
| "loss": 1.2618, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.996231850115246e-05, | |
| "loss": 1.2821, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.964778324731467e-05, | |
| "loss": 1.2888, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.933335148614284e-05, | |
| "loss": 1.2946, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.901902636105854e-05, | |
| "loss": 1.3121, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.870481101441723e-05, | |
| "loss": 1.3313, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.839070858747697e-05, | |
| "loss": 1.2501, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.807672222036691e-05, | |
| "loss": 1.3022, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.77628550520559e-05, | |
| "loss": 1.3413, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.744911022032115e-05, | |
| "loss": 1.2728, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.713549086171691e-05, | |
| "loss": 1.2878, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.682200011154302e-05, | |
| "loss": 1.3301, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.650864110381357e-05, | |
| "loss": 1.2421, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.619541697122568e-05, | |
| "loss": 1.3447, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.5882330845128e-05, | |
| "loss": 1.2773, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.55693858554896e-05, | |
| "loss": 1.2936, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.525658513086857e-05, | |
| "loss": 1.3005, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.49439317983807e-05, | |
| "loss": 1.2903, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.463142898366834e-05, | |
| "loss": 1.2755, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.431907981086906e-05, | |
| "loss": 1.2755, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.400688740258447e-05, | |
| "loss": 1.2669, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.3694854879849e-05, | |
| "loss": 1.2689, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.33829853620986e-05, | |
| "loss": 1.2619, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.307128196713972e-05, | |
| "loss": 1.287, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.275974781111804e-05, | |
| "loss": 1.2392, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.244838600848727e-05, | |
| "loss": 1.3307, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.213719967197817e-05, | |
| "loss": 1.281, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.182619191256724e-05, | |
| "loss": 1.3068, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.15153658394458e-05, | |
| "loss": 1.2864, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.120472455998882e-05, | |
| "loss": 1.2889, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 8.089427117972378e-05, | |
| "loss": 1.2591, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 8.058400880229978e-05, | |
| "loss": 1.3001, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 8.027394052945648e-05, | |
| "loss": 1.2743, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.996406946099289e-05, | |
| "loss": 1.271, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.965439869473664e-05, | |
| "loss": 1.2902, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.934493132651294e-05, | |
| "loss": 1.3117, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.903567045011352e-05, | |
| "loss": 1.2478, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.872661915726584e-05, | |
| "loss": 1.2871, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.841778053760211e-05, | |
| "loss": 1.2891, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.810915767862837e-05, | |
| "loss": 1.2877, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.780075366569374e-05, | |
| "loss": 1.2385, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.749257158195943e-05, | |
| "loss": 1.2439, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.718461450836804e-05, | |
| "loss": 1.331, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.687688552361272e-05, | |
| "loss": 1.2249, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.65693877041063e-05, | |
| "loss": 1.2736, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.626212412395072e-05, | |
| "loss": 1.2991, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.595509785490617e-05, | |
| "loss": 1.2505, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.564831196636032e-05, | |
| "loss": 1.3018, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.534176952529782e-05, | |
| "loss": 1.2937, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.503547359626948e-05, | |
| "loss": 1.2756, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.472942724136174e-05, | |
| "loss": 1.306, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.442363352016598e-05, | |
| "loss": 1.2915, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.411809548974792e-05, | |
| "loss": 1.3106, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.381281620461722e-05, | |
| "loss": 1.2993, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.350779871669669e-05, | |
| "loss": 1.2557, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.3203046075292e-05, | |
| "loss": 1.2596, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.289856132706112e-05, | |
| "loss": 1.2693, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.25943475159838e-05, | |
| "loss": 1.245, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.229040768333115e-05, | |
| "loss": 1.2455, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.198674486763537e-05, | |
| "loss": 1.2628, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.168336210465928e-05, | |
| "loss": 1.2572, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.138026242736589e-05, | |
| "loss": 1.232, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.107744886588824e-05, | |
| "loss": 1.3451, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.077492444749895e-05, | |
| "loss": 1.3035, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 7.04726921965801e-05, | |
| "loss": 1.2449, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 7.017075513459292e-05, | |
| "loss": 1.239, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.986911628004753e-05, | |
| "loss": 1.2471, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.956777864847291e-05, | |
| "loss": 1.3258, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.926674525238663e-05, | |
| "loss": 1.2594, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.896601910126475e-05, | |
| "loss": 1.2699, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.866560320151179e-05, | |
| "loss": 1.2516, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.83655005564306e-05, | |
| "loss": 1.2536, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.806571416619246e-05, | |
| "loss": 1.2782, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.77662470278069e-05, | |
| "loss": 1.2205, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.74671021350919e-05, | |
| "loss": 1.2888, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.71682824786439e-05, | |
| "loss": 1.2725, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.686979104580788e-05, | |
| "loss": 1.2165, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.657163082064752e-05, | |
| "loss": 1.2787, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.627380478391543e-05, | |
| "loss": 1.2569, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.597631591302319e-05, | |
| "loss": 1.2977, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.567916718201174e-05, | |
| "loss": 1.2661, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.538236156152163e-05, | |
| "loss": 1.3089, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.508590201876317e-05, | |
| "loss": 1.2441, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.478979151748694e-05, | |
| "loss": 1.2697, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.449403301795416e-05, | |
| "loss": 1.2843, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.419862947690692e-05, | |
| "loss": 1.2846, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.390358384753881e-05, | |
| "loss": 1.2265, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.360889907946534e-05, | |
| "loss": 1.2765, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.331457811869437e-05, | |
| "loss": 1.2698, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.302062390759677e-05, | |
| "loss": 1.2683, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.272703938487694e-05, | |
| "loss": 1.3128, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.243382748554346e-05, | |
| "loss": 1.2875, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.214099114087975e-05, | |
| "loss": 1.2476, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.18485332784147e-05, | |
| "loss": 1.2495, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.155645682189351e-05, | |
| "loss": 1.2578, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.126476469124842e-05, | |
| "loss": 1.2243, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.097345980256942e-05, | |
| "loss": 1.2923, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.0682545068075317e-05, | |
| "loss": 1.2724, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.039202339608432e-05, | |
| "loss": 1.2479, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.010189769098529e-05, | |
| "loss": 1.2146, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.9812170853208496e-05, | |
| "loss": 1.2868, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.952284577919659e-05, | |
| "loss": 1.2859, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.9233925361375864e-05, | |
| "loss": 1.2669, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.8945412488127096e-05, | |
| "loss": 1.2299, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.865731004375683e-05, | |
| "loss": 1.2655, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.8369620908468503e-05, | |
| "loss": 1.2446, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.8082347958333625e-05, | |
| "loss": 1.2017, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.7795494065262956e-05, | |
| "loss": 1.2805, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.750906209697802e-05, | |
| "loss": 1.2901, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.722305491698219e-05, | |
| "loss": 1.266, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.693747538453229e-05, | |
| "loss": 1.2995, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.665232635460971e-05, | |
| "loss": 1.2705, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.6367610677892177e-05, | |
| "loss": 1.2111, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.6083331200725074e-05, | |
| "loss": 1.2395, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.579949076509305e-05, | |
| "loss": 1.2338, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.5516092208591594e-05, | |
| "loss": 1.2674, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.5233138364398604e-05, | |
| "loss": 1.2674, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.495063206124619e-05, | |
| "loss": 1.2451, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.466857612339229e-05, | |
| "loss": 1.2155, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.4386973370592485e-05, | |
| "loss": 1.2747, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.410582661807182e-05, | |
| "loss": 1.2164, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.382513867649663e-05, | |
| "loss": 1.2605, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.354491235194635e-05, | |
| "loss": 1.2679, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.32651504458857e-05, | |
| "loss": 1.2327, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.298585575513648e-05, | |
| "loss": 1.287, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.2707031071849644e-05, | |
| "loss": 1.2592, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.2428679183477505e-05, | |
| "loss": 1.2454, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.215080287274561e-05, | |
| "loss": 1.2447, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.18734049176252e-05, | |
| "loss": 1.2778, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.159648809130534e-05, | |
| "loss": 1.2305, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.1320055162165115e-05, | |
| "loss": 1.2759, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.104410889374611e-05, | |
| "loss": 1.2292, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.076865204472454e-05, | |
| "loss": 1.282, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.0493687368883904e-05, | |
| "loss": 1.2767, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.021921761508739e-05, | |
| "loss": 1.2544, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.994524552725036e-05, | |
| "loss": 1.2261, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.967177384431293e-05, | |
| "loss": 1.2189, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.939880530021263e-05, | |
| "loss": 1.2411, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.912634262385695e-05, | |
| "loss": 1.235, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.8854388539096205e-05, | |
| "loss": 1.2501, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.8582945764696244e-05, | |
| "loss": 1.2903, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.831201701431124e-05, | |
| "loss": 1.2874, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.804160499645667e-05, | |
| "loss": 1.2588, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.7771712414482015e-05, | |
| "loss": 1.34, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.7502341966544e-05, | |
| "loss": 1.2294, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.7233496345579444e-05, | |
| "loss": 1.2135, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.696517823927842e-05, | |
| "loss": 1.2082, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6697390330057335e-05, | |
| "loss": 1.2583, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6430135295032184e-05, | |
| "loss": 1.2368, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6163415805991626e-05, | |
| "loss": 1.2219, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.589723452937049e-05, | |
| "loss": 1.2668, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.5631594126222995e-05, | |
| "loss": 1.2654, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.536649725219615e-05, | |
| "loss": 1.2436, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.510194655750326e-05, | |
| "loss": 1.2278, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.483794468689728e-05, | |
| "loss": 1.2904, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.457449427964463e-05, | |
| "loss": 1.277, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.431159796949862e-05, | |
| "loss": 1.2656, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.4049258384673085e-05, | |
| "loss": 1.2845, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.3787478147816296e-05, | |
| "loss": 1.2586, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.352625987598467e-05, | |
| "loss": 1.2354, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.326560618061639e-05, | |
| "loss": 1.2829, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.3005519667505675e-05, | |
| "loss": 1.2248, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.274600293677647e-05, | |
| "loss": 1.2534, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.248705858285649e-05, | |
| "loss": 1.2016, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.222868919445139e-05, | |
| "loss": 1.2276, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.197089735451868e-05, | |
| "loss": 1.2504, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.1713685640242165e-05, | |
| "loss": 1.2469, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.145705662300595e-05, | |
| "loss": 1.2582, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.1201012868368915e-05, | |
| "loss": 1.2125, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.094555693603891e-05, | |
| "loss": 1.2324, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.069069137984731e-05, | |
| "loss": 1.3027, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.0436418747723295e-05, | |
| "loss": 1.2772, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.0182741581668593e-05, | |
| "loss": 1.2269, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.992966241773194e-05, | |
| "loss": 1.2746, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.967718378598376e-05, | |
| "loss": 1.2536, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.9425308210490905e-05, | |
| "loss": 1.2511, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.917403820929126e-05, | |
| "loss": 1.2734, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.8923376294368806e-05, | |
| "loss": 1.2622, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.8673324971628357e-05, | |
| "loss": 1.3099, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.8423886740870566e-05, | |
| "loss": 1.2349, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.817506409576691e-05, | |
| "loss": 1.2152, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.7926859523834725e-05, | |
| "loss": 1.2543, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.767927550641237e-05, | |
| "loss": 1.2365, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.743231451863448e-05, | |
| "loss": 1.2476, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.718597902940717e-05, | |
| "loss": 1.2462, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.694027150138331e-05, | |
| "loss": 1.2745, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.669519439093801e-05, | |
| "loss": 1.2563, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.6450750148143884e-05, | |
| "loss": 1.2764, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.620694121674679e-05, | |
| "loss": 1.2162, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.596377003414124e-05, | |
| "loss": 1.2474, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.5721239031346066e-05, | |
| "loss": 1.2109, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.547935063298018e-05, | |
| "loss": 1.2179, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.523810725723816e-05, | |
| "loss": 1.244, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.4997511315866306e-05, | |
| "loss": 1.2279, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.475756521413839e-05, | |
| "loss": 1.2368, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.4518271350831647e-05, | |
| "loss": 1.3006, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.427963211820274e-05, | |
| "loss": 1.2726, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.4041649901964e-05, | |
| "loss": 1.3108, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.3804327081259304e-05, | |
| "loss": 1.2491, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.356766602864056e-05, | |
| "loss": 1.2916, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.33316691100439e-05, | |
| "loss": 1.2198, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.309633868476594e-05, | |
| "loss": 1.2532, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.2861677105440336e-05, | |
| "loss": 1.2575, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.262768671801407e-05, | |
| "loss": 1.2602, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.239436986172425e-05, | |
| "loss": 1.2675, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.216172886907452e-05, | |
| "loss": 1.2613, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.192976606581186e-05, | |
| "loss": 1.2542, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.1698483770903207e-05, | |
| "loss": 1.2262, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.146788429651246e-05, | |
| "loss": 1.2854, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.1237969947977153e-05, | |
| "loss": 1.2374, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.100874302378559e-05, | |
| "loss": 1.2252, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.078020581555376e-05, | |
| "loss": 1.2981, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.055236060800247e-05, | |
| "loss": 1.268, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.032520967893453e-05, | |
| "loss": 1.2497, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.009875529921181e-05, | |
| "loss": 1.3, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.987299973273282e-05, | |
| "loss": 1.2536, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.9647945236409848e-05, | |
| "loss": 1.2717, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.942359406014652e-05, | |
| "loss": 1.2778, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.919994844681524e-05, | |
| "loss": 1.247, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.8977010632234826e-05, | |
| "loss": 1.2549, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.8754782845148043e-05, | |
| "loss": 1.257, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.8533267307199497e-05, | |
| "loss": 1.2337, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.8312466232913282e-05, | |
| "loss": 1.2896, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.809238182967092e-05, | |
| "loss": 1.264, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.7873016297689268e-05, | |
| "loss": 1.2336, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.765437182999846e-05, | |
| "loss": 1.205, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.7436450612420095e-05, | |
| "loss": 1.2147, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.7219254823545336e-05, | |
| "loss": 1.2887, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.7002786634713094e-05, | |
| "loss": 1.2237, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.678704820998842e-05, | |
| "loss": 1.2635, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.6572041706140683e-05, | |
| "loss": 1.2738, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.635776927262227e-05, | |
| "loss": 1.24, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.6144233051546796e-05, | |
| "loss": 1.2806, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.5931435177668006e-05, | |
| "loss": 1.2598, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.57193777783582e-05, | |
| "loss": 1.1954, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.5508062973587076e-05, | |
| "loss": 1.2031, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.529749287590042e-05, | |
| "loss": 1.2506, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.5087669590399178e-05, | |
| "loss": 1.225, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.4878595214718236e-05, | |
| "loss": 1.2695, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.4670271839005542e-05, | |
| "loss": 1.2714, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.446270154590117e-05, | |
| "loss": 1.2735, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.425588641051656e-05, | |
| "loss": 1.2415, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.404982850041363e-05, | |
| "loss": 1.271, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3844529875584278e-05, | |
| "loss": 1.2445, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3639992588429705e-05, | |
| "loss": 1.2269, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3436218683739896e-05, | |
| "loss": 1.2577, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3233210198673218e-05, | |
| "loss": 1.2917, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3030969162735926e-05, | |
| "loss": 1.2436, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.282949759776206e-05, | |
| "loss": 1.2601, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.262879751789314e-05, | |
| "loss": 1.308, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.242887092955801e-05, | |
| "loss": 1.2468, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.2229719831452823e-05, | |
| "loss": 1.2763, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.2031346214520966e-05, | |
| "loss": 1.2405, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.183375206193331e-05, | |
| "loss": 1.2632, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.1636939349068308e-05, | |
| "loss": 1.2975, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.1440910043492212e-05, | |
| "loss": 1.2662, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.12456661049394e-05, | |
| "loss": 1.2363, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.105120948529291e-05, | |
| "loss": 1.2217, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.085754212856471e-05, | |
| "loss": 1.2094, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.0664665970876496e-05, | |
| "loss": 1.2848, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.04725829404402e-05, | |
| "loss": 1.2706, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.028129495753871e-05, | |
| "loss": 1.2804, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.0090803934506764e-05, | |
| "loss": 1.2642, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.9901111775711677e-05, | |
| "loss": 1.2477, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.9712220377534496e-05, | |
| "loss": 1.2972, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.9524131628350883e-05, | |
| "loss": 1.2812, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.9336847408512328e-05, | |
| "loss": 1.2165, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.915036959032732e-05, | |
| "loss": 1.2503, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.8964700038042626e-05, | |
| "loss": 1.2767, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.8779840607824618e-05, | |
| "loss": 1.2897, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.859579314774079e-05, | |
| "loss": 1.1977, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.8412559497741278e-05, | |
| "loss": 1.2283, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.8230141489640394e-05, | |
| "loss": 1.2615, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.804854094709838e-05, | |
| "loss": 1.2232, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7867759685603114e-05, | |
| "loss": 1.2341, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.768779951245202e-05, | |
| "loss": 1.2484, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7508662226734006e-05, | |
| "loss": 1.255, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7330349619311415e-05, | |
| "loss": 1.2868, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7152863472802195e-05, | |
| "loss": 1.2346, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6976205561561975e-05, | |
| "loss": 1.2512, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6800377651666465e-05, | |
| "loss": 1.2173, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6625381500893655e-05, | |
| "loss": 1.2773, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6451218858706374e-05, | |
| "loss": 1.2677, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6277891466234708e-05, | |
| "loss": 1.2503, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6105401056258674e-05, | |
| "loss": 1.2477, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5933749353190764e-05, | |
| "loss": 1.2885, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5762938073058853e-05, | |
| "loss": 1.2545, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.559296892348897e-05, | |
| "loss": 1.3032, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5423843603688236e-05, | |
| "loss": 1.2375, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5255563804427885e-05, | |
| "loss": 1.2736, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5088131208026367e-05, | |
| "loss": 1.2613, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.4921547488332454e-05, | |
| "loss": 1.2215, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.475581431070865e-05, | |
| "loss": 1.2022, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.4590933332014401e-05, | |
| "loss": 1.242, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.442690620058964e-05, | |
| "loss": 1.2596, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.4263734556238263e-05, | |
| "loss": 1.2308, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.4101420030211654e-05, | |
| "loss": 1.2559, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.3939964245192538e-05, | |
| "loss": 1.2474, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.3779368815278647e-05, | |
| "loss": 1.2696, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.3619635345966641e-05, | |
| "loss": 1.2504, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.3460765434136003e-05, | |
| "loss": 1.2421, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.3302760668033076e-05, | |
| "loss": 1.2001, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.314562262725526e-05, | |
| "loss": 1.2147, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.298935288273515e-05, | |
| "loss": 1.2805, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2833952996724863e-05, | |
| "loss": 1.257, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2679424522780426e-05, | |
| "loss": 1.2333, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.252576900574618e-05, | |
| "loss": 1.2537, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.2372987981739393e-05, | |
| "loss": 1.2611, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.2221082978134935e-05, | |
| "loss": 1.2067, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.2070055513549938e-05, | |
| "loss": 1.2493, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1919907097828653e-05, | |
| "loss": 1.2574, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1770639232027358e-05, | |
| "loss": 1.2496, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1622253408399341e-05, | |
| "loss": 1.2225, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1474751110379933e-05, | |
| "loss": 1.2766, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1328133812571784e-05, | |
| "loss": 1.256, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1182402980730044e-05, | |
| "loss": 1.2526, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1037560071747732e-05, | |
| "loss": 1.218, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0893606533641187e-05, | |
| "loss": 1.2296, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0750543805535518e-05, | |
| "loss": 1.2528, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0608373317650323e-05, | |
| "loss": 1.2522, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0467096491285333e-05, | |
| "loss": 1.2501, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0326714738806198e-05, | |
| "loss": 1.2425, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.01872294636304e-05, | |
| "loss": 1.2714, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0048642060213154e-05, | |
| "loss": 1.2768, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.910953914033572e-06, | |
| "loss": 1.2464, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.774166401580732e-06, | |
| "loss": 1.2267, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.638280890339945e-06, | |
| "loss": 1.2463, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.503298738779098e-06, | |
| "loss": 1.2651, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.369221296335006e-06, | |
| "loss": 1.2172, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 9.236049903400012e-06, | |
| "loss": 1.2365, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 9.103785891308547e-06, | |
| "loss": 1.2667, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.972430582323787e-06, | |
| "loss": 1.2578, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.84198528962451e-06, | |
| "loss": 1.1834, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.712451317291902e-06, | |
| "loss": 1.2584, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.58382996029652e-06, | |
| "loss": 1.284, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.456122504485397e-06, | |
| "loss": 1.2217, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.329330226569166e-06, | |
| "loss": 1.227, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.203454394109266e-06, | |
| "loss": 1.2694, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.07849626550531e-06, | |
| "loss": 1.2169, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.954457089982492e-06, | |
| "loss": 1.2436, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.831338107579056e-06, | |
| "loss": 1.2592, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.709140549133975e-06, | |
| "loss": 1.2316, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.587865636274594e-06, | |
| "loss": 1.2511, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.467514581404444e-06, | |
| "loss": 1.2475, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.3480885876911156e-06, | |
| "loss": 1.2627, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.229588849054158e-06, | |
| "loss": 1.2263, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.1120165501533e-06, | |
| "loss": 1.259, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.995372866376459e-06, | |
| "loss": 1.222, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.879658963828062e-06, | |
| "loss": 1.2719, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.7648759993174104e-06, | |
| "loss": 1.2065, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.651025120346988e-06, | |
| "loss": 1.2355, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.538107465101162e-06, | |
| "loss": 1.2374, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.426124162434688e-06, | |
| "loss": 1.2555, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.3150763318614695e-06, | |
| "loss": 1.2187, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.204965083543368e-06, | |
| "loss": 1.2505, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.095791518279059e-06, | |
| "loss": 1.2313, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.987556727493049e-06, | |
| "loss": 1.216, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.880261793224828e-06, | |
| "loss": 1.2232, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.77390778811796e-06, | |
| "loss": 1.2839, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.6684957754094105e-06, | |
| "loss": 1.2096, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.564026808918921e-06, | |
| "loss": 1.2564, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.460501933038442e-06, | |
| "loss": 1.2488, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.357922182721687e-06, | |
| "loss": 1.2381, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.256288583473834e-06, | |
| "loss": 1.2258, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.1556021513412544e-06, | |
| "loss": 1.1936, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.0558638929013354e-06, | |
| "loss": 1.2509, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.957074805252437e-06, | |
| "loss": 1.2375, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.859235876003876e-06, | |
| "loss": 1.2556, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.762348083266144e-06, | |
| "loss": 1.2136, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.666412395641062e-06, | |
| "loss": 1.2863, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.5714297722121106e-06, | |
| "loss": 1.243, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.477401162534856e-06, | |
| "loss": 1.2579, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.384327506627395e-06, | |
| "loss": 1.2646, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.2922097349610835e-06, | |
| "loss": 1.2615, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.20104876845111e-06, | |
| "loss": 1.2481, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.110845518447348e-06, | |
| "loss": 1.2763, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.021600886725263e-06, | |
| "loss": 1.2249, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.933315765476808e-06, | |
| "loss": 1.2437, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.84599103730161e-06, | |
| "loss": 1.2199, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.75962757519811e-06, | |
| "loss": 1.2349, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.6742262425548125e-06, | |
| "loss": 1.2319, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.5897878931416497e-06, | |
| "loss": 1.2168, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.5063133711014882e-06, | |
| "loss": 1.2151, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.4238035109416922e-06, | |
| "loss": 1.2785, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.342259137525694e-06, | |
| "loss": 1.2178, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.2616810660648588e-06, | |
| "loss": 1.2742, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.182070102110257e-06, | |
| "loss": 1.2287, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.103427041544682e-06, | |
| "loss": 1.2327, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.025752670574622e-06, | |
| "loss": 1.2371, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.9490477657224014e-06, | |
| "loss": 1.2718, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.873313093818486e-06, | |
| "loss": 1.2082, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.798549411993789e-06, | |
| "loss": 1.1962, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.7247574676720454e-06, | |
| "loss": 1.2537, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.651937998562437e-06, | |
| "loss": 1.2331, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.580091732652101e-06, | |
| "loss": 1.2437, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.509219388198958e-06, | |
| "loss": 1.2562, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.439321673724504e-06, | |
| "loss": 1.2432, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.3703992880066638e-06, | |
| "loss": 1.2832, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.302452920072895e-06, | |
| "loss": 1.2662, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.2354832491932486e-06, | |
| "loss": 1.2605, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.1694909448735645e-06, | |
| "loss": 1.2612, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.1044766668488424e-06, | |
| "loss": 1.2727, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.0404410650765817e-06, | |
| "loss": 1.2572, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.977384779730307e-06, | |
| "loss": 1.2633, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9153084411931863e-06, | |
| "loss": 1.229, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8542126700516804e-06, | |
| "loss": 1.2002, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.7940980770894122e-06, | |
| "loss": 1.2471, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.7349652632809744e-06, | |
| "loss": 1.257, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6768148197860212e-06, | |
| "loss": 1.2562, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6196473279432412e-06, | |
| "loss": 1.2322, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5634633592646609e-06, | |
| "loss": 1.225, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5082634754298385e-06, | |
| "loss": 1.2315, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4540482282803137e-06, | |
| "loss": 1.2741, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4008181598140547e-06, | |
| "loss": 1.2291, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.348573802180053e-06, | |
| "loss": 1.2508, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.2973156776729944e-06, | |
| "loss": 1.2438, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.2470442987280617e-06, | |
| "loss": 1.2337, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1977601679157625e-06, | |
| "loss": 1.2373, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1494637779369766e-06, | |
| "loss": 1.2654, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.10215561161795e-06, | |
| "loss": 1.2273, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.055836141905553e-06, | |
| "loss": 1.2503, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.0105058318624517e-06, | |
| "loss": 1.2419, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.661651346625889e-07, | |
| "loss": 1.2124, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.228144935865657e-07, | |
| "loss": 1.276, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 8.804543420172562e-07, | |
| "loss": 1.1957, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 8.390851034354552e-07, | |
| "loss": 1.2669, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.987071914156596e-07, | |
| "loss": 1.2554, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.593210096219161e-07, | |
| "loss": 1.2942, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 7.20926951803802e-07, | |
| "loss": 1.2459, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 6.835254017924953e-07, | |
| "loss": 1.213, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 6.471167334968886e-07, | |
| "loss": 1.2581, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 6.117013108999037e-07, | |
| "loss": 1.2228, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.772794880548715e-07, | |
| "loss": 1.2393, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.438516090819024e-07, | |
| "loss": 1.2756, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.114180081645214e-07, | |
| "loss": 1.2465, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.799790095463164e-07, | |
| "loss": 1.2646, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.495349275276839e-07, | |
| "loss": 1.254, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.200860664626882e-07, | |
| "loss": 1.2278, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.9163272075599664e-07, | |
| "loss": 1.2163, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.641751748600042e-07, | |
| "loss": 1.2457, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3771370327190246e-07, | |
| "loss": 1.2151, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.122485705310041e-07, | |
| "loss": 1.1919, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.877800312160783e-07, | |
| "loss": 1.2623, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.643083299427751e-07, | |
| "loss": 1.2373, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.4183370136121595e-07, | |
| "loss": 1.2287, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.2035637015365152e-07, | |
| "loss": 1.2602, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.9987655103217428e-07, | |
| "loss": 1.231, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.8039444873663158e-07, | |
| "loss": 1.1957, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.6191025803250492e-07, | |
| "loss": 1.2533, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.444241637090338e-07, | |
| "loss": 1.2552, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.2793634057732818e-07, | |
| "loss": 1.2871, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.1244695346864786e-07, | |
| "loss": 1.2085, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 9.795615723270369e-08, | |
| "loss": 1.2469, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.446409673615874e-08, | |
| "loss": 1.2277, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 7.197090686119623e-08, | |
| "loss": 1.2988, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.047671250408726e-08, | |
| "loss": 1.2706, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.998162857402511e-08, | |
| "loss": 1.2576, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.0485759991937264e-08, | |
| "loss": 1.2001, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.1989201689452967e-08, | |
| "loss": 1.2299, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.4492038607948353e-08, | |
| "loss": 1.2341, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.7994345697680547e-08, | |
| "loss": 1.2108, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.2496187917065972e-08, | |
| "loss": 1.2237, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 7.997620232014225e-09, | |
| "loss": 1.3129, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.498687615372976e-09, | |
| "loss": 1.1938, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.999425046506076e-09, | |
| "loss": 1.224, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.998575109160797e-10, | |
| "loss": 1.254, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.2513, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.2233707904815674, | |
| "eval_runtime": 1795.568, | |
| "eval_samples_per_second": 16.333, | |
| "eval_steps_per_second": 1.361, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 5520, | |
| "total_flos": 3958946251407360.0, | |
| "train_loss": 1.269419441966043, | |
| "train_runtime": 32984.8618, | |
| "train_samples_per_second": 8.033, | |
| "train_steps_per_second": 0.167 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 5520, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 3958946251407360.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |