diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15381 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.015184011236168315, + "eval_steps": 500, + "global_step": 12800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0001999999999957118, + "loss": 3.6491, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999999827435, + "loss": 3.5313, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999999610949, + "loss": 3.4388, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999993076608, + "loss": 3.1787, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999989175698, + "loss": 3.0979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999984406761, + "loss": 3.183, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999999787698, + "loss": 3.24, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999972264808, + "loss": 3.2554, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999964891794, + "loss": 3.1969, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999956650752, + "loss": 3.013, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999947541687, + "loss": 2.9786, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999937564593, + "loss": 3.3584, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999926719473, + "loss": 3.3326, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999915006324, + "loss": 3.1324, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999990242515, + "loss": 3.3203, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999888975953, + "loss": 3.05, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999874658727, + "loss": 2.9144, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999859473478, + "loss": 3.1871, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999843420198, + "loss": 3.1406, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999826498895, + "loss": 2.7888, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999808709563, + "loss": 2.8994, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999790052206, + "loss": 3.0518, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999770526822, + "loss": 3.0918, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999750133416, + "loss": 3.0768, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999728871978, + "loss": 2.9068, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999706742517, + "loss": 3.1022, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999968374503, + "loss": 3.0395, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999659879515, + "loss": 2.9104, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999635145975, + "loss": 3.0261, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999609544408, + "loss": 2.9319, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999583074815, + "loss": 3.0491, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999555737197, + "loss": 3.0118, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999952753155, + "loss": 2.9509, + "step": 165 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999949845788, + "loss": 3.0506, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999946851618, + "loss": 2.9915, + "step": 175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999437706457, + "loss": 3.0174, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999406028707, + "loss": 3.0289, + "step": 185 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999937348293, + "loss": 2.9562, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999340069126, + "loss": 3.1903, + "step": 195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999305787301, + "loss": 2.9751, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999270637443, + "loss": 2.8893, + "step": 205 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999923461956, + "loss": 2.8216, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999197733653, + "loss": 3.0853, + "step": 215 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999159979722, + "loss": 3.1333, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999121357763, + "loss": 2.9536, + "step": 225 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999081867775, + "loss": 2.937, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999041509764, + "loss": 2.8704, + "step": 235 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999000283722, + "loss": 3.06, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999895818966, + "loss": 2.8869, + "step": 245 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998915227568, + "loss": 2.8376, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999887139745, + "loss": 2.8362, + "step": 255 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998826699308, + "loss": 2.9185, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998781133137, + "loss": 2.8776, + "step": 265 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998734698943, + "loss": 2.9575, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998687396722, + "loss": 3.0368, + "step": 275 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999863922647, + "loss": 2.8667, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999985901882, + "loss": 2.8613, + "step": 285 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999985402819, + "loss": 2.9864, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998489507573, + "loss": 2.6927, + "step": 295 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999843786522, + "loss": 2.7765, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999838535484, + "loss": 2.8588, + "step": 305 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998331976434, + "loss": 2.7963, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998277730005, + "loss": 2.7799, + "step": 315 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999822261555, + "loss": 2.9825, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998166633063, + "loss": 2.9661, + "step": 325 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998109782553, + "loss": 2.821, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999805206402, + "loss": 2.8383, + "step": 335 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997993477457, + "loss": 2.9595, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999793402287, + "loss": 2.8529, + "step": 345 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997873700257, + "loss": 2.7759, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997812509616, + "loss": 2.8958, + "step": 355 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999775045095, + "loss": 2.9969, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997687524256, + "loss": 2.9635, + "step": 365 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999762372954, + "loss": 3.0617, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997559066795, + "loss": 2.7889, + "step": 375 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997493536027, + "loss": 2.9124, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997427137229, + "loss": 2.866, + "step": 385 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997359870407, + "loss": 2.8843, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997291735556, + "loss": 2.8506, + "step": 395 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997222732682, + "loss": 3.0383, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997152861783, + "loss": 2.8744, + "step": 405 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997082122858, + "loss": 2.8794, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999997010515904, + "loss": 2.8376, + "step": 415 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996938040927, + "loss": 2.8557, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996864697922, + "loss": 2.6925, + "step": 425 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999679048689, + "loss": 2.5931, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996715407833, + "loss": 3.012, + "step": 435 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999663946075, + "loss": 2.6536, + "step": 440 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996562645645, + "loss": 2.8957, + "step": 445 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996484962508, + "loss": 2.7622, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996406411347, + "loss": 2.8935, + "step": 455 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999632699216, + "loss": 2.9804, + "step": 460 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999624670495, + "loss": 2.8613, + "step": 465 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996165549712, + "loss": 3.0941, + "step": 470 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996083526445, + "loss": 2.6941, + "step": 475 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996000635156, + "loss": 2.9191, + "step": 480 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999591687584, + "loss": 3.0899, + "step": 485 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995832248497, + "loss": 2.9366, + "step": 490 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999574675313, + "loss": 2.9312, + "step": 495 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995660389735, + "loss": 2.9255, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995573158317, + "loss": 2.8753, + "step": 505 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999548505887, + "loss": 2.8521, + "step": 510 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999953960914, + "loss": 2.8991, + "step": 515 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995306255902, + "loss": 2.8577, + "step": 520 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995215552377, + "loss": 2.61, + "step": 525 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995123980827, + "loss": 2.8954, + "step": 530 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995031541254, + "loss": 2.9163, + "step": 535 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994938233652, + "loss": 2.8967, + "step": 540 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994844058024, + "loss": 2.7085, + "step": 545 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994749014374, + "loss": 3.0516, + "step": 550 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994653102695, + "loss": 2.7746, + "step": 555 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999455632299, + "loss": 2.742, + "step": 560 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999445867526, + "loss": 2.8224, + "step": 565 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994360159505, + "loss": 2.9539, + "step": 570 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994260775722, + "loss": 2.7028, + "step": 575 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994160523917, + "loss": 2.763, + "step": 580 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999994059404083, + "loss": 2.6874, + "step": 585 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993957416225, + "loss": 2.9416, + "step": 590 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999385456034, + "loss": 2.7574, + "step": 595 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993750836428, + "loss": 2.9096, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993646244494, + "loss": 2.6635, + "step": 605 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999354078453, + "loss": 2.8138, + "step": 610 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993434456544, + "loss": 2.8623, + "step": 615 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999332726053, + "loss": 2.716, + "step": 620 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993219196492, + "loss": 2.8355, + "step": 625 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993110264428, + "loss": 2.5897, + "step": 630 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999993000464336, + "loss": 2.574, + "step": 635 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999288979622, + "loss": 2.6675, + "step": 640 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999277826008, + "loss": 2.7379, + "step": 645 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999992665855912, + "loss": 2.7875, + "step": 650 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999992552583717, + "loss": 2.9414, + "step": 655 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999924384435, + "loss": 2.6379, + "step": 660 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999992323435257, + "loss": 2.9688, + "step": 665 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999992207558987, + "loss": 2.8554, + "step": 670 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999209081469, + "loss": 2.8075, + "step": 675 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999197320237, + "loss": 2.8249, + "step": 680 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999991854722024, + "loss": 2.4325, + "step": 685 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999173537365, + "loss": 2.822, + "step": 690 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999991615157251, + "loss": 2.7508, + "step": 695 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999149407283, + "loss": 2.8431, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999137212038, + "loss": 2.7273, + "step": 705 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999991249299907, + "loss": 2.8935, + "step": 710 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999991125611407, + "loss": 2.7908, + "step": 715 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999991001054884, + "loss": 2.7172, + "step": 720 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990875630333, + "loss": 2.5546, + "step": 725 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990749337756, + "loss": 2.9523, + "step": 730 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990622177153, + "loss": 2.7861, + "step": 735 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990494148527, + "loss": 2.7639, + "step": 740 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990365251872, + "loss": 2.6946, + "step": 745 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999990235487194, + "loss": 2.8027, + "step": 750 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999999010485449, + "loss": 2.8132, + "step": 755 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989973353764, + "loss": 2.6649, + "step": 760 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989840985006, + "loss": 2.805, + "step": 765 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989707748228, + "loss": 2.5914, + "step": 770 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989573643424, + "loss": 2.6823, + "step": 775 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998943867059, + "loss": 2.7104, + "step": 780 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989302829735, + "loss": 2.7045, + "step": 785 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999989166120857, + "loss": 2.7507, + "step": 790 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998902854395, + "loss": 2.6759, + "step": 795 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988890099016, + "loss": 2.8907, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988750786057, + "loss": 3.0281, + "step": 805 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988610605075, + "loss": 2.6338, + "step": 810 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988469556067, + "loss": 2.7324, + "step": 815 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988327639036, + "loss": 2.7257, + "step": 820 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998818485398, + "loss": 2.7441, + "step": 825 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999988041200894, + "loss": 2.7089, + "step": 830 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987896679786, + "loss": 2.6935, + "step": 835 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987751290652, + "loss": 2.6191, + "step": 840 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987605033492, + "loss": 2.7976, + "step": 845 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987457908306, + "loss": 2.7551, + "step": 850 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987309915097, + "loss": 2.7361, + "step": 855 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987161053865, + "loss": 2.8261, + "step": 860 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999987011324604, + "loss": 2.8417, + "step": 865 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986860727318, + "loss": 2.7262, + "step": 870 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986709262008, + "loss": 2.6199, + "step": 875 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986556928673, + "loss": 2.8727, + "step": 880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986403727312, + "loss": 2.7327, + "step": 885 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986249657928, + "loss": 2.6243, + "step": 890 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999986094720518, + "loss": 2.7786, + "step": 895 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998593891508, + "loss": 2.5695, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998578224162, + "loss": 2.7535, + "step": 905 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999985624700136, + "loss": 2.9243, + "step": 910 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999985466290622, + "loss": 2.8599, + "step": 915 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999985307013086, + "loss": 2.6587, + "step": 920 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999985146867526, + "loss": 2.7066, + "step": 925 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998498585394, + "loss": 2.7567, + "step": 930 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998482397233, + "loss": 2.8307, + "step": 935 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999984661222696, + "loss": 2.6447, + "step": 940 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999984497605033, + "loss": 2.916, + "step": 945 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999984333119345, + "loss": 2.8343, + "step": 950 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999984167765638, + "loss": 2.5178, + "step": 955 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999840015439, + "loss": 2.8607, + "step": 960 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999983834454138, + "loss": 2.7741, + "step": 965 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999983666496357, + "loss": 2.8707, + "step": 970 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999983497670547, + "loss": 2.9699, + "step": 975 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998332797671, + "loss": 2.5461, + "step": 980 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998315741485, + "loss": 2.7899, + "step": 985 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999982985984967, + "loss": 2.6976, + "step": 990 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999982813687057, + "loss": 2.6443, + "step": 995 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998264052112, + "loss": 2.7354, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999982466487165, + "loss": 2.7049, + "step": 1005 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998229158518, + "loss": 2.4594, + "step": 1010 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998211581517, + "loss": 2.5669, + "step": 1015 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999981939177136, + "loss": 2.92, + "step": 1020 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998176167108, + "loss": 2.7088, + "step": 1025 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999981583297, + "loss": 2.6139, + "step": 1030 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998140405489, + "loss": 2.5142, + "step": 1035 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998122394476, + "loss": 2.7047, + "step": 1040 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999981042966602, + "loss": 2.6954, + "step": 1045 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999980861120422, + "loss": 2.8237, + "step": 1050 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999980678406213, + "loss": 2.7916, + "step": 1055 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999980494823984, + "loss": 2.7145, + "step": 1060 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998031037373, + "loss": 2.8736, + "step": 1065 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999998012505545, + "loss": 2.656, + "step": 1070 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999979938869147, + "loss": 2.5206, + "step": 1075 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997975181482, + "loss": 2.8481, + "step": 1080 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999979563892467, + "loss": 2.8695, + "step": 1085 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999979375102088, + "loss": 2.5611, + "step": 1090 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999979185443686, + "loss": 2.7148, + "step": 1095 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999978994917256, + "loss": 2.8026, + "step": 1100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999978803522808, + "loss": 2.5893, + "step": 1105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999978611260334, + "loss": 2.6701, + "step": 1110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999978418129831, + "loss": 2.7724, + "step": 1115 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999978224131306, + "loss": 2.4461, + "step": 1120 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997802926476, + "loss": 2.7933, + "step": 1125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999977833530186, + "loss": 2.6923, + "step": 1130 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999977636927588, + "loss": 2.6523, + "step": 1135 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999977439456968, + "loss": 2.6436, + "step": 1140 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999977241118319, + "loss": 2.8382, + "step": 1145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999977041911652, + "loss": 2.7091, + "step": 1150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999976841836956, + "loss": 2.8385, + "step": 1155 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999976640894235, + "loss": 2.3165, + "step": 1160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999976439083494, + "loss": 2.8163, + "step": 1165 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999976236404726, + "loss": 2.7067, + "step": 1170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999976032857933, + "loss": 2.5457, + "step": 1175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999975828443117, + "loss": 2.7439, + "step": 1180 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999975623160278, + "loss": 2.6707, + "step": 1185 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999975417009415, + "loss": 2.5346, + "step": 1190 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999975209990524, + "loss": 2.7652, + "step": 1195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999975002103613, + "loss": 2.6012, + "step": 1200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999974793348676, + "loss": 2.7155, + "step": 1205 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999974583725716, + "loss": 2.8679, + "step": 1210 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999974373234732, + "loss": 2.6568, + "step": 1215 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999974161875723, + "loss": 2.763, + "step": 1220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999973949648688, + "loss": 2.5574, + "step": 1225 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999973736553633, + "loss": 2.629, + "step": 1230 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999973522590554, + "loss": 2.5492, + "step": 1235 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997330775945, + "loss": 2.4964, + "step": 1240 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997309206032, + "loss": 2.8257, + "step": 1245 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999972875493167, + "loss": 2.9205, + "step": 1250 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997265805799, + "loss": 2.5601, + "step": 1255 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997243975479, + "loss": 2.6179, + "step": 1260 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999972220583568, + "loss": 2.5755, + "step": 1265 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999972000544317, + "loss": 2.4769, + "step": 1270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999971779637046, + "loss": 2.6268, + "step": 1275 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999971557861749, + "loss": 2.4707, + "step": 1280 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997133521843, + "loss": 2.6769, + "step": 1285 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999971111707088, + "loss": 2.514, + "step": 1290 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999970887327718, + "loss": 2.5784, + "step": 1295 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999997066208033, + "loss": 2.8341, + "step": 1300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999970435964913, + "loss": 2.6296, + "step": 1305 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999970208981475, + "loss": 2.6278, + "step": 1310 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996998113001, + "loss": 2.5416, + "step": 1315 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999969752410528, + "loss": 2.673, + "step": 1320 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999969522823018, + "loss": 2.6878, + "step": 1325 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999969292367484, + "loss": 2.5721, + "step": 1330 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999969061043927, + "loss": 2.6098, + "step": 1335 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999968828852347, + "loss": 2.7936, + "step": 1340 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999968595792744, + "loss": 2.5121, + "step": 1345 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999968361865116, + "loss": 2.8514, + "step": 1350 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999968127069466, + "loss": 2.4604, + "step": 1355 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999967891405791, + "loss": 2.6636, + "step": 1360 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999967654874093, + "loss": 2.5993, + "step": 1365 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999967417474372, + "loss": 2.6292, + "step": 1370 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999967179206628, + "loss": 2.6366, + "step": 1375 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996694007086, + "loss": 2.7079, + "step": 1380 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999966700067065, + "loss": 2.6132, + "step": 1385 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996645919525, + "loss": 2.6106, + "step": 1390 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999966217455412, + "loss": 2.352, + "step": 1395 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999965974847552, + "loss": 2.439, + "step": 1400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999965731371666, + "loss": 2.7842, + "step": 1405 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999965487027757, + "loss": 2.5492, + "step": 1410 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999965241815826, + "loss": 2.8821, + "step": 1415 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996499573587, + "loss": 2.6805, + "step": 1420 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999964748787893, + "loss": 2.4762, + "step": 1425 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999964500971892, + "loss": 2.6549, + "step": 1430 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999964252287867, + "loss": 2.6132, + "step": 1435 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999964002735817, + "loss": 2.4747, + "step": 1440 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996375231575, + "loss": 2.8531, + "step": 1445 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999963501027653, + "loss": 2.7625, + "step": 1450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999963248871537, + "loss": 2.5995, + "step": 1455 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999962995847397, + "loss": 2.6859, + "step": 1460 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999962741955234, + "loss": 2.8208, + "step": 1465 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999962487195048, + "loss": 2.6235, + "step": 1470 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999962231566836, + "loss": 2.6238, + "step": 1475 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999961975070604, + "loss": 2.6362, + "step": 1480 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996171770635, + "loss": 2.6904, + "step": 1485 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999961459474073, + "loss": 2.5746, + "step": 1490 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999961200373772, + "loss": 2.7593, + "step": 1495 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999960940405448, + "loss": 2.3081, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999606795691, + "loss": 2.8279, + "step": 1505 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999996041786473, + "loss": 2.6013, + "step": 1510 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999960155292338, + "loss": 2.7186, + "step": 1515 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999959891851924, + "loss": 2.3538, + "step": 1520 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999959627543486, + "loss": 2.6952, + "step": 1525 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999959362367023, + "loss": 2.4529, + "step": 1530 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999959096322537, + "loss": 2.8643, + "step": 1535 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999958829410033, + "loss": 2.6397, + "step": 1540 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999585616295, + "loss": 2.7624, + "step": 1545 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999995829298095, + "loss": 2.4717, + "step": 1550 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999958023464375, + "loss": 2.9581, + "step": 1555 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999957753079778, + "loss": 2.5924, + "step": 1560 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999957481827162, + "loss": 2.8503, + "step": 1565 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999957209706516, + "loss": 2.7108, + "step": 1570 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999995693671785, + "loss": 2.7926, + "step": 1575 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999956662861165, + "loss": 2.6825, + "step": 1580 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999956388136453, + "loss": 2.4061, + "step": 1585 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999956112543723, + "loss": 2.8156, + "step": 1590 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999955836082965, + "loss": 2.6561, + "step": 1595 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999995555875419, + "loss": 2.6375, + "step": 1600 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999995528055739, + "loss": 2.5209, + "step": 1605 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999955001492566, + "loss": 2.5621, + "step": 1610 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999954721559724, + "loss": 2.7921, + "step": 1615 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999954440758856, + "loss": 2.7439, + "step": 1620 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999954159089967, + "loss": 2.6912, + "step": 1625 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999953876553056, + "loss": 2.4257, + "step": 1630 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999995359314812, + "loss": 2.566, + "step": 1635 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999953308875166, + "loss": 2.687, + "step": 1640 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999953023734185, + "loss": 2.8141, + "step": 1645 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999952737725186, + "loss": 2.6272, + "step": 1650 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999952450848162, + "loss": 2.539, + "step": 1655 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999952163103117, + "loss": 2.3768, + "step": 1660 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999951874490052, + "loss": 2.5902, + "step": 1665 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999951585008963, + "loss": 2.5818, + "step": 1670 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999951294659852, + "loss": 2.6016, + "step": 1675 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999951003442718, + "loss": 2.843, + "step": 1680 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999950711357563, + "loss": 2.7841, + "step": 1685 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999950418404385, + "loss": 2.4404, + "step": 1690 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999950124583186, + "loss": 2.7421, + "step": 1695 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999949829893968, + "loss": 2.9662, + "step": 1700 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999949534336723, + "loss": 2.5873, + "step": 1705 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994923791146, + "loss": 2.5671, + "step": 1710 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999948940618173, + "loss": 2.5707, + "step": 1715 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999948642456861, + "loss": 2.7794, + "step": 1720 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999948343427533, + "loss": 2.7436, + "step": 1725 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994804353018, + "loss": 2.8675, + "step": 1730 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999947742764803, + "loss": 2.6696, + "step": 1735 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994744113141, + "loss": 2.7555, + "step": 1740 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999947138629992, + "loss": 2.6759, + "step": 1745 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999946835260555, + "loss": 2.5621, + "step": 1750 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999946531023093, + "loss": 2.7678, + "step": 1755 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994622591761, + "loss": 2.6351, + "step": 1760 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999945919944105, + "loss": 2.6966, + "step": 1765 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994561310258, + "loss": 2.6475, + "step": 1770 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999945305393033, + "loss": 2.7657, + "step": 1775 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999944996815466, + "loss": 2.3656, + "step": 1780 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999944687369874, + "loss": 2.7185, + "step": 1785 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994437705626, + "loss": 2.5218, + "step": 1790 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994406587463, + "loss": 2.6173, + "step": 1795 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999943753824974, + "loss": 2.6548, + "step": 1800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999943440907297, + "loss": 2.8232, + "step": 1805 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999431271216, + "loss": 2.6127, + "step": 1810 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999942812467882, + "loss": 2.5608, + "step": 1815 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999942496946142, + "loss": 2.869, + "step": 1820 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994218055638, + "loss": 2.442, + "step": 1825 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999418632986, + "loss": 2.4997, + "step": 1830 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999941545172795, + "loss": 2.4263, + "step": 1835 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994122617897, + "loss": 2.6923, + "step": 1840 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999940906317124, + "loss": 2.479, + "step": 1845 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999940585587256, + "loss": 2.6618, + "step": 1850 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999994026398937, + "loss": 2.8523, + "step": 1855 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999939941523458, + "loss": 2.6514, + "step": 1860 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999939618189528, + "loss": 2.4561, + "step": 1865 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999939293987575, + "loss": 2.8679, + "step": 1870 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999938968917605, + "loss": 2.81, + "step": 1875 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999938642979611, + "loss": 2.7132, + "step": 1880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999938316173598, + "loss": 2.6953, + "step": 1885 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993798849956, + "loss": 2.5594, + "step": 1890 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999937659957503, + "loss": 2.8072, + "step": 1895 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999937330547428, + "loss": 2.7259, + "step": 1900 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993700026933, + "loss": 2.602, + "step": 1905 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993666912321, + "loss": 2.6015, + "step": 1910 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993633710907, + "loss": 2.7177, + "step": 1915 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993600422691, + "loss": 2.7859, + "step": 1920 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999935670476728, + "loss": 2.4358, + "step": 1925 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999935335858528, + "loss": 2.5998, + "step": 1930 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999935000372305, + "loss": 2.7423, + "step": 1935 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993466401806, + "loss": 2.5114, + "step": 1940 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999934326795797, + "loss": 2.4934, + "step": 1945 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999933988705513, + "loss": 2.6945, + "step": 1950 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999933649747208, + "loss": 2.6669, + "step": 1955 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999933309920885, + "loss": 2.787, + "step": 1960 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999932969226537, + "loss": 2.6692, + "step": 1965 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993262766417, + "loss": 2.6079, + "step": 1970 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999932285233785, + "loss": 2.4989, + "step": 1975 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999931941935378, + "loss": 2.774, + "step": 1980 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999993159776895, + "loss": 2.6556, + "step": 1985 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999931252734503, + "loss": 2.5328, + "step": 1990 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999930906832033, + "loss": 2.6393, + "step": 1995 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999930560061547, + "loss": 2.6058, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999930212423036, + "loss": 2.8369, + "step": 2005 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992986391651, + "loss": 2.64, + "step": 2010 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992951454196, + "loss": 2.6686, + "step": 2015 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992916429939, + "loss": 2.9274, + "step": 2020 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999288131888, + "loss": 2.4095, + "step": 2025 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999928461210192, + "loss": 2.5919, + "step": 2030 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992810836356, + "loss": 2.6322, + "step": 2035 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999927754648912, + "loss": 2.587, + "step": 2040 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999927400066243, + "loss": 2.6958, + "step": 2045 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999927044615554, + "loss": 2.5001, + "step": 2050 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999926688296844, + "loss": 2.7739, + "step": 2055 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999926331110116, + "loss": 2.6311, + "step": 2060 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999925973055365, + "loss": 2.2464, + "step": 2065 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999925614132597, + "loss": 2.6054, + "step": 2070 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999925254341806, + "loss": 2.7537, + "step": 2075 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999924893683, + "loss": 2.674, + "step": 2080 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992453215617, + "loss": 2.7459, + "step": 2085 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999924169761322, + "loss": 2.6581, + "step": 2090 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999923806498452, + "loss": 2.5208, + "step": 2095 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999923442367567, + "loss": 2.9183, + "step": 2100 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992307736866, + "loss": 2.4701, + "step": 2105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999922711501732, + "loss": 2.6732, + "step": 2110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999922344766786, + "loss": 2.6728, + "step": 2115 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999992197716382, + "loss": 2.561, + "step": 2120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999921608692835, + "loss": 2.4256, + "step": 2125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999921239353832, + "loss": 2.5284, + "step": 2130 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999920869146807, + "loss": 2.7438, + "step": 2135 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999920498071761, + "loss": 2.5915, + "step": 2140 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999201261287, + "loss": 2.6054, + "step": 2145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999919753317617, + "loss": 2.8368, + "step": 2150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999919379638516, + "loss": 2.5809, + "step": 2155 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999919005091394, + "loss": 2.6437, + "step": 2160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999918629676254, + "loss": 2.611, + "step": 2165 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999918253393094, + "loss": 2.516, + "step": 2170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999917876241917, + "loss": 2.3178, + "step": 2175 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991749822272, + "loss": 2.5397, + "step": 2180 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999171193355, + "loss": 2.6697, + "step": 2185 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999916739580264, + "loss": 2.5902, + "step": 2190 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991635895701, + "loss": 2.6564, + "step": 2195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999915977465737, + "loss": 2.514, + "step": 2200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999915595106442, + "loss": 2.6288, + "step": 2205 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991521187913, + "loss": 2.7036, + "step": 2210 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999148277838, + "loss": 2.6969, + "step": 2215 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991444282045, + "loss": 2.4181, + "step": 2220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999914056989082, + "loss": 2.5399, + "step": 2225 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999913670289696, + "loss": 2.3588, + "step": 2230 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999913282722287, + "loss": 2.7133, + "step": 2235 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999912894286864, + "loss": 2.5371, + "step": 2240 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991250498342, + "loss": 2.8503, + "step": 2245 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999912114811958, + "loss": 2.4967, + "step": 2250 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999911723772478, + "loss": 2.6704, + "step": 2255 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999911331864976, + "loss": 2.5961, + "step": 2260 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999910939089458, + "loss": 2.5097, + "step": 2265 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999991054544592, + "loss": 2.598, + "step": 2270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999910150934368, + "loss": 2.7947, + "step": 2275 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999909755554794, + "loss": 2.4228, + "step": 2280 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199999093593072, + "loss": 2.5667, + "step": 2285 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990896219159, + "loss": 2.5555, + "step": 2290 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990856420796, + "loss": 2.5606, + "step": 2295 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999908165356314, + "loss": 2.6759, + "step": 2300 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990776563665, + "loss": 2.8376, + "step": 2305 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999907365048965, + "loss": 2.5424, + "step": 2310 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999906963593262, + "loss": 2.4368, + "step": 2315 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999906561269542, + "loss": 2.6155, + "step": 2320 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999906158077804, + "loss": 2.5441, + "step": 2325 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999905754018046, + "loss": 2.5587, + "step": 2330 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999905349090273, + "loss": 2.5802, + "step": 2335 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999904943294482, + "loss": 2.6721, + "step": 2340 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999904536630668, + "loss": 2.5217, + "step": 2345 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999904129098842, + "loss": 2.6426, + "step": 2350 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999903720698992, + "loss": 2.4764, + "step": 2355 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990331143113, + "loss": 2.6398, + "step": 2360 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999902901295243, + "loss": 2.8396, + "step": 2365 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999902490291347, + "loss": 2.6032, + "step": 2370 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990207841943, + "loss": 2.7985, + "step": 2375 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999901665679492, + "loss": 2.7499, + "step": 2380 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990125207154, + "loss": 2.7408, + "step": 2385 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999900837595566, + "loss": 2.5313, + "step": 2390 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999900422251575, + "loss": 2.3107, + "step": 2395 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999990000603957, + "loss": 2.7134, + "step": 2400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999899588959546, + "loss": 2.4859, + "step": 2405 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999899171011505, + "loss": 2.6712, + "step": 2410 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999898752195446, + "loss": 2.8095, + "step": 2415 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989833251137, + "loss": 2.5598, + "step": 2420 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999897911959273, + "loss": 2.4414, + "step": 2425 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989749053916, + "loss": 2.5381, + "step": 2430 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999897068251031, + "loss": 2.5387, + "step": 2435 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999896645094887, + "loss": 2.607, + "step": 2440 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999896221070725, + "loss": 2.8016, + "step": 2445 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989579617854, + "loss": 2.7545, + "step": 2450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999895370418345, + "loss": 2.7226, + "step": 2455 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999894943790127, + "loss": 2.7263, + "step": 2460 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999894516293897, + "loss": 2.714, + "step": 2465 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999894087929646, + "loss": 2.8175, + "step": 2470 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989365869738, + "loss": 2.5644, + "step": 2475 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999893228597095, + "loss": 2.4785, + "step": 2480 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999892797628794, + "loss": 2.6025, + "step": 2485 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999892365792475, + "loss": 2.7969, + "step": 2490 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999891933088142, + "loss": 2.7892, + "step": 2495 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989149951579, + "loss": 2.556, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999891065075422, + "loss": 2.5106, + "step": 2505 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999890629767038, + "loss": 2.5166, + "step": 2510 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999890193590634, + "loss": 2.5847, + "step": 2515 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999889756546215, + "loss": 2.6806, + "step": 2520 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999988931863378, + "loss": 2.6937, + "step": 2525 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999888879853326, + "loss": 2.6377, + "step": 2530 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999888440204856, + "loss": 2.3986, + "step": 2535 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999887999688372, + "loss": 2.8614, + "step": 2540 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999887558303872, + "loss": 2.8034, + "step": 2545 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999887116051352, + "loss": 2.6652, + "step": 2550 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999886672930818, + "loss": 2.703, + "step": 2555 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999886228942265, + "loss": 2.5679, + "step": 2560 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999885784085698, + "loss": 2.3774, + "step": 2565 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999885338361115, + "loss": 2.7305, + "step": 2570 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999884891768512, + "loss": 2.1613, + "step": 2575 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999884444307897, + "loss": 2.6024, + "step": 2580 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999883995979264, + "loss": 2.5547, + "step": 2585 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999883546782614, + "loss": 2.6603, + "step": 2590 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999883096717948, + "loss": 2.3867, + "step": 2595 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999882645785268, + "loss": 2.5352, + "step": 2600 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999988219398457, + "loss": 2.5642, + "step": 2605 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999881741315857, + "loss": 2.86, + "step": 2610 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999881287779126, + "loss": 2.7749, + "step": 2615 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999988083337438, + "loss": 2.3401, + "step": 2620 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999988037810162, + "loss": 2.4999, + "step": 2625 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999879921960841, + "loss": 2.6999, + "step": 2630 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999879464952048, + "loss": 2.6279, + "step": 2635 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999879007075237, + "loss": 2.5854, + "step": 2640 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999878548330414, + "loss": 2.6713, + "step": 2645 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999878088717573, + "loss": 2.8, + "step": 2650 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999987762823672, + "loss": 2.4333, + "step": 2655 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999877166887843, + "loss": 2.8168, + "step": 2660 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999876704670957, + "loss": 2.467, + "step": 2665 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999876241586054, + "loss": 2.5969, + "step": 2670 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999875777633135, + "loss": 2.5396, + "step": 2675 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998753128122, + "loss": 2.5875, + "step": 2680 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999987484712325, + "loss": 2.5898, + "step": 2685 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999874380566285, + "loss": 2.7047, + "step": 2690 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999873913141306, + "loss": 2.5165, + "step": 2695 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999987344484831, + "loss": 2.5659, + "step": 2700 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998729756873, + "loss": 2.696, + "step": 2705 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999872505658273, + "loss": 2.474, + "step": 2710 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999872034761232, + "loss": 2.5292, + "step": 2715 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999871562996173, + "loss": 2.8167, + "step": 2720 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999871090363103, + "loss": 2.6075, + "step": 2725 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999870616862017, + "loss": 2.3508, + "step": 2730 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999870142492913, + "loss": 2.6551, + "step": 2735 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999869667255795, + "loss": 2.3473, + "step": 2740 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999869191150664, + "loss": 2.5731, + "step": 2745 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999868714177515, + "loss": 2.5612, + "step": 2750 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999868236336355, + "loss": 2.4575, + "step": 2755 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986775762718, + "loss": 2.6937, + "step": 2760 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999867278049988, + "loss": 2.7548, + "step": 2765 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986679760478, + "loss": 2.7528, + "step": 2770 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986631629156, + "loss": 2.552, + "step": 2775 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999865834110326, + "loss": 2.6023, + "step": 2780 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999865351061076, + "loss": 2.7166, + "step": 2785 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999864867143813, + "loss": 2.5235, + "step": 2790 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986438235853, + "loss": 2.5894, + "step": 2795 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999863896705237, + "loss": 2.625, + "step": 2800 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986341018393, + "loss": 2.506, + "step": 2805 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999862922794607, + "loss": 2.8826, + "step": 2810 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986243453727, + "loss": 2.3339, + "step": 2815 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986194541192, + "loss": 2.7514, + "step": 2820 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999861455418554, + "loss": 2.6697, + "step": 2825 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999860964557171, + "loss": 2.6295, + "step": 2830 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999986047282778, + "loss": 2.7592, + "step": 2835 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985998023037, + "loss": 2.6853, + "step": 2840 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999859486764948, + "loss": 2.4563, + "step": 2845 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985899243151, + "loss": 2.5586, + "step": 2850 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999858497230058, + "loss": 2.4144, + "step": 2855 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999858001160597, + "loss": 2.5669, + "step": 2860 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999857504223115, + "loss": 2.5779, + "step": 2865 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999857006417626, + "loss": 2.5671, + "step": 2870 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985650774412, + "loss": 2.6281, + "step": 2875 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998560082026, + "loss": 2.7483, + "step": 2880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999855507793065, + "loss": 2.496, + "step": 2885 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985500651552, + "loss": 2.7721, + "step": 2890 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999854504369955, + "loss": 2.463, + "step": 2895 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999854001356382, + "loss": 2.4655, + "step": 2900 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985349747479, + "loss": 2.6399, + "step": 2905 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999985299272519, + "loss": 2.8935, + "step": 2910 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999852487107575, + "loss": 2.6516, + "step": 2915 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999851980621945, + "loss": 2.7925, + "step": 2920 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999851473268302, + "loss": 2.5695, + "step": 2925 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999850965046647, + "loss": 2.2234, + "step": 2930 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999850455956978, + "loss": 2.7042, + "step": 2935 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999849945999295, + "loss": 2.5192, + "step": 2940 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999849435173598, + "loss": 2.6676, + "step": 2945 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999848923479892, + "loss": 2.5299, + "step": 2950 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999848410918168, + "loss": 2.5762, + "step": 2955 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999847897488434, + "loss": 2.6976, + "step": 2960 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999847383190688, + "loss": 2.5738, + "step": 2965 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999846868024924, + "loss": 2.5577, + "step": 2970 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999984635199115, + "loss": 2.7278, + "step": 2975 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999845835089365, + "loss": 2.6825, + "step": 2980 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999845317319564, + "loss": 2.694, + "step": 2985 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999844798681752, + "loss": 2.4306, + "step": 2990 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999844279175924, + "loss": 2.6339, + "step": 2995 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999843758802084, + "loss": 2.3934, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999843237560234, + "loss": 2.5188, + "step": 3005 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999984271545037, + "loss": 2.7121, + "step": 3010 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999842192472493, + "loss": 2.6028, + "step": 3015 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998416686266, + "loss": 2.5672, + "step": 3020 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999841143912702, + "loss": 2.6899, + "step": 3025 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999984061833079, + "loss": 2.509, + "step": 3030 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999840091880863, + "loss": 2.5404, + "step": 3035 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999839564562922, + "loss": 2.6446, + "step": 3040 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999839036376972, + "loss": 2.6019, + "step": 3045 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999838507323006, + "loss": 2.6164, + "step": 3050 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999983797740103, + "loss": 2.4493, + "step": 3055 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999837446611041, + "loss": 2.7621, + "step": 3060 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999836914953042, + "loss": 2.6179, + "step": 3065 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999983638242703, + "loss": 2.8092, + "step": 3070 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999835849033006, + "loss": 2.4296, + "step": 3075 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999983531477097, + "loss": 2.5016, + "step": 3080 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999983477964092, + "loss": 2.5551, + "step": 3085 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999834243642858, + "loss": 2.7123, + "step": 3090 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999833706776788, + "loss": 2.7107, + "step": 3095 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999833169042703, + "loss": 2.5635, + "step": 3100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999832630440608, + "loss": 2.4079, + "step": 3105 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998320909705, + "loss": 2.6807, + "step": 3110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999831550632382, + "loss": 2.6134, + "step": 3115 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999831009426247, + "loss": 2.4098, + "step": 3120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999830467352106, + "loss": 2.3827, + "step": 3125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999829924409953, + "loss": 2.6387, + "step": 3130 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999829380599787, + "loss": 2.261, + "step": 3135 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982883592161, + "loss": 2.6373, + "step": 3140 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982829037542, + "loss": 2.4054, + "step": 3145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999827743961222, + "loss": 2.5333, + "step": 3150 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982719667901, + "loss": 2.4284, + "step": 3155 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982664852879, + "loss": 2.2998, + "step": 3160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999826099510552, + "loss": 2.4798, + "step": 3165 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982554962431, + "loss": 2.7227, + "step": 3170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999824998870054, + "loss": 2.5127, + "step": 3175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999824447247786, + "loss": 2.5939, + "step": 3180 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999823894757506, + "loss": 2.5919, + "step": 3185 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982334139922, + "loss": 2.5951, + "step": 3190 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999822787172917, + "loss": 2.7247, + "step": 3195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999822232078605, + "loss": 2.6713, + "step": 3200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999821676116284, + "loss": 2.4816, + "step": 3205 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999982111928595, + "loss": 2.5262, + "step": 3210 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999820561587608, + "loss": 2.7087, + "step": 3215 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999820003021253, + "loss": 2.8628, + "step": 3220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999819443586889, + "loss": 2.6129, + "step": 3225 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999818883284514, + "loss": 2.3413, + "step": 3230 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999818322114128, + "loss": 2.6626, + "step": 3235 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999981776007573, + "loss": 2.6738, + "step": 3240 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999817197169324, + "loss": 2.5838, + "step": 3245 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999816633394906, + "loss": 2.5262, + "step": 3250 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999981606875248, + "loss": 2.852, + "step": 3255 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999815503242042, + "loss": 2.4902, + "step": 3260 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999814936863593, + "loss": 2.6218, + "step": 3265 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999814369617137, + "loss": 2.6053, + "step": 3270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999813801502666, + "loss": 2.6452, + "step": 3275 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999813232520185, + "loss": 2.7253, + "step": 3280 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999812662669698, + "loss": 2.4042, + "step": 3285 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199998120919512, + "loss": 2.531, + "step": 3290 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999811520364692, + "loss": 2.5915, + "step": 3295 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999981094791017, + "loss": 2.7373, + "step": 3300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999810374587642, + "loss": 2.6961, + "step": 3305 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999809800397105, + "loss": 2.6234, + "step": 3310 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999809225338555, + "loss": 2.7268, + "step": 3315 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999808649412, + "loss": 2.5442, + "step": 3320 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999980807261743, + "loss": 2.489, + "step": 3325 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999807494954855, + "loss": 2.5636, + "step": 3330 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999980691642427, + "loss": 2.3505, + "step": 3335 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999980633702567, + "loss": 2.2822, + "step": 3340 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999805756759065, + "loss": 2.5858, + "step": 3345 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999805175624448, + "loss": 2.5202, + "step": 3350 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999804593621824, + "loss": 2.5763, + "step": 3355 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999804010751193, + "loss": 2.3721, + "step": 3360 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999803427012548, + "loss": 2.8356, + "step": 3365 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999802842405898, + "loss": 2.676, + "step": 3370 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999802256931234, + "loss": 2.6306, + "step": 3375 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999801670588565, + "loss": 2.5209, + "step": 3380 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999801083377884, + "loss": 2.4824, + "step": 3385 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999800495299196, + "loss": 2.8387, + "step": 3390 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999799906352496, + "loss": 2.8534, + "step": 3395 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999799316537792, + "loss": 2.3639, + "step": 3400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999798725855078, + "loss": 2.6524, + "step": 3405 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999798134304352, + "loss": 2.8767, + "step": 3410 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999979754188562, + "loss": 2.4607, + "step": 3415 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999796948598877, + "loss": 2.6159, + "step": 3420 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999796354444127, + "loss": 2.5035, + "step": 3425 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999795759421368, + "loss": 2.5866, + "step": 3430 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199997951635306, + "loss": 2.5947, + "step": 3435 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999794566771825, + "loss": 2.6712, + "step": 3440 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999793969145043, + "loss": 2.7967, + "step": 3445 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999793370650249, + "loss": 2.4733, + "step": 3450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999792771287447, + "loss": 2.5811, + "step": 3455 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999792171056637, + "loss": 2.7599, + "step": 3460 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999979156995782, + "loss": 2.4645, + "step": 3465 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999790967990995, + "loss": 2.7224, + "step": 3470 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999979036515616, + "loss": 2.5087, + "step": 3475 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999789761453317, + "loss": 2.8189, + "step": 3480 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999789156882467, + "loss": 2.5146, + "step": 3485 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999978855144361, + "loss": 2.7451, + "step": 3490 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999787945136746, + "loss": 2.6152, + "step": 3495 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999978733796187, + "loss": 2.399, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999786729918987, + "loss": 2.5335, + "step": 3505 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199997861210081, + "loss": 2.4403, + "step": 3510 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999785511229203, + "loss": 2.6851, + "step": 3515 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999784900582296, + "loss": 2.7007, + "step": 3520 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999784289067386, + "loss": 2.4623, + "step": 3525 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999783676684466, + "loss": 2.5301, + "step": 3530 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999978306343354, + "loss": 2.2027, + "step": 3535 + }, + { + "epoch": 0.0, + "learning_rate": 0.000199997824493146, + "loss": 2.6527, + "step": 3540 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999978183432766, + "loss": 2.631, + "step": 3545 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999978121847271, + "loss": 2.5573, + "step": 3550 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999780601749753, + "loss": 2.4258, + "step": 3555 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999977998415879, + "loss": 2.6852, + "step": 3560 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999977936569982, + "loss": 2.5886, + "step": 3565 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999778746372838, + "loss": 2.6689, + "step": 3570 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999778126177854, + "loss": 2.6879, + "step": 3575 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999777505114863, + "loss": 2.6052, + "step": 3580 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999776883183862, + "loss": 2.5364, + "step": 3585 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999776260384858, + "loss": 2.7237, + "step": 3590 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999775636717843, + "loss": 2.6191, + "step": 3595 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999775012182825, + "loss": 2.6068, + "step": 3600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999774386779797, + "loss": 2.4453, + "step": 3605 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999773760508762, + "loss": 2.6097, + "step": 3610 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999773133369723, + "loss": 2.4836, + "step": 3615 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999772505362677, + "loss": 2.7328, + "step": 3620 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999771876487622, + "loss": 2.6265, + "step": 3625 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999771246744565, + "loss": 2.5548, + "step": 3630 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999770616133496, + "loss": 2.4861, + "step": 3635 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999769984654426, + "loss": 2.6367, + "step": 3640 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999769352307346, + "loss": 2.5729, + "step": 3645 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976871909226, + "loss": 2.6693, + "step": 3650 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976808500917, + "loss": 2.6048, + "step": 3655 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976745005807, + "loss": 2.5641, + "step": 3660 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999766814238968, + "loss": 2.5885, + "step": 3665 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976617755186, + "loss": 2.7146, + "step": 3670 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999765539996744, + "loss": 2.632, + "step": 3675 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999764901573624, + "loss": 2.4664, + "step": 3680 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999764262282495, + "loss": 2.2652, + "step": 3685 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999763622123362, + "loss": 2.5475, + "step": 3690 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999762981096224, + "loss": 2.5403, + "step": 3695 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976233920108, + "loss": 2.8656, + "step": 3700 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999976169643793, + "loss": 2.6758, + "step": 3705 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999761052806773, + "loss": 2.4518, + "step": 3710 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999760408307614, + "loss": 2.5698, + "step": 3715 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999759762940445, + "loss": 2.5766, + "step": 3720 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999759116705272, + "loss": 2.6878, + "step": 3725 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999758469602097, + "loss": 2.4704, + "step": 3730 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999757821630913, + "loss": 2.56, + "step": 3735 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999757172791725, + "loss": 2.4563, + "step": 3740 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999756523084533, + "loss": 2.2081, + "step": 3745 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999755872509333, + "loss": 2.6101, + "step": 3750 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999755221066127, + "loss": 2.5998, + "step": 3755 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999975456875492, + "loss": 2.4085, + "step": 3760 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999753915575706, + "loss": 2.3679, + "step": 3765 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999975326152849, + "loss": 2.7311, + "step": 3770 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999752606613265, + "loss": 2.6587, + "step": 3775 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999751950830038, + "loss": 2.4725, + "step": 3780 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999751294178805, + "loss": 2.5657, + "step": 3785 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999750636659567, + "loss": 2.4418, + "step": 3790 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999749978272325, + "loss": 2.4117, + "step": 3795 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999974931901708, + "loss": 2.6565, + "step": 3800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999748658893827, + "loss": 2.6, + "step": 3805 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999747997902573, + "loss": 2.4808, + "step": 3810 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999747336043312, + "loss": 2.4266, + "step": 3815 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999746673316046, + "loss": 2.5674, + "step": 3820 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999746009720777, + "loss": 2.6225, + "step": 3825 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999745345257504, + "loss": 2.7522, + "step": 3830 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999744679926226, + "loss": 2.4364, + "step": 3835 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999744013726947, + "loss": 2.5721, + "step": 3840 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999743346659662, + "loss": 2.4399, + "step": 3845 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999742678724372, + "loss": 2.671, + "step": 3850 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999742009921078, + "loss": 2.4909, + "step": 3855 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999974134024978, + "loss": 2.5082, + "step": 3860 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999740669710478, + "loss": 2.375, + "step": 3865 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999739998303177, + "loss": 2.6448, + "step": 3870 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999739326027866, + "loss": 2.5758, + "step": 3875 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999738652884554, + "loss": 2.512, + "step": 3880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999737978873238, + "loss": 2.4797, + "step": 3885 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999973730399392, + "loss": 2.4649, + "step": 3890 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999736628246594, + "loss": 2.6255, + "step": 3895 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999973595163127, + "loss": 2.2548, + "step": 3900 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999735274147938, + "loss": 2.485, + "step": 3905 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999973459579661, + "loss": 2.5685, + "step": 3910 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999973391657727, + "loss": 2.6105, + "step": 3915 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999733236489932, + "loss": 2.7149, + "step": 3920 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999973255553459, + "loss": 2.6762, + "step": 3925 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999731873711246, + "loss": 2.3135, + "step": 3930 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999731191019897, + "loss": 2.4766, + "step": 3935 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999730507460546, + "loss": 2.6979, + "step": 3940 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999972982303319, + "loss": 2.3291, + "step": 3945 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999729137737835, + "loss": 2.4732, + "step": 3950 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999728451574472, + "loss": 2.8404, + "step": 3955 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999727764543113, + "loss": 2.4579, + "step": 3960 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999727076643747, + "loss": 2.5311, + "step": 3965 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999726387876377, + "loss": 2.677, + "step": 3970 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999725698241009, + "loss": 2.7106, + "step": 3975 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999725007737636, + "loss": 2.4307, + "step": 3980 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999724316366262, + "loss": 2.5786, + "step": 3985 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999723624126886, + "loss": 2.4742, + "step": 3990 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999722931019504, + "loss": 2.7372, + "step": 3995 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999972223704412, + "loss": 2.4513, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999721542200737, + "loss": 2.5488, + "step": 4005 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999972084648935, + "loss": 2.5753, + "step": 4010 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999720149909962, + "loss": 2.6319, + "step": 4015 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999719452462573, + "loss": 2.5538, + "step": 4020 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999718754147182, + "loss": 2.4668, + "step": 4025 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999718054963787, + "loss": 2.6052, + "step": 4030 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999971735491239, + "loss": 2.3874, + "step": 4035 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999716653992992, + "loss": 2.62, + "step": 4040 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999715952205593, + "loss": 2.6644, + "step": 4045 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999715249550192, + "loss": 2.4428, + "step": 4050 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999971454602679, + "loss": 2.6147, + "step": 4055 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999713841635386, + "loss": 2.639, + "step": 4060 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999971313637598, + "loss": 2.6309, + "step": 4065 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999712430248575, + "loss": 2.373, + "step": 4070 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999711723253167, + "loss": 2.6889, + "step": 4075 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999711015389755, + "loss": 2.3853, + "step": 4080 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999710306658347, + "loss": 2.4082, + "step": 4085 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999709597058932, + "loss": 2.5107, + "step": 4090 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999970888659152, + "loss": 2.7697, + "step": 4095 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999708175256106, + "loss": 2.4964, + "step": 4100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999707463052695, + "loss": 2.4221, + "step": 4105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999706749981278, + "loss": 2.6319, + "step": 4110 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999970603604186, + "loss": 2.6704, + "step": 4115 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999705321234443, + "loss": 2.5134, + "step": 4120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999704605559027, + "loss": 2.6706, + "step": 4125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999703889015606, + "loss": 2.5624, + "step": 4130 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999703171604186, + "loss": 2.2552, + "step": 4135 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999702453324768, + "loss": 2.528, + "step": 4140 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999970173417735, + "loss": 2.7461, + "step": 4145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999701014161928, + "loss": 2.5758, + "step": 4150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999700293278508, + "loss": 2.63, + "step": 4155 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999969957152709, + "loss": 2.4362, + "step": 4160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999698848907667, + "loss": 2.4211, + "step": 4165 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999698125420246, + "loss": 2.4353, + "step": 4170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999697401064823, + "loss": 2.7979, + "step": 4175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999696675841402, + "loss": 2.6134, + "step": 4180 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999969594974998, + "loss": 2.4935, + "step": 4185 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999695222790558, + "loss": 2.5087, + "step": 4190 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999694494963137, + "loss": 2.7233, + "step": 4195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999693766267719, + "loss": 2.6594, + "step": 4200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999693036704296, + "loss": 2.5277, + "step": 4205 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999692306272877, + "loss": 2.413, + "step": 4210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999691574973456, + "loss": 2.4858, + "step": 4215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999690842806037, + "loss": 2.4431, + "step": 4220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999969010977062, + "loss": 2.2495, + "step": 4225 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199996893758672, + "loss": 2.5721, + "step": 4230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999688641095785, + "loss": 2.7159, + "step": 4235 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999687905456365, + "loss": 2.5314, + "step": 4240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999968716894895, + "loss": 2.6268, + "step": 4245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999686431573536, + "loss": 2.4966, + "step": 4250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999685693330123, + "loss": 2.3179, + "step": 4255 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999968495421871, + "loss": 2.7163, + "step": 4260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999684214239298, + "loss": 2.3001, + "step": 4265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999683473391887, + "loss": 2.6254, + "step": 4270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999682731676477, + "loss": 2.468, + "step": 4275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999681989093067, + "loss": 2.5722, + "step": 4280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999681245641662, + "loss": 2.6148, + "step": 4285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999680501322256, + "loss": 2.4819, + "step": 4290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999679756134854, + "loss": 2.6615, + "step": 4295 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999967901007945, + "loss": 2.5589, + "step": 4300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999967826315605, + "loss": 2.6975, + "step": 4305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999677515364649, + "loss": 2.5358, + "step": 4310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999676766705254, + "loss": 2.6952, + "step": 4315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999676017177858, + "loss": 2.4479, + "step": 4320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999675266782463, + "loss": 2.5406, + "step": 4325 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999967451551907, + "loss": 2.7184, + "step": 4330 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999967376338768, + "loss": 2.6204, + "step": 4335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999673010388292, + "loss": 2.4248, + "step": 4340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999672256520908, + "loss": 2.5927, + "step": 4345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999671501785523, + "loss": 2.5201, + "step": 4350 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999967074618214, + "loss": 2.6657, + "step": 4355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999669989710764, + "loss": 2.3259, + "step": 4360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999669232371385, + "loss": 2.7273, + "step": 4365 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999966847416401, + "loss": 2.7191, + "step": 4370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999966771508864, + "loss": 2.5581, + "step": 4375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999666955145272, + "loss": 2.721, + "step": 4380 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199996661943339, + "loss": 2.5538, + "step": 4385 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999665432654537, + "loss": 2.5718, + "step": 4390 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999966467010718, + "loss": 2.435, + "step": 4395 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999966390669182, + "loss": 2.438, + "step": 4400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999663142408465, + "loss": 2.6362, + "step": 4405 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999966237725711, + "loss": 2.6154, + "step": 4410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999661611237762, + "loss": 2.343, + "step": 4415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999660844350413, + "loss": 2.4809, + "step": 4420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999660076595071, + "loss": 2.4247, + "step": 4425 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999965930797173, + "loss": 2.5994, + "step": 4430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999658538480395, + "loss": 2.573, + "step": 4435 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999657768121062, + "loss": 2.585, + "step": 4440 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999965699689373, + "loss": 2.7138, + "step": 4445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999656224798407, + "loss": 2.4242, + "step": 4450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999965545183508, + "loss": 2.6523, + "step": 4455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999654678003764, + "loss": 2.5392, + "step": 4460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999653903304446, + "loss": 2.4328, + "step": 4465 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999653127737138, + "loss": 2.4089, + "step": 4470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999652351301828, + "loss": 2.6982, + "step": 4475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999651573998522, + "loss": 2.6046, + "step": 4480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999650795827223, + "loss": 2.5182, + "step": 4485 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999650016787928, + "loss": 2.8916, + "step": 4490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999649236880634, + "loss": 2.6526, + "step": 4495 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999964845610535, + "loss": 2.5543, + "step": 4500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999647674462064, + "loss": 2.5293, + "step": 4505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999646891950785, + "loss": 2.4421, + "step": 4510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999964610857151, + "loss": 2.8573, + "step": 4515 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999964532432424, + "loss": 2.5611, + "step": 4520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999644539208972, + "loss": 2.4044, + "step": 4525 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999643753225712, + "loss": 2.6072, + "step": 4530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999642966374458, + "loss": 2.5995, + "step": 4535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999642178655203, + "loss": 2.5983, + "step": 4540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999641390067957, + "loss": 2.431, + "step": 4545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999640600612716, + "loss": 2.4112, + "step": 4550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999639810289475, + "loss": 2.7427, + "step": 4555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999639019098244, + "loss": 2.4633, + "step": 4560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999638227039017, + "loss": 2.7051, + "step": 4565 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999637434111795, + "loss": 2.6208, + "step": 4570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999636640316578, + "loss": 2.3909, + "step": 4575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999635845653366, + "loss": 2.6401, + "step": 4580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999635050122158, + "loss": 2.544, + "step": 4585 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999963425372296, + "loss": 2.4398, + "step": 4590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999633456455765, + "loss": 2.5029, + "step": 4595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999632658320574, + "loss": 2.4968, + "step": 4600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999963185931739, + "loss": 2.595, + "step": 4605 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999963105944621, + "loss": 2.5892, + "step": 4610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999630258707036, + "loss": 2.418, + "step": 4615 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999962945709987, + "loss": 2.3198, + "step": 4620 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999962865462471, + "loss": 2.5866, + "step": 4625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999627851281554, + "loss": 2.6689, + "step": 4630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999627047070405, + "loss": 2.6745, + "step": 4635 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999962624199126, + "loss": 2.5815, + "step": 4640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999625436044124, + "loss": 2.4527, + "step": 4645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999624629228992, + "loss": 2.3316, + "step": 4650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999623821545867, + "loss": 2.6772, + "step": 4655 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999623012994752, + "loss": 2.5055, + "step": 4660 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999962220357564, + "loss": 2.6098, + "step": 4665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999621393288535, + "loss": 2.5823, + "step": 4670 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999620582133437, + "loss": 2.5924, + "step": 4675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999619770110343, + "loss": 2.5865, + "step": 4680 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999961895721926, + "loss": 2.4947, + "step": 4685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999618143460183, + "loss": 2.5983, + "step": 4690 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999961732883311, + "loss": 2.3177, + "step": 4695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999616513338044, + "loss": 2.4433, + "step": 4700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999615696974986, + "loss": 2.5015, + "step": 4705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999614879743935, + "loss": 2.618, + "step": 4710 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999961406164489, + "loss": 2.2983, + "step": 4715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999613242677856, + "loss": 2.4485, + "step": 4720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999612422842827, + "loss": 2.506, + "step": 4725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999611602139808, + "loss": 2.7101, + "step": 4730 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999961078056879, + "loss": 2.6012, + "step": 4735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999609958129785, + "loss": 2.6079, + "step": 4740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999609134822787, + "loss": 2.6829, + "step": 4745 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999608310647794, + "loss": 2.6195, + "step": 4750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999607485604812, + "loss": 2.3357, + "step": 4755 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999606659693836, + "loss": 2.6011, + "step": 4760 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999960583291487, + "loss": 2.7226, + "step": 4765 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999960500526791, + "loss": 2.5665, + "step": 4770 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999604176752957, + "loss": 2.5458, + "step": 4775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999603347370013, + "loss": 2.6584, + "step": 4780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999602517119076, + "loss": 2.8255, + "step": 4785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999601686000151, + "loss": 2.5149, + "step": 4790 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999960085401323, + "loss": 2.5149, + "step": 4795 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999960002115832, + "loss": 2.529, + "step": 4800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999599187435417, + "loss": 2.4177, + "step": 4805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999598352844525, + "loss": 2.8094, + "step": 4810 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999959751738564, + "loss": 2.726, + "step": 4815 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999959668105876, + "loss": 2.493, + "step": 4820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999595843863893, + "loss": 2.4468, + "step": 4825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999595005801036, + "loss": 2.451, + "step": 4830 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999594166870185, + "loss": 2.4086, + "step": 4835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999593327071344, + "loss": 2.6423, + "step": 4840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999592486404512, + "loss": 2.6097, + "step": 4845 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999959164486969, + "loss": 2.7155, + "step": 4850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999590802466876, + "loss": 2.3753, + "step": 4855 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999958995919607, + "loss": 2.7185, + "step": 4860 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999589115057275, + "loss": 2.565, + "step": 4865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999588270050488, + "loss": 2.4212, + "step": 4870 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999587424175715, + "loss": 2.5576, + "step": 4875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999586577432944, + "loss": 2.5254, + "step": 4880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999958572982219, + "loss": 2.3984, + "step": 4885 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999958488134344, + "loss": 2.5082, + "step": 4890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999584031996702, + "loss": 2.6574, + "step": 4895 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999583181781977, + "loss": 2.3945, + "step": 4900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999582330699255, + "loss": 2.5044, + "step": 4905 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999958147874855, + "loss": 2.6875, + "step": 4910 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999958062592985, + "loss": 2.5123, + "step": 4915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999579772243163, + "loss": 2.4858, + "step": 4920 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999578917688485, + "loss": 2.3913, + "step": 4925 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999957806226582, + "loss": 2.6742, + "step": 4930 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999957720597516, + "loss": 2.7184, + "step": 4935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999576348816514, + "loss": 2.5892, + "step": 4940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999575490789876, + "loss": 2.6998, + "step": 4945 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999574631895253, + "loss": 2.4694, + "step": 4950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999573772132637, + "loss": 2.569, + "step": 4955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999572911502032, + "loss": 2.3539, + "step": 4960 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999957205000344, + "loss": 2.2503, + "step": 4965 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999571187636857, + "loss": 2.4465, + "step": 4970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999570324402287, + "loss": 2.6047, + "step": 4975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999569460299723, + "loss": 2.6365, + "step": 4980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999568595329174, + "loss": 2.5386, + "step": 4985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999567729490637, + "loss": 2.5822, + "step": 4990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999566862784107, + "loss": 2.411, + "step": 4995 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999956599520959, + "loss": 2.9174, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999565126767088, + "loss": 2.4363, + "step": 5005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999564257456594, + "loss": 2.4856, + "step": 5010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999563387278113, + "loss": 2.4739, + "step": 5015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999562516231643, + "loss": 2.6323, + "step": 5020 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999561644317186, + "loss": 2.8467, + "step": 5025 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999956077153474, + "loss": 2.4187, + "step": 5030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999559897884305, + "loss": 2.5242, + "step": 5035 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999955902336588, + "loss": 2.494, + "step": 5040 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999558147979475, + "loss": 2.4001, + "step": 5045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999557271725073, + "loss": 2.6845, + "step": 5050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999556394602688, + "loss": 2.5759, + "step": 5055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999555516612313, + "loss": 2.5307, + "step": 5060 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999955463775395, + "loss": 2.4896, + "step": 5065 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999553758027602, + "loss": 2.4706, + "step": 5070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999552877433265, + "loss": 2.488, + "step": 5075 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999955199597094, + "loss": 2.5807, + "step": 5080 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999955111364063, + "loss": 2.6193, + "step": 5085 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999955023044233, + "loss": 2.4777, + "step": 5090 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999549346376045, + "loss": 2.662, + "step": 5095 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999954846144177, + "loss": 2.2384, + "step": 5100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999954757563951, + "loss": 2.4218, + "step": 5105 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999954668896926, + "loss": 2.507, + "step": 5110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999545801431027, + "loss": 2.4263, + "step": 5115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999544913024806, + "loss": 2.6976, + "step": 5120 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199995440237506, + "loss": 2.6457, + "step": 5125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999543133608402, + "loss": 2.4234, + "step": 5130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999542242598223, + "loss": 2.6219, + "step": 5135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999541350720055, + "loss": 2.6571, + "step": 5140 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199995404579739, + "loss": 2.5993, + "step": 5145 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999953956435976, + "loss": 2.8249, + "step": 5150 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999953866987763, + "loss": 2.6825, + "step": 5155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999537774527518, + "loss": 2.3894, + "step": 5160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999536878309416, + "loss": 2.356, + "step": 5165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999535981223332, + "loss": 2.5617, + "step": 5170 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999953508326926, + "loss": 2.4124, + "step": 5175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999534184447204, + "loss": 2.5813, + "step": 5180 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999533284757162, + "loss": 2.4507, + "step": 5185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999532384199132, + "loss": 2.6176, + "step": 5190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999531482773114, + "loss": 2.3542, + "step": 5195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999530580479114, + "loss": 2.4879, + "step": 5200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999529677317132, + "loss": 2.5045, + "step": 5205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999528773287159, + "loss": 2.3635, + "step": 5210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999527868389203, + "loss": 2.6403, + "step": 5215 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999952696262326, + "loss": 2.5783, + "step": 5220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999526055989334, + "loss": 2.5197, + "step": 5225 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999952514848742, + "loss": 2.3619, + "step": 5230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999524240117523, + "loss": 2.4988, + "step": 5235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999952333087964, + "loss": 2.5926, + "step": 5240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999522420773773, + "loss": 2.2195, + "step": 5245 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999952150979992, + "loss": 2.4463, + "step": 5250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999520597958084, + "loss": 2.4735, + "step": 5255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999519685248262, + "loss": 2.4903, + "step": 5260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999518771670455, + "loss": 2.4087, + "step": 5265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999517857224666, + "loss": 2.7214, + "step": 5270 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999951694191089, + "loss": 2.8339, + "step": 5275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999516025729134, + "loss": 2.3704, + "step": 5280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999515108679389, + "loss": 2.7017, + "step": 5285 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999951419076166, + "loss": 2.4457, + "step": 5290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999513271975948, + "loss": 2.3882, + "step": 5295 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999951235232225, + "loss": 2.6735, + "step": 5300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999951143180057, + "loss": 2.4836, + "step": 5305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999510510410908, + "loss": 2.8034, + "step": 5310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999950958815326, + "loss": 2.5311, + "step": 5315 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999950866502763, + "loss": 2.387, + "step": 5320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999507741034014, + "loss": 2.3596, + "step": 5325 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999506816172416, + "loss": 2.6885, + "step": 5330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999505890442833, + "loss": 2.6245, + "step": 5335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999504963845268, + "loss": 2.5971, + "step": 5340 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999950403637972, + "loss": 2.6143, + "step": 5345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999503108046187, + "loss": 2.2141, + "step": 5350 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999950217884467, + "loss": 2.3336, + "step": 5355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999501248775173, + "loss": 2.3443, + "step": 5360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999500317837693, + "loss": 2.362, + "step": 5365 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999949938603223, + "loss": 2.478, + "step": 5370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999498453358782, + "loss": 2.6018, + "step": 5375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999497519817354, + "loss": 2.7575, + "step": 5380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999496585407938, + "loss": 2.6625, + "step": 5385 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999495650130546, + "loss": 2.6967, + "step": 5390 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999949471398517, + "loss": 2.3785, + "step": 5395 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999949377697181, + "loss": 2.5965, + "step": 5400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999492839090468, + "loss": 2.5036, + "step": 5405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999491900341143, + "loss": 2.3819, + "step": 5410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999490960723838, + "loss": 2.4559, + "step": 5415 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999949002023855, + "loss": 2.7719, + "step": 5420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999489078885278, + "loss": 2.5455, + "step": 5425 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999488136664026, + "loss": 2.4121, + "step": 5430 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999948719357479, + "loss": 2.5166, + "step": 5435 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999486249617574, + "loss": 2.642, + "step": 5440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999485304792377, + "loss": 2.3874, + "step": 5445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999484359099195, + "loss": 2.492, + "step": 5450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999948341253804, + "loss": 2.5342, + "step": 5455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999482465108895, + "loss": 2.1746, + "step": 5460 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999948151681177, + "loss": 2.4772, + "step": 5465 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999480567646667, + "loss": 2.2773, + "step": 5470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999479617613579, + "loss": 2.3527, + "step": 5475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999478666712515, + "loss": 2.6607, + "step": 5480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999477714943465, + "loss": 2.6414, + "step": 5485 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999476762306434, + "loss": 2.6242, + "step": 5490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999475808801423, + "loss": 2.5702, + "step": 5495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999474854428436, + "loss": 2.5194, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999473899187463, + "loss": 2.3377, + "step": 5505 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999947294307851, + "loss": 2.2831, + "step": 5510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999947198610158, + "loss": 2.5063, + "step": 5515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999471028256665, + "loss": 2.6298, + "step": 5520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999470069543773, + "loss": 2.3577, + "step": 5525 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199994691099629, + "loss": 2.4752, + "step": 5530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999468149514045, + "loss": 2.7031, + "step": 5535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999467188197212, + "loss": 2.514, + "step": 5540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999466226012396, + "loss": 2.6673, + "step": 5545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999465262959603, + "loss": 2.6299, + "step": 5550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999464299038828, + "loss": 2.667, + "step": 5555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999463334250076, + "loss": 2.5126, + "step": 5560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999946236859334, + "loss": 2.5674, + "step": 5565 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999946140206863, + "loss": 2.278, + "step": 5570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999460434675937, + "loss": 2.0909, + "step": 5575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999459466415263, + "loss": 2.2677, + "step": 5580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999458497286612, + "loss": 2.2392, + "step": 5585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999457527289982, + "loss": 2.7081, + "step": 5590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999456556425374, + "loss": 2.4179, + "step": 5595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999455584692784, + "loss": 2.5304, + "step": 5600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999454612092216, + "loss": 2.4348, + "step": 5605 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999945363862367, + "loss": 2.672, + "step": 5610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999452664287143, + "loss": 2.226, + "step": 5615 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999945168908264, + "loss": 2.543, + "step": 5620 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999450713010158, + "loss": 2.5913, + "step": 5625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999449736069695, + "loss": 2.7104, + "step": 5630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999448758261255, + "loss": 2.6155, + "step": 5635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999447779584837, + "loss": 2.6001, + "step": 5640 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999944680004044, + "loss": 2.2563, + "step": 5645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999445819628064, + "loss": 2.4679, + "step": 5650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999444838347713, + "loss": 2.2942, + "step": 5655 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999944385619938, + "loss": 2.4746, + "step": 5660 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999944287318307, + "loss": 2.6527, + "step": 5665 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999944188929878, + "loss": 2.7976, + "step": 5670 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999440904546517, + "loss": 2.6258, + "step": 5675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999439918926272, + "loss": 2.3405, + "step": 5680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999438932438054, + "loss": 2.5708, + "step": 5685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999437945081855, + "loss": 2.7506, + "step": 5690 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999943695685768, + "loss": 2.4775, + "step": 5695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999435967765528, + "loss": 2.6628, + "step": 5700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999434977805396, + "loss": 2.5022, + "step": 5705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999433986977287, + "loss": 2.4642, + "step": 5710 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999432995281203, + "loss": 2.5368, + "step": 5715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999432002717143, + "loss": 2.6161, + "step": 5720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999431009285102, + "loss": 2.6161, + "step": 5725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999430014985088, + "loss": 2.6724, + "step": 5730 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999429019817096, + "loss": 2.4299, + "step": 5735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999428023781125, + "loss": 2.6254, + "step": 5740 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999942702687718, + "loss": 2.4306, + "step": 5745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999942602910526, + "loss": 2.2805, + "step": 5750 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999942503046536, + "loss": 2.6213, + "step": 5755 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999424030957483, + "loss": 2.4528, + "step": 5760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999423030581633, + "loss": 2.5347, + "step": 5765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999422029337805, + "loss": 2.4189, + "step": 5770 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999421027226002, + "loss": 2.7847, + "step": 5775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999420024246223, + "loss": 2.386, + "step": 5780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999419020398467, + "loss": 2.3243, + "step": 5785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999418015682733, + "loss": 2.501, + "step": 5790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999417010099026, + "loss": 2.5756, + "step": 5795 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999941600364734, + "loss": 2.5002, + "step": 5800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999414996327685, + "loss": 2.5806, + "step": 5805 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999941398814005, + "loss": 2.6412, + "step": 5810 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999941297908444, + "loss": 2.2917, + "step": 5815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999411969160856, + "loss": 2.5026, + "step": 5820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999410958369297, + "loss": 2.4119, + "step": 5825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999409946709761, + "loss": 2.5459, + "step": 5830 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999940893418225, + "loss": 2.3567, + "step": 5835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999407920786766, + "loss": 2.5446, + "step": 5840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999406906523305, + "loss": 2.4614, + "step": 5845 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999405891391872, + "loss": 2.4516, + "step": 5850 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999940487539246, + "loss": 2.7649, + "step": 5855 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999940385852508, + "loss": 2.4139, + "step": 5860 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999940284078972, + "loss": 2.5276, + "step": 5865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999401822186385, + "loss": 2.6131, + "step": 5870 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999400802715077, + "loss": 2.2981, + "step": 5875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999399782375795, + "loss": 2.435, + "step": 5880 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999398761168542, + "loss": 2.4162, + "step": 5885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999397739093312, + "loss": 2.4246, + "step": 5890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999396716150107, + "loss": 2.6836, + "step": 5895 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999939569233893, + "loss": 2.537, + "step": 5900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999394667659777, + "loss": 2.4223, + "step": 5905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999393642112652, + "loss": 2.376, + "step": 5910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999392615697553, + "loss": 2.5755, + "step": 5915 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999939158841448, + "loss": 2.5933, + "step": 5920 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999390560263434, + "loss": 2.0977, + "step": 5925 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999389531244414, + "loss": 2.705, + "step": 5930 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999938850135742, + "loss": 2.5654, + "step": 5935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999387470602455, + "loss": 2.3034, + "step": 5940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999386438979518, + "loss": 2.5059, + "step": 5945 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999385406488601, + "loss": 2.5865, + "step": 5950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999384373129718, + "loss": 2.7423, + "step": 5955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999383338902858, + "loss": 2.5133, + "step": 5960 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999938230380803, + "loss": 2.347, + "step": 5965 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999381267845226, + "loss": 2.657, + "step": 5970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999380231014452, + "loss": 2.4238, + "step": 5975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999379193315703, + "loss": 2.4641, + "step": 5980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999378154748982, + "loss": 2.5307, + "step": 5985 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999937711531429, + "loss": 2.5566, + "step": 5990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999376075011623, + "loss": 2.5494, + "step": 5995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999375033840985, + "loss": 2.4661, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999373991802378, + "loss": 2.6792, + "step": 6005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999372948895796, + "loss": 2.4679, + "step": 6010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999371905121243, + "loss": 2.4224, + "step": 6015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999370860478716, + "loss": 2.58, + "step": 6020 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999936981496822, + "loss": 2.4141, + "step": 6025 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999368768589752, + "loss": 2.4102, + "step": 6030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999367721343313, + "loss": 2.4277, + "step": 6035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999366673228904, + "loss": 2.3389, + "step": 6040 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999936562424652, + "loss": 2.6248, + "step": 6045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999364574396165, + "loss": 2.5639, + "step": 6050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999363523677842, + "loss": 2.3538, + "step": 6055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999362472091544, + "loss": 2.6417, + "step": 6060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999361419637278, + "loss": 2.4795, + "step": 6065 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999936036631504, + "loss": 2.6217, + "step": 6070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999359312124836, + "loss": 2.5051, + "step": 6075 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999358257066655, + "loss": 2.5224, + "step": 6080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999357201140505, + "loss": 2.4752, + "step": 6085 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999356144346386, + "loss": 2.5671, + "step": 6090 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999355086684295, + "loss": 2.553, + "step": 6095 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999354028154233, + "loss": 2.7043, + "step": 6100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999352968756205, + "loss": 2.415, + "step": 6105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999351908490202, + "loss": 2.5664, + "step": 6110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999935084735623, + "loss": 2.5261, + "step": 6115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999349785354288, + "loss": 2.4543, + "step": 6120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999348722484378, + "loss": 2.5206, + "step": 6125 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199993476587465, + "loss": 2.5553, + "step": 6130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999346594140648, + "loss": 2.4298, + "step": 6135 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999934552866683, + "loss": 2.5042, + "step": 6140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999344462325042, + "loss": 2.4607, + "step": 6145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999343395115282, + "loss": 2.6575, + "step": 6150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999342327037554, + "loss": 2.6144, + "step": 6155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999341258091857, + "loss": 2.4543, + "step": 6160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999340188278192, + "loss": 2.6144, + "step": 6165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999339117596557, + "loss": 2.8006, + "step": 6170 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999933804604695, + "loss": 2.7669, + "step": 6175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999336973629378, + "loss": 2.722, + "step": 6180 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999335900343837, + "loss": 2.6892, + "step": 6185 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999933482619033, + "loss": 2.5959, + "step": 6190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999333751168847, + "loss": 2.6445, + "step": 6195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999332675279402, + "loss": 2.6802, + "step": 6200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999331598521988, + "loss": 2.6725, + "step": 6205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999330520896602, + "loss": 2.3971, + "step": 6210 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999932944240325, + "loss": 2.7448, + "step": 6215 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999932836304193, + "loss": 2.7078, + "step": 6220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999327282812642, + "loss": 2.1525, + "step": 6225 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999326201715386, + "loss": 2.6948, + "step": 6230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999325119750164, + "loss": 2.6289, + "step": 6235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999932403691697, + "loss": 2.519, + "step": 6240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999322953215813, + "loss": 2.6539, + "step": 6245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999321868646687, + "loss": 2.5327, + "step": 6250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999320783209591, + "loss": 2.5113, + "step": 6255 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999931969690453, + "loss": 2.5714, + "step": 6260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999318609731503, + "loss": 2.4982, + "step": 6265 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999931752169051, + "loss": 2.4783, + "step": 6270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999316432781544, + "loss": 2.4137, + "step": 6275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999315343004616, + "loss": 2.3633, + "step": 6280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999931425235972, + "loss": 2.4293, + "step": 6285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999313160846854, + "loss": 2.4783, + "step": 6290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999312068466024, + "loss": 2.4389, + "step": 6295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999310975217228, + "loss": 2.4359, + "step": 6300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999930988110047, + "loss": 2.7623, + "step": 6305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999308786115738, + "loss": 2.5565, + "step": 6310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999930769026304, + "loss": 2.5909, + "step": 6315 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999930659354238, + "loss": 2.4685, + "step": 6320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999305495953753, + "loss": 2.3457, + "step": 6325 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999930439749716, + "loss": 2.4567, + "step": 6330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999303298172597, + "loss": 2.6181, + "step": 6335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999302197980075, + "loss": 2.6353, + "step": 6340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999301096919583, + "loss": 2.7282, + "step": 6345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999299994991128, + "loss": 2.2838, + "step": 6350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999298892194704, + "loss": 2.492, + "step": 6355 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999929778853032, + "loss": 2.7185, + "step": 6360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999296683997962, + "loss": 2.4134, + "step": 6365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999295578597645, + "loss": 2.3234, + "step": 6370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999294472329362, + "loss": 2.2621, + "step": 6375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999293365193115, + "loss": 2.5966, + "step": 6380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999292257188902, + "loss": 2.3967, + "step": 6385 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999929114831672, + "loss": 2.5198, + "step": 6390 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999929003857658, + "loss": 2.7163, + "step": 6395 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999928892796847, + "loss": 2.6591, + "step": 6400 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199992878164924, + "loss": 2.4431, + "step": 6405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999286704148363, + "loss": 2.5429, + "step": 6410 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999928559093636, + "loss": 2.476, + "step": 6415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999284476856396, + "loss": 2.7277, + "step": 6420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999283361908467, + "loss": 2.5014, + "step": 6425 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999282246092575, + "loss": 2.3429, + "step": 6430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999281129408716, + "loss": 2.4629, + "step": 6435 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999280011856897, + "loss": 2.5457, + "step": 6440 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999927889343711, + "loss": 2.72, + "step": 6445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999277774149362, + "loss": 2.6957, + "step": 6450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999927665399365, + "loss": 2.7006, + "step": 6455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999275532969974, + "loss": 2.1886, + "step": 6460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999274411078335, + "loss": 2.6278, + "step": 6465 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999273288318734, + "loss": 2.423, + "step": 6470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999272164691168, + "loss": 2.4197, + "step": 6475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999271040195638, + "loss": 2.4111, + "step": 6480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999269914832145, + "loss": 2.4731, + "step": 6485 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999926878860069, + "loss": 2.7607, + "step": 6490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999267661501274, + "loss": 2.5821, + "step": 6495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999266533533893, + "loss": 2.6406, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999926540469855, + "loss": 2.4918, + "step": 6505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999264274995246, + "loss": 2.5968, + "step": 6510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999926314442398, + "loss": 2.6865, + "step": 6515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999262012984748, + "loss": 2.4477, + "step": 6520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999260880677557, + "loss": 2.7181, + "step": 6525 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199992597475024, + "loss": 2.6592, + "step": 6530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999258613459287, + "loss": 2.4172, + "step": 6535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999257478548206, + "loss": 2.5165, + "step": 6540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999256342769165, + "loss": 2.2554, + "step": 6545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999255206122165, + "loss": 2.2255, + "step": 6550 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199992540686072, + "loss": 2.6928, + "step": 6555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999252930224276, + "loss": 2.5198, + "step": 6560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999925179097339, + "loss": 2.4785, + "step": 6565 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999925065085454, + "loss": 2.444, + "step": 6570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999249509867735, + "loss": 2.2624, + "step": 6575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999248368012963, + "loss": 2.5451, + "step": 6580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999247225290233, + "loss": 2.5046, + "step": 6585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999246081699542, + "loss": 2.651, + "step": 6590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999244937240888, + "loss": 2.5855, + "step": 6595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999243791914273, + "loss": 2.6744, + "step": 6600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999242645719697, + "loss": 2.4147, + "step": 6605 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999241498657163, + "loss": 2.4975, + "step": 6610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999240350726668, + "loss": 2.5738, + "step": 6615 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999239201928215, + "loss": 2.6384, + "step": 6620 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199992380522618, + "loss": 2.4977, + "step": 6625 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999923690172742, + "loss": 2.4549, + "step": 6630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999235750325087, + "loss": 2.5053, + "step": 6635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999234598054791, + "loss": 2.6512, + "step": 6640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999233444916535, + "loss": 2.6486, + "step": 6645 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999923229091032, + "loss": 2.6118, + "step": 6650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999231136036145, + "loss": 2.6748, + "step": 6655 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999922998029401, + "loss": 2.4618, + "step": 6660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999228823683915, + "loss": 2.667, + "step": 6665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999227666205862, + "loss": 2.5785, + "step": 6670 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999922650785985, + "loss": 2.3421, + "step": 6675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999225348645876, + "loss": 2.5338, + "step": 6680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999224188563947, + "loss": 2.378, + "step": 6685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999223027614057, + "loss": 2.5207, + "step": 6690 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999922186579621, + "loss": 2.4966, + "step": 6695 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199992207031104, + "loss": 2.6312, + "step": 6700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999219539556636, + "loss": 2.7636, + "step": 6705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999218375134912, + "loss": 2.5626, + "step": 6710 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999217209845226, + "loss": 2.6757, + "step": 6715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999216043687585, + "loss": 2.3545, + "step": 6720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999214876661986, + "loss": 2.4726, + "step": 6725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999213708768427, + "loss": 2.7951, + "step": 6730 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999212540006912, + "loss": 2.4741, + "step": 6735 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999921137037744, + "loss": 2.2713, + "step": 6740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999210199880008, + "loss": 2.3387, + "step": 6745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999920902851462, + "loss": 2.4719, + "step": 6750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999207856281271, + "loss": 2.5354, + "step": 6755 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999920668317997, + "loss": 2.3241, + "step": 6760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999205509210706, + "loss": 2.6475, + "step": 6765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999204334373487, + "loss": 2.554, + "step": 6770 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999920315866831, + "loss": 2.4501, + "step": 6775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999201982095178, + "loss": 2.3314, + "step": 6780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999200804654088, + "loss": 2.4591, + "step": 6785 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999919962634504, + "loss": 2.603, + "step": 6790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999198447168034, + "loss": 2.4301, + "step": 6795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999197267123075, + "loss": 2.4142, + "step": 6800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999196086210158, + "loss": 2.4162, + "step": 6805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999194904429284, + "loss": 2.4825, + "step": 6810 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999193721780456, + "loss": 2.3559, + "step": 6815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999192538263668, + "loss": 2.3979, + "step": 6820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999191353878924, + "loss": 2.355, + "step": 6825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999190168626228, + "loss": 2.3177, + "step": 6830 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999918898250557, + "loss": 2.7116, + "step": 6835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999187795516961, + "loss": 2.7235, + "step": 6840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999186607660394, + "loss": 2.5057, + "step": 6845 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999185418935873, + "loss": 2.5062, + "step": 6850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999184229343395, + "loss": 2.5559, + "step": 6855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999183038882961, + "loss": 2.4072, + "step": 6860 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999181847554572, + "loss": 2.3689, + "step": 6865 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999918065535823, + "loss": 2.3674, + "step": 6870 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999917946229393, + "loss": 2.4241, + "step": 6875 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999917826836168, + "loss": 2.6018, + "step": 6880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999917707356147, + "loss": 2.5347, + "step": 6885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999175877893305, + "loss": 2.3115, + "step": 6890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999174681357187, + "loss": 2.5077, + "step": 6895 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999173483953113, + "loss": 2.6317, + "step": 6900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999172285681084, + "loss": 2.5362, + "step": 6905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999171086541105, + "loss": 2.5657, + "step": 6910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999169886533168, + "loss": 2.5338, + "step": 6915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999168685657276, + "loss": 2.6462, + "step": 6920 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999916748391343, + "loss": 2.5496, + "step": 6925 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999166281301634, + "loss": 2.629, + "step": 6930 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999165077821883, + "loss": 2.7223, + "step": 6935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999163873474175, + "loss": 2.3582, + "step": 6940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999162668258514, + "loss": 2.4163, + "step": 6945 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199991614621749, + "loss": 2.6175, + "step": 6950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999160255223335, + "loss": 2.5896, + "step": 6955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999159047403816, + "loss": 2.5343, + "step": 6960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999157838716342, + "loss": 2.6274, + "step": 6965 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999156629160912, + "loss": 2.578, + "step": 6970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999155418737533, + "loss": 2.5981, + "step": 6975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999154207446198, + "loss": 2.4209, + "step": 6980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999152995286914, + "loss": 2.5243, + "step": 6985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999151782259676, + "loss": 2.7483, + "step": 6990 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999915056836448, + "loss": 2.4876, + "step": 6995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999149353601338, + "loss": 2.6989, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999148137970243, + "loss": 2.634, + "step": 7005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999146921471196, + "loss": 2.4256, + "step": 7010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999145704104195, + "loss": 2.4754, + "step": 7015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999144485869242, + "loss": 2.4822, + "step": 7020 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999143266766336, + "loss": 2.5492, + "step": 7025 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999914204679548, + "loss": 2.5732, + "step": 7030 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999914082595667, + "loss": 2.2351, + "step": 7035 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999913960424991, + "loss": 2.6789, + "step": 7040 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999138381675198, + "loss": 2.5905, + "step": 7045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999137158232535, + "loss": 2.6706, + "step": 7050 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999913593392192, + "loss": 2.4164, + "step": 7055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999134708743353, + "loss": 2.6302, + "step": 7060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999133482696837, + "loss": 2.5643, + "step": 7065 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999913225578237, + "loss": 2.6243, + "step": 7070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999131027999948, + "loss": 2.4086, + "step": 7075 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999912979934958, + "loss": 2.6565, + "step": 7080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999128569831259, + "loss": 2.6168, + "step": 7085 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999127339444988, + "loss": 2.4882, + "step": 7090 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999126108190764, + "loss": 2.2434, + "step": 7095 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999912487606859, + "loss": 2.5088, + "step": 7100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999123643078468, + "loss": 2.662, + "step": 7105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999122409220394, + "loss": 2.3635, + "step": 7110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999121174494374, + "loss": 2.5612, + "step": 7115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999119938900398, + "loss": 2.4165, + "step": 7120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999118702438478, + "loss": 2.5879, + "step": 7125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999117465108602, + "loss": 2.5018, + "step": 7130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999116226910782, + "loss": 2.6596, + "step": 7135 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999911498784501, + "loss": 2.6165, + "step": 7140 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999911374791129, + "loss": 2.2687, + "step": 7145 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999911250710962, + "loss": 2.7033, + "step": 7150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999111265439997, + "loss": 2.4913, + "step": 7155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999110022902428, + "loss": 2.7355, + "step": 7160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999910877949691, + "loss": 2.4507, + "step": 7165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999107535223444, + "loss": 2.5272, + "step": 7170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999106290082027, + "loss": 2.749, + "step": 7175 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999910504407266, + "loss": 2.4736, + "step": 7180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999910379719535, + "loss": 2.7673, + "step": 7185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999102549450086, + "loss": 2.6617, + "step": 7190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999101300836877, + "loss": 2.303, + "step": 7195 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999910005135572, + "loss": 3.012, + "step": 7200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999098801006614, + "loss": 2.5122, + "step": 7205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999097549789558, + "loss": 2.4943, + "step": 7210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999096297704556, + "loss": 2.3182, + "step": 7215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999095044751606, + "loss": 2.5661, + "step": 7220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999093790930706, + "loss": 2.3979, + "step": 7225 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999092536241862, + "loss": 2.4857, + "step": 7230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999091280685068, + "loss": 2.2486, + "step": 7235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999909002426033, + "loss": 2.5288, + "step": 7240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999908876696764, + "loss": 2.5035, + "step": 7245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999087508807003, + "loss": 2.4987, + "step": 7250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999086249778425, + "loss": 2.4817, + "step": 7255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999084989881895, + "loss": 2.4187, + "step": 7260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999083729117418, + "loss": 2.4947, + "step": 7265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999082467484996, + "loss": 2.5637, + "step": 7270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999081204984627, + "loss": 2.6972, + "step": 7275 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999907994161631, + "loss": 2.4978, + "step": 7280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999907867738005, + "loss": 2.61, + "step": 7285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999077412275842, + "loss": 2.674, + "step": 7290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999076146303685, + "loss": 2.8544, + "step": 7295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999074879463585, + "loss": 2.3708, + "step": 7300 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999073611755538, + "loss": 2.5549, + "step": 7305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999072343179547, + "loss": 2.4773, + "step": 7310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999071073735606, + "loss": 2.4491, + "step": 7315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999069803423725, + "loss": 2.3739, + "step": 7320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999068532243895, + "loss": 2.5656, + "step": 7325 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999067260196118, + "loss": 2.2732, + "step": 7330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999065987280398, + "loss": 2.3354, + "step": 7335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999064713496735, + "loss": 2.5572, + "step": 7340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999063438845124, + "loss": 2.4688, + "step": 7345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999062163325565, + "loss": 2.4249, + "step": 7350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999060886938068, + "loss": 2.4774, + "step": 7355 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999905960968262, + "loss": 2.523, + "step": 7360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999905833155923, + "loss": 2.5578, + "step": 7365 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199990570525679, + "loss": 2.3641, + "step": 7370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999055772708618, + "loss": 2.4622, + "step": 7375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999054491981398, + "loss": 2.6698, + "step": 7380 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999905321038623, + "loss": 2.5802, + "step": 7385 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999051927923115, + "loss": 2.5884, + "step": 7390 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999050644592062, + "loss": 2.4902, + "step": 7395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999049360393064, + "loss": 2.7938, + "step": 7400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999904807532612, + "loss": 2.5248, + "step": 7405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999046789391232, + "loss": 2.6136, + "step": 7410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999045502588404, + "loss": 2.627, + "step": 7415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999044214917628, + "loss": 2.4668, + "step": 7420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999042926378913, + "loss": 2.3442, + "step": 7425 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999041636972251, + "loss": 2.4571, + "step": 7430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999040346697648, + "loss": 2.5193, + "step": 7435 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999039055555105, + "loss": 2.8178, + "step": 7440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999037763544615, + "loss": 2.5323, + "step": 7445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999036470666183, + "loss": 2.3081, + "step": 7450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999035176919807, + "loss": 2.4393, + "step": 7455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999033882305491, + "loss": 2.5866, + "step": 7460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999032586823232, + "loss": 2.6877, + "step": 7465 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999903129047303, + "loss": 2.3796, + "step": 7470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999029993254886, + "loss": 2.5524, + "step": 7475 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199990286951688, + "loss": 2.5185, + "step": 7480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999027396214771, + "loss": 2.742, + "step": 7485 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199990260963928, + "loss": 2.492, + "step": 7490 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999902479570289, + "loss": 2.6437, + "step": 7495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999023494145037, + "loss": 2.5175, + "step": 7500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999022191719243, + "loss": 2.0803, + "step": 7505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999020888425507, + "loss": 2.5532, + "step": 7510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999901958426383, + "loss": 2.4239, + "step": 7515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999018279234212, + "loss": 2.2999, + "step": 7520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999901697333665, + "loss": 2.3891, + "step": 7525 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999901566657115, + "loss": 2.6701, + "step": 7530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999014358937708, + "loss": 2.6404, + "step": 7535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999013050436327, + "loss": 2.4318, + "step": 7540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999011741067007, + "loss": 2.5055, + "step": 7545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999010430829742, + "loss": 2.5732, + "step": 7550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999009119724538, + "loss": 2.5331, + "step": 7555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999007807751395, + "loss": 2.3426, + "step": 7560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999900649491031, + "loss": 2.5285, + "step": 7565 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999005181201287, + "loss": 2.2141, + "step": 7570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999003866624324, + "loss": 2.3738, + "step": 7575 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999900255117942, + "loss": 2.138, + "step": 7580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999001234866578, + "loss": 2.6575, + "step": 7585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998999917685795, + "loss": 2.4234, + "step": 7590 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999899859963707, + "loss": 2.4141, + "step": 7595 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999899728072041, + "loss": 2.683, + "step": 7600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999899596093581, + "loss": 2.6398, + "step": 7605 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998994640283267, + "loss": 2.4464, + "step": 7610 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999899331876279, + "loss": 2.5567, + "step": 7615 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999899199637437, + "loss": 2.4854, + "step": 7620 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998990673118016, + "loss": 2.6837, + "step": 7625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998989348993717, + "loss": 2.4272, + "step": 7630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998988024001486, + "loss": 2.5749, + "step": 7635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998986698141312, + "loss": 2.38, + "step": 7640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998985371413202, + "loss": 2.5107, + "step": 7645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998984043817153, + "loss": 2.5564, + "step": 7650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998982715353165, + "loss": 2.4367, + "step": 7655 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999898138602124, + "loss": 2.4698, + "step": 7660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998980055821377, + "loss": 2.5996, + "step": 7665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998978724753577, + "loss": 2.4685, + "step": 7670 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998977392817838, + "loss": 2.4938, + "step": 7675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998976060014162, + "loss": 2.3933, + "step": 7680 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999897472634255, + "loss": 2.3589, + "step": 7685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998973391802997, + "loss": 2.526, + "step": 7690 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999897205639551, + "loss": 2.3884, + "step": 7695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998970720120087, + "loss": 2.6155, + "step": 7700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998969382976727, + "loss": 2.347, + "step": 7705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998968044965428, + "loss": 2.3685, + "step": 7710 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999896670608619, + "loss": 2.6701, + "step": 7715 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999896536633902, + "loss": 2.5651, + "step": 7720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998964025723913, + "loss": 2.323, + "step": 7725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998962684240868, + "loss": 2.6822, + "step": 7730 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998961341889885, + "loss": 2.4658, + "step": 7735 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999895999867097, + "loss": 2.5973, + "step": 7740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998958654584117, + "loss": 2.3331, + "step": 7745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999895730962933, + "loss": 2.4025, + "step": 7750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998955963806606, + "loss": 2.4536, + "step": 7755 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998954617115943, + "loss": 2.6764, + "step": 7760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998953269557347, + "loss": 2.4254, + "step": 7765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998951921130814, + "loss": 2.4398, + "step": 7770 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999895057183635, + "loss": 2.5879, + "step": 7775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998949221673948, + "loss": 2.1881, + "step": 7780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998947870643612, + "loss": 2.5155, + "step": 7785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998946518745342, + "loss": 2.3662, + "step": 7790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998945165979133, + "loss": 2.3525, + "step": 7795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998943812344993, + "loss": 2.5677, + "step": 7800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998942457842916, + "loss": 2.4675, + "step": 7805 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999894110247291, + "loss": 2.5602, + "step": 7810 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998939746234963, + "loss": 2.5901, + "step": 7815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998938389129083, + "loss": 2.701, + "step": 7820 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999893703115527, + "loss": 2.6018, + "step": 7825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998935672313524, + "loss": 2.4727, + "step": 7830 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999893431260384, + "loss": 2.5026, + "step": 7835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998932952026229, + "loss": 2.5641, + "step": 7840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998931590580678, + "loss": 2.6295, + "step": 7845 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998930228267198, + "loss": 2.4993, + "step": 7850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998928865085783, + "loss": 2.7407, + "step": 7855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998927501036433, + "loss": 2.6448, + "step": 7860 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999892613611915, + "loss": 2.5057, + "step": 7865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998924770333937, + "loss": 2.4465, + "step": 7870 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999892340368079, + "loss": 2.6736, + "step": 7875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998922036159708, + "loss": 2.4964, + "step": 7880 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998920667770696, + "loss": 2.4419, + "step": 7885 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999891929851375, + "loss": 2.6466, + "step": 7890 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999891792838887, + "loss": 2.3967, + "step": 7895 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999891655739606, + "loss": 2.5252, + "step": 7900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999891518553532, + "loss": 2.5781, + "step": 7905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998913812806644, + "loss": 2.2417, + "step": 7910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998912439210035, + "loss": 2.3379, + "step": 7915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998911064745496, + "loss": 2.6051, + "step": 7920 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998909689413025, + "loss": 2.498, + "step": 7925 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998908313212624, + "loss": 2.4427, + "step": 7930 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999890693614429, + "loss": 2.3317, + "step": 7935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998905558208025, + "loss": 2.4034, + "step": 7940 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999890417940383, + "loss": 2.5472, + "step": 7945 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998902799731703, + "loss": 2.4709, + "step": 7950 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999890141919164, + "loss": 2.5028, + "step": 7955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998900037783654, + "loss": 2.2752, + "step": 7960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998898655507733, + "loss": 2.2431, + "step": 7965 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998897272363882, + "loss": 2.5253, + "step": 7970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998895888352102, + "loss": 2.6414, + "step": 7975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998894503472388, + "loss": 2.7794, + "step": 7980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998893117724746, + "loss": 2.5362, + "step": 7985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998891731109173, + "loss": 2.4186, + "step": 7990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998890343625672, + "loss": 2.6099, + "step": 7995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998888955274238, + "loss": 2.505, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998887566054875, + "loss": 2.499, + "step": 8005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998886175967584, + "loss": 2.7635, + "step": 8010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998884785012362, + "loss": 2.4564, + "step": 8015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998883393189208, + "loss": 2.5485, + "step": 8020 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998882000498133, + "loss": 2.5103, + "step": 8025 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999888060693912, + "loss": 2.5329, + "step": 8030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998879212512183, + "loss": 2.6389, + "step": 8035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998877817217314, + "loss": 2.4754, + "step": 8040 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998876421054517, + "loss": 2.5635, + "step": 8045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998875024023792, + "loss": 2.3957, + "step": 8050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998873626125136, + "loss": 2.5177, + "step": 8055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998872227358554, + "loss": 2.5145, + "step": 8060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998870827724044, + "loss": 2.6447, + "step": 8065 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998869427221604, + "loss": 2.4512, + "step": 8070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998868025851237, + "loss": 2.4837, + "step": 8075 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998866623612943, + "loss": 2.4513, + "step": 8080 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999886522050672, + "loss": 2.3954, + "step": 8085 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998863816532566, + "loss": 2.4231, + "step": 8090 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999886241169049, + "loss": 2.4992, + "step": 8095 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998861005980482, + "loss": 2.4991, + "step": 8100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999885959940255, + "loss": 2.3801, + "step": 8105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998858191956688, + "loss": 2.6345, + "step": 8110 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199988567836429, + "loss": 2.7197, + "step": 8115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998855374461186, + "loss": 2.3708, + "step": 8120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998853964411543, + "loss": 2.0928, + "step": 8125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998852553493974, + "loss": 2.3053, + "step": 8130 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999885114170848, + "loss": 2.6692, + "step": 8135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998849729055057, + "loss": 2.6266, + "step": 8140 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999884831553371, + "loss": 2.5056, + "step": 8145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998846901144433, + "loss": 2.2743, + "step": 8150 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999884548588723, + "loss": 2.4226, + "step": 8155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998844069762104, + "loss": 2.6256, + "step": 8160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999884265276905, + "loss": 2.4524, + "step": 8165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998841234908073, + "loss": 2.5078, + "step": 8170 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999883981617917, + "loss": 2.5066, + "step": 8175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998838396582337, + "loss": 2.4922, + "step": 8180 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998836976117584, + "loss": 2.3698, + "step": 8185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998835554784904, + "loss": 2.5426, + "step": 8190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998834132584295, + "loss": 2.4569, + "step": 8195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998832709515764, + "loss": 2.5148, + "step": 8200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999883128557931, + "loss": 2.4442, + "step": 8205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998829860774927, + "loss": 2.4735, + "step": 8210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998828435102624, + "loss": 2.6126, + "step": 8215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998827008562393, + "loss": 2.5147, + "step": 8220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998825581154237, + "loss": 2.354, + "step": 8225 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999882415287816, + "loss": 2.4763, + "step": 8230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998822723734156, + "loss": 2.4824, + "step": 8235 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998821293722228, + "loss": 2.6852, + "step": 8240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998819862842378, + "loss": 2.4895, + "step": 8245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998818431094605, + "loss": 2.4536, + "step": 8250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998816998478903, + "loss": 2.7339, + "step": 8255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998815564995282, + "loss": 2.6485, + "step": 8260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998814130643735, + "loss": 2.599, + "step": 8265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998812695424265, + "loss": 2.4026, + "step": 8270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998811259336875, + "loss": 2.5381, + "step": 8275 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999880982238156, + "loss": 2.6003, + "step": 8280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999880838455832, + "loss": 2.2663, + "step": 8285 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999880694586716, + "loss": 2.6823, + "step": 8290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998805506308073, + "loss": 2.3228, + "step": 8295 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999880406588107, + "loss": 2.3697, + "step": 8300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999880262458614, + "loss": 2.347, + "step": 8305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998801182423287, + "loss": 2.5555, + "step": 8310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998799739392515, + "loss": 2.4845, + "step": 8315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998798295493817, + "loss": 2.6127, + "step": 8320 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987968507272, + "loss": 2.5411, + "step": 8325 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999879540509266, + "loss": 2.4625, + "step": 8330 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987939585902, + "loss": 2.2127, + "step": 8335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998792511219816, + "loss": 2.3251, + "step": 8340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998791062981512, + "loss": 2.4014, + "step": 8345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998789613875285, + "loss": 2.4815, + "step": 8350 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999878816390114, + "loss": 2.2377, + "step": 8355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998786713059071, + "loss": 2.4975, + "step": 8360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998785261349084, + "loss": 2.3934, + "step": 8365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998783808771172, + "loss": 2.2827, + "step": 8370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998782355325342, + "loss": 2.282, + "step": 8375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998780901011595, + "loss": 2.7165, + "step": 8380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998779445829922, + "loss": 2.3389, + "step": 8385 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999877798978033, + "loss": 2.4213, + "step": 8390 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998776532862818, + "loss": 2.7093, + "step": 8395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998775075077385, + "loss": 2.3988, + "step": 8400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998773616424035, + "loss": 2.4379, + "step": 8405 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999877215690276, + "loss": 2.382, + "step": 8410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998770696513568, + "loss": 2.3973, + "step": 8415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998769235256457, + "loss": 2.5994, + "step": 8420 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999876777313143, + "loss": 2.3849, + "step": 8425 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999876631013848, + "loss": 2.4507, + "step": 8430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998764846277612, + "loss": 2.3732, + "step": 8435 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999876338154882, + "loss": 2.2734, + "step": 8440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998761915952112, + "loss": 2.6901, + "step": 8445 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999876044948749, + "loss": 2.5284, + "step": 8450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998758982154942, + "loss": 2.4051, + "step": 8455 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999875751395448, + "loss": 2.6417, + "step": 8460 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987560448861, + "loss": 2.4962, + "step": 8465 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987545749498, + "loss": 2.5253, + "step": 8470 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999875310414558, + "loss": 2.3919, + "step": 8475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998751632473445, + "loss": 2.3322, + "step": 8480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998750159933392, + "loss": 2.4986, + "step": 8485 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998748686525422, + "loss": 2.4157, + "step": 8490 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999874721224953, + "loss": 2.2829, + "step": 8495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998745737105723, + "loss": 2.4797, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998744261094, + "loss": 2.3625, + "step": 8505 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999874278421436, + "loss": 2.4289, + "step": 8510 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987413064668, + "loss": 2.5305, + "step": 8515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998739827851325, + "loss": 2.3624, + "step": 8520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998738348367932, + "loss": 2.4507, + "step": 8525 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998736868016622, + "loss": 2.438, + "step": 8530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998735386797395, + "loss": 2.5493, + "step": 8535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998733904710254, + "loss": 2.6027, + "step": 8540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998732421755194, + "loss": 2.6591, + "step": 8545 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999873093793222, + "loss": 2.5961, + "step": 8550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998729453241326, + "loss": 2.6149, + "step": 8555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998727967682523, + "loss": 2.4328, + "step": 8560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998726481255797, + "loss": 2.7167, + "step": 8565 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999872499396116, + "loss": 2.621, + "step": 8570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998723505798604, + "loss": 2.4871, + "step": 8575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998722016768134, + "loss": 2.4709, + "step": 8580 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999872052686975, + "loss": 2.639, + "step": 8585 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999871903610345, + "loss": 2.4479, + "step": 8590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998717544469235, + "loss": 2.4216, + "step": 8595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998716051967104, + "loss": 2.7285, + "step": 8600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998714558597057, + "loss": 2.6347, + "step": 8605 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199987130643591, + "loss": 2.7399, + "step": 8610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998711569253224, + "loss": 2.4631, + "step": 8615 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998710073279433, + "loss": 2.6147, + "step": 8620 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999870857643773, + "loss": 2.7385, + "step": 8625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998707078728111, + "loss": 2.5324, + "step": 8630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998705580150583, + "loss": 2.6185, + "step": 8635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998704080705136, + "loss": 2.4729, + "step": 8640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998702580391778, + "loss": 2.3925, + "step": 8645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998701079210505, + "loss": 2.7546, + "step": 8650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998699577161318, + "loss": 2.5315, + "step": 8655 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998698074244218, + "loss": 2.5595, + "step": 8660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998696570459204, + "loss": 2.4245, + "step": 8665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998695065806277, + "loss": 2.5762, + "step": 8670 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999869356028544, + "loss": 2.2482, + "step": 8675 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999869205389669, + "loss": 2.6133, + "step": 8680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998690546640022, + "loss": 2.3625, + "step": 8685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998689038515445, + "loss": 2.3822, + "step": 8690 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998687529522956, + "loss": 2.3996, + "step": 8695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998686019662552, + "loss": 2.3756, + "step": 8700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998684508934237, + "loss": 2.4049, + "step": 8705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998682997338012, + "loss": 2.6368, + "step": 8710 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999868148487387, + "loss": 2.585, + "step": 8715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998679971541821, + "loss": 2.4734, + "step": 8720 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999867845734186, + "loss": 2.5682, + "step": 8725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998676942273986, + "loss": 2.2908, + "step": 8730 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998675426338204, + "loss": 2.3191, + "step": 8735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998673909534507, + "loss": 2.4821, + "step": 8740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998672391862896, + "loss": 2.5785, + "step": 8745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999867087332338, + "loss": 2.5052, + "step": 8750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998669353915948, + "loss": 2.5428, + "step": 8755 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999866783364061, + "loss": 2.6008, + "step": 8760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998666312497359, + "loss": 2.2407, + "step": 8765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998664790486196, + "loss": 2.3564, + "step": 8770 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998663267607126, + "loss": 2.4997, + "step": 8775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998661743860142, + "loss": 2.613, + "step": 8780 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999866021924525, + "loss": 2.588, + "step": 8785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998658693762449, + "loss": 2.1804, + "step": 8790 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999865716741174, + "loss": 2.3337, + "step": 8795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998655640193115, + "loss": 2.9856, + "step": 8800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998654112106582, + "loss": 2.5314, + "step": 8805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998652583152145, + "loss": 2.454, + "step": 8810 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998651053329793, + "loss": 2.3367, + "step": 8815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998649522639535, + "loss": 2.497, + "step": 8820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998647991081368, + "loss": 2.328, + "step": 8825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998646458655288, + "loss": 2.4763, + "step": 8830 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998644925361302, + "loss": 2.6595, + "step": 8835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998643391199407, + "loss": 2.4011, + "step": 8840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998641856169605, + "loss": 2.2727, + "step": 8845 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998640320271894, + "loss": 2.5044, + "step": 8850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998638783506274, + "loss": 2.5862, + "step": 8855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998637245872747, + "loss": 2.5068, + "step": 8860 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999863570737131, + "loss": 2.6231, + "step": 8865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998634168001965, + "loss": 2.2372, + "step": 8870 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998632627764716, + "loss": 2.4787, + "step": 8875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998631086659557, + "loss": 2.3037, + "step": 8880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999862954468649, + "loss": 2.4987, + "step": 8885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998628001845515, + "loss": 2.4102, + "step": 8890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998626458136634, + "loss": 2.4029, + "step": 8895 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998624913559847, + "loss": 2.4455, + "step": 8900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999862336811515, + "loss": 2.7084, + "step": 8905 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999862182180255, + "loss": 2.3189, + "step": 8910 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999862027462204, + "loss": 2.3658, + "step": 8915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998618726573626, + "loss": 2.3893, + "step": 8920 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998617177657306, + "loss": 2.6753, + "step": 8925 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998615627873077, + "loss": 2.3598, + "step": 8930 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998614077220946, + "loss": 2.4231, + "step": 8935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998612525700904, + "loss": 2.5501, + "step": 8940 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999861097331296, + "loss": 2.1479, + "step": 8945 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860942005711, + "loss": 2.3937, + "step": 8950 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860786593335, + "loss": 2.4062, + "step": 8955 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860631094169, + "loss": 2.3751, + "step": 8960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998604755082122, + "loss": 2.4611, + "step": 8965 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860319835465, + "loss": 2.5114, + "step": 8970 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860164075927, + "loss": 2.4888, + "step": 8975 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999860008229599, + "loss": 2.4488, + "step": 8980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998598522964802, + "loss": 2.4818, + "step": 8985 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999859696276571, + "loss": 2.2975, + "step": 8990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998595401698714, + "loss": 2.3841, + "step": 8995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998593839763812, + "loss": 2.3571, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999859227696101, + "loss": 2.4845, + "step": 9005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998590713290296, + "loss": 2.5427, + "step": 9010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998589148751685, + "loss": 2.6918, + "step": 9015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998587583345168, + "loss": 2.2234, + "step": 9020 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999858601707075, + "loss": 2.3232, + "step": 9025 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998584449928424, + "loss": 2.4575, + "step": 9030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998582881918195, + "loss": 2.3962, + "step": 9035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998581313040065, + "loss": 2.7066, + "step": 9040 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998579743294034, + "loss": 2.4354, + "step": 9045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998578172680096, + "loss": 2.3431, + "step": 9050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998576601198257, + "loss": 2.5614, + "step": 9055 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999857502884851, + "loss": 2.5538, + "step": 9060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998573455630867, + "loss": 2.3665, + "step": 9065 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999857188154532, + "loss": 2.4864, + "step": 9070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998570306591873, + "loss": 2.6007, + "step": 9075 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999856873077052, + "loss": 2.5006, + "step": 9080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998567154081264, + "loss": 2.5271, + "step": 9085 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999856557652411, + "loss": 2.47, + "step": 9090 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999856399809905, + "loss": 2.3941, + "step": 9095 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998562418806093, + "loss": 2.493, + "step": 9100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998560838645232, + "loss": 2.5728, + "step": 9105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998559257616472, + "loss": 2.594, + "step": 9110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998557675719806, + "loss": 2.6892, + "step": 9115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998556092955242, + "loss": 2.3456, + "step": 9120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998554509322776, + "loss": 2.5928, + "step": 9125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998552924822412, + "loss": 2.6073, + "step": 9130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998551339454147, + "loss": 2.4351, + "step": 9135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998549753217978, + "loss": 2.4864, + "step": 9140 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999854816611391, + "loss": 2.4912, + "step": 9145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998546578141942, + "loss": 2.4551, + "step": 9150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998544989302072, + "loss": 2.4463, + "step": 9155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998543399594304, + "loss": 2.3603, + "step": 9160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998541809018638, + "loss": 2.6566, + "step": 9165 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999854021757507, + "loss": 2.3337, + "step": 9170 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199985386252636, + "loss": 2.2276, + "step": 9175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998537032084234, + "loss": 2.2761, + "step": 9180 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998535438036968, + "loss": 2.4544, + "step": 9185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998533843121804, + "loss": 2.6465, + "step": 9190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998532247338742, + "loss": 2.7588, + "step": 9195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998530650687778, + "loss": 2.5723, + "step": 9200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998529053168916, + "loss": 2.7165, + "step": 9205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998527454782156, + "loss": 2.5715, + "step": 9210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998525855527497, + "loss": 2.5548, + "step": 9215 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999852425540494, + "loss": 2.3141, + "step": 9220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998522654414487, + "loss": 2.3339, + "step": 9225 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998521052556133, + "loss": 2.3205, + "step": 9230 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999851944982988, + "loss": 2.5238, + "step": 9235 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998517846235735, + "loss": 2.4086, + "step": 9240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998516241773686, + "loss": 2.2727, + "step": 9245 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999851463644374, + "loss": 2.0573, + "step": 9250 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199985130302459, + "loss": 2.4938, + "step": 9255 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999851142318016, + "loss": 2.4737, + "step": 9260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998509815246526, + "loss": 2.6544, + "step": 9265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998508206444995, + "loss": 2.629, + "step": 9270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998506596775566, + "loss": 2.1496, + "step": 9275 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999850498623824, + "loss": 2.6678, + "step": 9280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998503374833018, + "loss": 2.4694, + "step": 9285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998501762559897, + "loss": 2.5012, + "step": 9290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998500149418882, + "loss": 2.6216, + "step": 9295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998498535409972, + "loss": 2.6186, + "step": 9300 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998496920533163, + "loss": 2.3252, + "step": 9305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998495304788462, + "loss": 2.4164, + "step": 9310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998493688175864, + "loss": 2.4692, + "step": 9315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998492070695369, + "loss": 2.5259, + "step": 9320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999849045234698, + "loss": 2.554, + "step": 9325 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998488833130695, + "loss": 2.3446, + "step": 9330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998487213046515, + "loss": 2.3739, + "step": 9335 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999848559209444, + "loss": 2.3847, + "step": 9340 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999848397027447, + "loss": 2.4115, + "step": 9345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998482347586606, + "loss": 2.4375, + "step": 9350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998480724030848, + "loss": 2.5581, + "step": 9355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998479099607195, + "loss": 2.485, + "step": 9360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998477474315646, + "loss": 2.2013, + "step": 9365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998475848156204, + "loss": 2.4933, + "step": 9370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999847422112887, + "loss": 2.5338, + "step": 9375 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999847259323364, + "loss": 2.4976, + "step": 9380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998470964470517, + "loss": 2.2246, + "step": 9385 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199984693348395, + "loss": 2.7514, + "step": 9390 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999846770434059, + "loss": 2.3547, + "step": 9395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998466072973788, + "loss": 2.674, + "step": 9400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998464440739092, + "loss": 2.2843, + "step": 9405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998462807636503, + "loss": 2.5339, + "step": 9410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998461173666022, + "loss": 2.3399, + "step": 9415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998459538827644, + "loss": 2.6921, + "step": 9420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998457903121377, + "loss": 2.4712, + "step": 9425 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998456266547216, + "loss": 2.5702, + "step": 9430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998454629105165, + "loss": 2.5769, + "step": 9435 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999845299079522, + "loss": 2.3923, + "step": 9440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998451351617384, + "loss": 2.5397, + "step": 9445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998449711571657, + "loss": 2.5886, + "step": 9450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998448070658036, + "loss": 2.388, + "step": 9455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998446428876526, + "loss": 2.5055, + "step": 9460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998444786227122, + "loss": 2.6634, + "step": 9465 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998443142709829, + "loss": 2.3508, + "step": 9470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998441498324642, + "loss": 2.5842, + "step": 9475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998439853071567, + "loss": 2.4448, + "step": 9480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998438206950597, + "loss": 2.5319, + "step": 9485 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998436559961742, + "loss": 2.4871, + "step": 9490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998434912104994, + "loss": 2.3699, + "step": 9495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998433263380353, + "loss": 2.4543, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998431613787825, + "loss": 2.3799, + "step": 9505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998429963327406, + "loss": 2.5814, + "step": 9510 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998428311999097, + "loss": 2.5413, + "step": 9515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998426659802897, + "loss": 2.4872, + "step": 9520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998425006738808, + "loss": 2.4769, + "step": 9525 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999842335280683, + "loss": 2.4815, + "step": 9530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998421698006963, + "loss": 2.6861, + "step": 9535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998420042339205, + "loss": 2.6536, + "step": 9540 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999841838580356, + "loss": 2.4387, + "step": 9545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998416728400024, + "loss": 2.4469, + "step": 9550 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199984150701286, + "loss": 2.4486, + "step": 9555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998413410989287, + "loss": 2.4776, + "step": 9560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998411750982086, + "loss": 2.3943, + "step": 9565 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998410090106997, + "loss": 2.4478, + "step": 9570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998408428364018, + "loss": 2.0753, + "step": 9575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998406765753152, + "loss": 2.4195, + "step": 9580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998405102274397, + "loss": 2.2805, + "step": 9585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998403437927758, + "loss": 2.365, + "step": 9590 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999840177271323, + "loss": 2.5554, + "step": 9595 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999840010663081, + "loss": 2.5545, + "step": 9600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998398439680505, + "loss": 2.5549, + "step": 9605 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998396771862313, + "loss": 2.3808, + "step": 9610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998395103176236, + "loss": 2.6222, + "step": 9615 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999839343362227, + "loss": 2.2378, + "step": 9620 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998391763200417, + "loss": 2.4597, + "step": 9625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998390091910677, + "loss": 2.3753, + "step": 9630 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999838841975305, + "loss": 2.4199, + "step": 9635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998386746727538, + "loss": 2.436, + "step": 9640 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999838507283414, + "loss": 2.5025, + "step": 9645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998383398072856, + "loss": 2.5538, + "step": 9650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998381722443687, + "loss": 2.537, + "step": 9655 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999838004594663, + "loss": 2.4635, + "step": 9660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998378368581688, + "loss": 2.3456, + "step": 9665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998376690348862, + "loss": 2.1818, + "step": 9670 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998375011248147, + "loss": 2.6232, + "step": 9675 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999837333127955, + "loss": 2.4076, + "step": 9680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998371650443067, + "loss": 2.4803, + "step": 9685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998369968738698, + "loss": 2.3332, + "step": 9690 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998368286166444, + "loss": 2.4951, + "step": 9695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998366602726306, + "loss": 2.562, + "step": 9700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998364918418285, + "loss": 2.4566, + "step": 9705 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999836323324238, + "loss": 2.3356, + "step": 9710 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998361547198586, + "loss": 2.4319, + "step": 9715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998359860286914, + "loss": 2.5457, + "step": 9720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998358172507353, + "loss": 2.6287, + "step": 9725 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998356483859914, + "loss": 2.3725, + "step": 9730 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998354794344584, + "loss": 2.4876, + "step": 9735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998353103961377, + "loss": 2.5241, + "step": 9740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998351412710283, + "loss": 2.5487, + "step": 9745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999834972059131, + "loss": 2.4608, + "step": 9750 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999834802760445, + "loss": 2.5501, + "step": 9755 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998346333749707, + "loss": 2.4901, + "step": 9760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998344639027082, + "loss": 2.5478, + "step": 9765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998342943436577, + "loss": 2.5238, + "step": 9770 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998341246978187, + "loss": 2.4842, + "step": 9775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998339549651915, + "loss": 2.5017, + "step": 9780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998337851457763, + "loss": 2.3788, + "step": 9785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998336152395724, + "loss": 2.3579, + "step": 9790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998334452465808, + "loss": 2.4703, + "step": 9795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998332751668008, + "loss": 2.4456, + "step": 9800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998331050002328, + "loss": 2.4326, + "step": 9805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998329347468765, + "loss": 2.5756, + "step": 9810 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999832764406732, + "loss": 2.668, + "step": 9815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998325939797994, + "loss": 2.3295, + "step": 9820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998324234660788, + "loss": 2.4382, + "step": 9825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998322528655703, + "loss": 2.2991, + "step": 9830 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998320821782738, + "loss": 2.7737, + "step": 9835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998319114041888, + "loss": 2.3518, + "step": 9840 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999831740543316, + "loss": 2.2385, + "step": 9845 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999831569595655, + "loss": 2.503, + "step": 9850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998313985612062, + "loss": 2.2877, + "step": 9855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998312274399695, + "loss": 2.4239, + "step": 9860 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998310562319445, + "loss": 2.4293, + "step": 9865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998308849371318, + "loss": 2.3999, + "step": 9870 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999830713555531, + "loss": 2.3348, + "step": 9875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998305420871424, + "loss": 2.3459, + "step": 9880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999830370531966, + "loss": 2.4825, + "step": 9885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998301988900013, + "loss": 2.4744, + "step": 9890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998300271612492, + "loss": 2.4583, + "step": 9895 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998298553457086, + "loss": 2.3683, + "step": 9900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998296834433806, + "loss": 2.6798, + "step": 9905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998295114542647, + "loss": 2.5938, + "step": 9910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998293393783608, + "loss": 2.1591, + "step": 9915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998291672156693, + "loss": 2.5002, + "step": 9920 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199982899496619, + "loss": 2.4791, + "step": 9925 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999828822629923, + "loss": 2.3907, + "step": 9930 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999828650206868, + "loss": 2.3974, + "step": 9935 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999828477697025, + "loss": 2.4692, + "step": 9940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998283051003948, + "loss": 2.7643, + "step": 9945 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998281324169764, + "loss": 2.0845, + "step": 9950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998279596467706, + "loss": 2.4465, + "step": 9955 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999827786789777, + "loss": 2.3468, + "step": 9960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998276138459957, + "loss": 2.5807, + "step": 9965 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999827440815427, + "loss": 2.3106, + "step": 9970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998272676980704, + "loss": 2.3665, + "step": 9975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998270944939262, + "loss": 2.7522, + "step": 9980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998269212029946, + "loss": 2.5925, + "step": 9985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998267478252749, + "loss": 2.7227, + "step": 9990 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999826574360768, + "loss": 2.5847, + "step": 9995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998264008094734, + "loss": 2.6716, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998262271713914, + "loss": 2.3696, + "step": 10005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998260534465217, + "loss": 2.2694, + "step": 10010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998258796348643, + "loss": 2.501, + "step": 10015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998257057364196, + "loss": 2.4445, + "step": 10020 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998255317511875, + "loss": 2.2597, + "step": 10025 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998253576791676, + "loss": 2.5403, + "step": 10030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998251835203604, + "loss": 2.4338, + "step": 10035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998250092747658, + "loss": 2.4408, + "step": 10040 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999824834942384, + "loss": 2.3402, + "step": 10045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998246605232143, + "loss": 2.2358, + "step": 10050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998244860172575, + "loss": 2.4517, + "step": 10055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998243114245133, + "loss": 2.2565, + "step": 10060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998241367449814, + "loss": 2.5145, + "step": 10065 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998239619786626, + "loss": 2.2973, + "step": 10070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998237871255562, + "loss": 2.4887, + "step": 10075 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998236121856623, + "loss": 2.2674, + "step": 10080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998234371589813, + "loss": 2.472, + "step": 10085 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999823262045513, + "loss": 2.3701, + "step": 10090 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998230868452575, + "loss": 2.3732, + "step": 10095 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998229115582148, + "loss": 2.5064, + "step": 10100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998227361843848, + "loss": 2.4974, + "step": 10105 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999822560723767, + "loss": 2.3062, + "step": 10110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998223851763625, + "loss": 2.6467, + "step": 10115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998222095421708, + "loss": 2.5184, + "step": 10120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998220338211917, + "loss": 2.5022, + "step": 10125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998218580134257, + "loss": 2.5341, + "step": 10130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998216821188723, + "loss": 2.5238, + "step": 10135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998215061375318, + "loss": 2.554, + "step": 10140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998213300694042, + "loss": 2.1777, + "step": 10145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998211539144894, + "loss": 2.4832, + "step": 10150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998209776727875, + "loss": 2.2942, + "step": 10155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998208013442985, + "loss": 2.1188, + "step": 10160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998206249290226, + "loss": 2.1989, + "step": 10165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998204484269593, + "loss": 2.6481, + "step": 10170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998202718381094, + "loss": 2.5382, + "step": 10175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998200951624722, + "loss": 2.5676, + "step": 10180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999819918400048, + "loss": 2.5093, + "step": 10185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998197415508368, + "loss": 2.4684, + "step": 10190 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999819564614839, + "loss": 2.6515, + "step": 10195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998193875920536, + "loss": 2.6088, + "step": 10200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998192104824818, + "loss": 2.2971, + "step": 10205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998190332861225, + "loss": 2.5469, + "step": 10210 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999818856002977, + "loss": 2.294, + "step": 10215 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999818678633044, + "loss": 2.6456, + "step": 10220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998185011763244, + "loss": 2.3013, + "step": 10225 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999818323632818, + "loss": 2.3921, + "step": 10230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998181460025241, + "loss": 2.5392, + "step": 10235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999817968285444, + "loss": 2.3093, + "step": 10240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999817790481577, + "loss": 2.5359, + "step": 10245 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999817612590923, + "loss": 2.4225, + "step": 10250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998174346134824, + "loss": 2.4767, + "step": 10255 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999817256549255, + "loss": 2.5893, + "step": 10260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998170783982406, + "loss": 2.2597, + "step": 10265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998169001604396, + "loss": 2.3343, + "step": 10270 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999816721835852, + "loss": 2.351, + "step": 10275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998165434244775, + "loss": 2.4915, + "step": 10280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998163649263164, + "loss": 2.7405, + "step": 10285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998161863413685, + "loss": 2.4724, + "step": 10290 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999816007669634, + "loss": 2.3576, + "step": 10295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998158289111128, + "loss": 2.3789, + "step": 10300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999815650065805, + "loss": 2.2525, + "step": 10305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998154711337108, + "loss": 2.3558, + "step": 10310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998152921148297, + "loss": 2.5284, + "step": 10315 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999815113009162, + "loss": 2.4762, + "step": 10320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999814933816708, + "loss": 2.3437, + "step": 10325 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999814754537467, + "loss": 2.5542, + "step": 10330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998145751714397, + "loss": 2.571, + "step": 10335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998143957186256, + "loss": 2.3391, + "step": 10340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998142161790255, + "loss": 2.418, + "step": 10345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998140365526385, + "loss": 2.5464, + "step": 10350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998138568394654, + "loss": 2.2553, + "step": 10355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998136770395052, + "loss": 2.5427, + "step": 10360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998134971527593, + "loss": 2.4642, + "step": 10365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998133171792264, + "loss": 2.2765, + "step": 10370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999813137118907, + "loss": 2.2825, + "step": 10375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998129569718016, + "loss": 2.8506, + "step": 10380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998127767379095, + "loss": 2.453, + "step": 10385 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998125964172314, + "loss": 2.2256, + "step": 10390 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998124160097667, + "loss": 2.5469, + "step": 10395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998122355155155, + "loss": 2.7639, + "step": 10400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998120549344784, + "loss": 2.3243, + "step": 10405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998118742666545, + "loss": 2.281, + "step": 10410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998116935120446, + "loss": 2.5782, + "step": 10415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998115126706485, + "loss": 2.4022, + "step": 10420 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999811331742466, + "loss": 2.5827, + "step": 10425 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999811150727497, + "loss": 2.3773, + "step": 10430 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999810969625742, + "loss": 2.5919, + "step": 10435 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999810788437201, + "loss": 2.5754, + "step": 10440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998106071618735, + "loss": 2.5282, + "step": 10445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998104257997598, + "loss": 2.4171, + "step": 10450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998102443508602, + "loss": 2.3195, + "step": 10455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998100628151742, + "loss": 2.4589, + "step": 10460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998098811927018, + "loss": 2.4459, + "step": 10465 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999809699483444, + "loss": 2.4079, + "step": 10470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998095176873996, + "loss": 2.5142, + "step": 10475 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999809335804569, + "loss": 2.4839, + "step": 10480 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999809153834953, + "loss": 2.3362, + "step": 10485 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998089717785503, + "loss": 2.2893, + "step": 10490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998087896353614, + "loss": 2.2023, + "step": 10495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998086074053868, + "loss": 2.4619, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998084250886264, + "loss": 2.6596, + "step": 10505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998082426850796, + "loss": 2.2436, + "step": 10510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999808060194747, + "loss": 2.3952, + "step": 10515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998078776176285, + "loss": 2.2106, + "step": 10520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999807694953724, + "loss": 2.4022, + "step": 10525 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998075122030335, + "loss": 2.5848, + "step": 10530 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999807329365557, + "loss": 2.5176, + "step": 10535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998071464412948, + "loss": 2.4297, + "step": 10540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998069634302468, + "loss": 2.4786, + "step": 10545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998067803324125, + "loss": 2.2828, + "step": 10550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998065971477927, + "loss": 2.2195, + "step": 10555 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999806413876387, + "loss": 2.6033, + "step": 10560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998062305181952, + "loss": 2.1797, + "step": 10565 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998060470732178, + "loss": 2.3711, + "step": 10570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998058635414546, + "loss": 2.1863, + "step": 10575 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999805679922906, + "loss": 2.5752, + "step": 10580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998054962175712, + "loss": 2.5801, + "step": 10585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998053124254506, + "loss": 2.4377, + "step": 10590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998051285465443, + "loss": 2.7624, + "step": 10595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998049445808525, + "loss": 2.5697, + "step": 10600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998047605283752, + "loss": 2.5927, + "step": 10605 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999804576389112, + "loss": 2.5098, + "step": 10610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998043921630628, + "loss": 2.4638, + "step": 10615 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998042078502284, + "loss": 2.3502, + "step": 10620 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999804023450608, + "loss": 2.2765, + "step": 10625 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999803838964202, + "loss": 2.5749, + "step": 10630 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999803654391011, + "loss": 2.3107, + "step": 10635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998034697310337, + "loss": 2.5372, + "step": 10640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998032849842713, + "loss": 2.2403, + "step": 10645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998031001507228, + "loss": 2.4946, + "step": 10650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998029152303894, + "loss": 2.3198, + "step": 10655 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998027302232702, + "loss": 2.5626, + "step": 10660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998025451293657, + "loss": 2.1714, + "step": 10665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998023599486752, + "loss": 2.3108, + "step": 10670 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998021746811997, + "loss": 2.4947, + "step": 10675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998019893269385, + "loss": 2.319, + "step": 10680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998018038858918, + "loss": 2.1148, + "step": 10685 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199980161835806, + "loss": 2.325, + "step": 10690 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998014327434426, + "loss": 2.4553, + "step": 10695 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199980124704204, + "loss": 2.5565, + "step": 10700 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999801061253852, + "loss": 2.5449, + "step": 10705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998008753788785, + "loss": 2.6652, + "step": 10710 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998006894171195, + "loss": 2.3767, + "step": 10715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998005033685753, + "loss": 2.4926, + "step": 10720 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999800317233246, + "loss": 2.1747, + "step": 10725 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999800131011131, + "loss": 2.5619, + "step": 10730 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999799944702231, + "loss": 2.6276, + "step": 10735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997997583065457, + "loss": 2.4658, + "step": 10740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997995718240753, + "loss": 2.5733, + "step": 10745 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997993852548192, + "loss": 2.5212, + "step": 10750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997991985987783, + "loss": 2.4937, + "step": 10755 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999799011855952, + "loss": 2.415, + "step": 10760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997988250263407, + "loss": 2.3701, + "step": 10765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997986381099442, + "loss": 2.5911, + "step": 10770 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997984511067624, + "loss": 2.7455, + "step": 10775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997982640167954, + "loss": 2.4073, + "step": 10780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997980768400434, + "loss": 2.5364, + "step": 10785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997978895765063, + "loss": 2.5758, + "step": 10790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997977022261844, + "loss": 2.4763, + "step": 10795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997975147890772, + "loss": 2.3824, + "step": 10800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999797327265185, + "loss": 2.384, + "step": 10805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997971396545076, + "loss": 2.4721, + "step": 10810 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997969519570453, + "loss": 2.8403, + "step": 10815 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999796764172798, + "loss": 2.3952, + "step": 10820 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997965763017658, + "loss": 2.5735, + "step": 10825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997963883439487, + "loss": 2.4688, + "step": 10830 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997962002993463, + "loss": 2.6084, + "step": 10835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997960121679592, + "loss": 2.4667, + "step": 10840 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997958239497868, + "loss": 2.7458, + "step": 10845 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199979563564483, + "loss": 2.4085, + "step": 10850 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999795447253088, + "loss": 2.5596, + "step": 10855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997952587745614, + "loss": 2.592, + "step": 10860 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997950702092498, + "loss": 2.46, + "step": 10865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997948815571532, + "loss": 2.5783, + "step": 10870 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997946928182721, + "loss": 2.441, + "step": 10875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997945039926062, + "loss": 2.4168, + "step": 10880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999794315080155, + "loss": 2.2499, + "step": 10885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997941260809196, + "loss": 2.0727, + "step": 10890 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999793936994899, + "loss": 2.3397, + "step": 10895 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999793747822094, + "loss": 2.3988, + "step": 10900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997935585625042, + "loss": 2.5731, + "step": 10905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997933692161298, + "loss": 2.4224, + "step": 10910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997931797829704, + "loss": 2.4046, + "step": 10915 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997929902630267, + "loss": 2.6267, + "step": 10920 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999792800656298, + "loss": 2.4695, + "step": 10925 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997926109627847, + "loss": 2.3921, + "step": 10930 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997924211824869, + "loss": 2.2779, + "step": 10935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997922313154043, + "loss": 2.6126, + "step": 10940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997920413615373, + "loss": 2.5356, + "step": 10945 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997918513208856, + "loss": 2.5047, + "step": 10950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997916611934496, + "loss": 2.5633, + "step": 10955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997914709792288, + "loss": 2.667, + "step": 10960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997912806782236, + "loss": 2.4727, + "step": 10965 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997910902904337, + "loss": 2.4113, + "step": 10970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997908998158593, + "loss": 2.6202, + "step": 10975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997907092545006, + "loss": 2.5698, + "step": 10980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997905186063574, + "loss": 2.6121, + "step": 10985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997903278714299, + "loss": 2.6337, + "step": 10990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997901370497179, + "loss": 2.4949, + "step": 10995 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997899461412212, + "loss": 2.2693, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997897551459403, + "loss": 2.5515, + "step": 11005 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999789564063875, + "loss": 2.2905, + "step": 11010 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997893728950254, + "loss": 2.5977, + "step": 11015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997891816393915, + "loss": 2.5307, + "step": 11020 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999788990296973, + "loss": 2.2319, + "step": 11025 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997887988677706, + "loss": 2.1402, + "step": 11030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997886073517838, + "loss": 2.4426, + "step": 11035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997884157490125, + "loss": 2.3057, + "step": 11040 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999788224059457, + "loss": 2.4781, + "step": 11045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997880322831173, + "loss": 2.2988, + "step": 11050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997878404199934, + "loss": 2.4854, + "step": 11055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997876484700852, + "loss": 2.6659, + "step": 11060 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999787456433393, + "loss": 2.5361, + "step": 11065 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997872643099168, + "loss": 2.2406, + "step": 11070 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999787072099656, + "loss": 2.5424, + "step": 11075 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997868798026113, + "loss": 2.4807, + "step": 11080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997866874187824, + "loss": 2.4698, + "step": 11085 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999786494948169, + "loss": 2.7042, + "step": 11090 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999786302390772, + "loss": 2.5465, + "step": 11095 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999786109746591, + "loss": 2.396, + "step": 11100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997859170156255, + "loss": 2.3966, + "step": 11105 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999785724197876, + "loss": 2.2927, + "step": 11110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999785531293343, + "loss": 2.336, + "step": 11115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997853383020252, + "loss": 2.5682, + "step": 11120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997851452239243, + "loss": 2.5712, + "step": 11125 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999784952059039, + "loss": 2.6416, + "step": 11130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997847588073696, + "loss": 2.3506, + "step": 11135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997845654689162, + "loss": 2.5925, + "step": 11140 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999784372043679, + "loss": 2.473, + "step": 11145 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999784178531658, + "loss": 2.498, + "step": 11150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997839849328531, + "loss": 2.4977, + "step": 11155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997837912472642, + "loss": 2.4077, + "step": 11160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997835974748917, + "loss": 2.3962, + "step": 11165 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999783403615735, + "loss": 2.4046, + "step": 11170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997832096697946, + "loss": 2.5895, + "step": 11175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997830156370702, + "loss": 2.4535, + "step": 11180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999782821517562, + "loss": 2.7525, + "step": 11185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997826273112707, + "loss": 2.3057, + "step": 11190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997824330181952, + "loss": 2.6935, + "step": 11195 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999782238638336, + "loss": 2.5052, + "step": 11200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997820441716927, + "loss": 2.3999, + "step": 11205 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999781849618266, + "loss": 2.2644, + "step": 11210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997816549780558, + "loss": 2.082, + "step": 11215 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999781460251062, + "loss": 2.5398, + "step": 11220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999781265437284, + "loss": 2.6221, + "step": 11225 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997810705367228, + "loss": 2.4482, + "step": 11230 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999780875549378, + "loss": 2.7528, + "step": 11235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999780680475249, + "loss": 2.4769, + "step": 11240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999780485314337, + "loss": 2.4324, + "step": 11245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997802900666414, + "loss": 2.2279, + "step": 11250 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999780094732162, + "loss": 2.5746, + "step": 11255 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999779899310899, + "loss": 2.5469, + "step": 11260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997797038028527, + "loss": 2.6787, + "step": 11265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997795082080227, + "loss": 2.7183, + "step": 11270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997793125264095, + "loss": 2.157, + "step": 11275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997791167580126, + "loss": 2.355, + "step": 11280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997789209028324, + "loss": 2.3888, + "step": 11285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997787249608688, + "loss": 2.3385, + "step": 11290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997785289321214, + "loss": 2.3096, + "step": 11295 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999778332816591, + "loss": 2.4811, + "step": 11300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999778136614277, + "loss": 2.482, + "step": 11305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997779403251799, + "loss": 2.3757, + "step": 11310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999777743949299, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997775474866352, + "loss": 2.3709, + "step": 11320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997773509371878, + "loss": 2.5319, + "step": 11325 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997771543009573, + "loss": 2.4362, + "step": 11330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997769575779431, + "loss": 2.4256, + "step": 11335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997767607681462, + "loss": 2.4115, + "step": 11340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997765638715657, + "loss": 2.3964, + "step": 11345 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999776366888202, + "loss": 2.581, + "step": 11350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997761698180552, + "loss": 2.3648, + "step": 11355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997759726611252, + "loss": 2.4494, + "step": 11360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997757754174118, + "loss": 2.2324, + "step": 11365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997755780869154, + "loss": 2.4793, + "step": 11370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999775380669636, + "loss": 2.3191, + "step": 11375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997751831655734, + "loss": 2.7056, + "step": 11380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997749855747276, + "loss": 2.3016, + "step": 11385 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997747878970986, + "loss": 2.3911, + "step": 11390 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997745901326866, + "loss": 1.878, + "step": 11395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997743922814919, + "loss": 2.6576, + "step": 11400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997741943435135, + "loss": 2.3337, + "step": 11405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997739963187525, + "loss": 2.6586, + "step": 11410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997737982072083, + "loss": 2.3893, + "step": 11415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997736000088812, + "loss": 2.5075, + "step": 11420 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999773401723771, + "loss": 2.4445, + "step": 11425 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997732033518782, + "loss": 2.5811, + "step": 11430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997730048932022, + "loss": 2.2631, + "step": 11435 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997728063477432, + "loss": 2.652, + "step": 11440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997726077155013, + "loss": 2.4095, + "step": 11445 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999772408996477, + "loss": 2.626, + "step": 11450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999772210190669, + "loss": 2.5359, + "step": 11455 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997720112980788, + "loss": 2.3061, + "step": 11460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997718123187055, + "loss": 2.6007, + "step": 11465 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997716132525494, + "loss": 2.4207, + "step": 11470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997714140996108, + "loss": 2.4945, + "step": 11475 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999771214859889, + "loss": 2.6721, + "step": 11480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997710155333844, + "loss": 2.4411, + "step": 11485 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999770816120097, + "loss": 2.5658, + "step": 11490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997706166200273, + "loss": 2.4054, + "step": 11495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997704170331745, + "loss": 2.2281, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997702173595394, + "loss": 2.5157, + "step": 11505 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997700175991212, + "loss": 2.4029, + "step": 11510 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997698177519205, + "loss": 2.429, + "step": 11515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997696178179376, + "loss": 2.3668, + "step": 11520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997694177971713, + "loss": 2.3173, + "step": 11525 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997692176896228, + "loss": 2.492, + "step": 11530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997690174952917, + "loss": 2.5129, + "step": 11535 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997688172141779, + "loss": 2.5201, + "step": 11540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997686168462818, + "loss": 2.5068, + "step": 11545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997684163916026, + "loss": 2.2567, + "step": 11550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997682158501414, + "loss": 2.4654, + "step": 11555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997680152218975, + "loss": 2.536, + "step": 11560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997678145068713, + "loss": 2.661, + "step": 11565 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997676137050623, + "loss": 2.5768, + "step": 11570 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999767412816471, + "loss": 2.3125, + "step": 11575 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999767211841097, + "loss": 2.4354, + "step": 11580 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999767010778941, + "loss": 2.784, + "step": 11585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997668096300025, + "loss": 2.3771, + "step": 11590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997666083942814, + "loss": 2.5635, + "step": 11595 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997664070717783, + "loss": 2.58, + "step": 11600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997662056624925, + "loss": 2.3966, + "step": 11605 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997660041664247, + "loss": 2.3584, + "step": 11610 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999765802583574, + "loss": 2.7565, + "step": 11615 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997656009139417, + "loss": 2.7276, + "step": 11620 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997653991575266, + "loss": 2.3058, + "step": 11625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997651973143295, + "loss": 2.3889, + "step": 11630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997649953843502, + "loss": 2.3774, + "step": 11635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997647933675885, + "loss": 2.6754, + "step": 11640 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997645912640447, + "loss": 2.4136, + "step": 11645 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997643890737186, + "loss": 2.4073, + "step": 11650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997641867966105, + "loss": 2.4641, + "step": 11655 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199976398443272, + "loss": 2.5109, + "step": 11660 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997637819820475, + "loss": 2.3103, + "step": 11665 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997635794445927, + "loss": 2.1382, + "step": 11670 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999763376820356, + "loss": 2.4605, + "step": 11675 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997631741093373, + "loss": 2.4008, + "step": 11680 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997629713115362, + "loss": 2.3952, + "step": 11685 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997627684269532, + "loss": 2.5662, + "step": 11690 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997625654555881, + "loss": 2.575, + "step": 11695 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997623623974414, + "loss": 2.4816, + "step": 11700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997621592525121, + "loss": 2.7463, + "step": 11705 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997619560208014, + "loss": 2.6753, + "step": 11710 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997617527023085, + "loss": 2.5616, + "step": 11715 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997615492970336, + "loss": 2.4672, + "step": 11720 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997613458049766, + "loss": 2.3435, + "step": 11725 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999761142226138, + "loss": 2.561, + "step": 11730 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999760938560517, + "loss": 2.3806, + "step": 11735 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997607348081145, + "loss": 2.3155, + "step": 11740 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997605309689305, + "loss": 2.6049, + "step": 11745 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997603270429642, + "loss": 2.5219, + "step": 11750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997601230302162, + "loss": 2.5796, + "step": 11755 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997599189306863, + "loss": 2.4046, + "step": 11760 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997597147443746, + "loss": 2.499, + "step": 11765 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997595104712812, + "loss": 2.4154, + "step": 11770 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999759306111406, + "loss": 2.3173, + "step": 11775 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997591016647493, + "loss": 2.4123, + "step": 11780 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997588971313107, + "loss": 2.5128, + "step": 11785 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997586925110902, + "loss": 2.4484, + "step": 11790 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997584878040883, + "loss": 2.5272, + "step": 11795 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997582830103046, + "loss": 2.5432, + "step": 11800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997580781297392, + "loss": 2.4824, + "step": 11805 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997578731623924, + "loss": 2.2588, + "step": 11810 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997576681082637, + "loss": 2.3555, + "step": 11815 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997574629673537, + "loss": 2.2945, + "step": 11820 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999757257739662, + "loss": 2.2826, + "step": 11825 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997570524251887, + "loss": 2.5673, + "step": 11830 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997568470239338, + "loss": 2.445, + "step": 11835 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997566415358976, + "loss": 2.4448, + "step": 11840 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199975643596108, + "loss": 2.4024, + "step": 11845 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997562302994805, + "loss": 2.3226, + "step": 11850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997560245510996, + "loss": 2.4601, + "step": 11855 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997558187159376, + "loss": 2.444, + "step": 11860 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999755612793994, + "loss": 2.5258, + "step": 11865 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997554067852688, + "loss": 2.7061, + "step": 11870 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997552006897622, + "loss": 2.5543, + "step": 11875 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997549945074744, + "loss": 2.4321, + "step": 11880 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997547882384052, + "loss": 2.2429, + "step": 11885 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997545818825548, + "loss": 2.4161, + "step": 11890 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997543754399227, + "loss": 2.4542, + "step": 11895 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997541689105095, + "loss": 2.4715, + "step": 11900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999753962294315, + "loss": 2.4635, + "step": 11905 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997537555913394, + "loss": 2.6533, + "step": 11910 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997535488015824, + "loss": 2.3862, + "step": 11915 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999753341925044, + "loss": 2.3898, + "step": 11920 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997531349617246, + "loss": 2.6894, + "step": 11925 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999752927911624, + "loss": 2.5738, + "step": 11930 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999752720774742, + "loss": 2.3446, + "step": 11935 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997525135510792, + "loss": 2.6286, + "step": 11940 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997523062406352, + "loss": 2.535, + "step": 11945 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199975209884341, + "loss": 2.5846, + "step": 11950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997518913594036, + "loss": 2.6708, + "step": 11955 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997516837886158, + "loss": 2.3358, + "step": 11960 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997514761310473, + "loss": 2.5653, + "step": 11965 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999751268386698, + "loss": 2.4606, + "step": 11970 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997510605555672, + "loss": 2.6728, + "step": 11975 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997508526376555, + "loss": 2.5948, + "step": 11980 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997506446329627, + "loss": 2.4636, + "step": 11985 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997504365414895, + "loss": 2.6507, + "step": 11990 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997502283632349, + "loss": 2.4276, + "step": 11995 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999750020098199, + "loss": 2.4268, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997498117463826, + "loss": 2.3155, + "step": 12005 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997496033077853, + "loss": 2.5103, + "step": 12010 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999749394782407, + "loss": 2.5187, + "step": 12015 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997491861702477, + "loss": 2.5975, + "step": 12020 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999748977471308, + "loss": 2.4078, + "step": 12025 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999748768685587, + "loss": 2.3794, + "step": 12030 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997485598130855, + "loss": 2.3124, + "step": 12035 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997483508538027, + "loss": 2.5713, + "step": 12040 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997481418077397, + "loss": 2.1336, + "step": 12045 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997479326748957, + "loss": 2.7057, + "step": 12050 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997477234552709, + "loss": 2.4896, + "step": 12055 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997475141488656, + "loss": 2.3709, + "step": 12060 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997473047556793, + "loss": 2.1572, + "step": 12065 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997470952757122, + "loss": 2.5542, + "step": 12070 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997468857089646, + "loss": 2.4698, + "step": 12075 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997466760554366, + "loss": 2.4545, + "step": 12080 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997464663151275, + "loss": 2.4641, + "step": 12085 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999746256488038, + "loss": 2.153, + "step": 12090 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999746046574168, + "loss": 2.6339, + "step": 12095 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997458365735173, + "loss": 2.3097, + "step": 12100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997456264860862, + "loss": 2.196, + "step": 12105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997454163118742, + "loss": 2.1926, + "step": 12110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999745206050882, + "loss": 2.524, + "step": 12115 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999744995703109, + "loss": 2.6172, + "step": 12120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997447852685557, + "loss": 2.401, + "step": 12125 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999744574747222, + "loss": 2.5944, + "step": 12130 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999744364139108, + "loss": 2.1631, + "step": 12135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997441534442132, + "loss": 2.3836, + "step": 12140 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999743942662538, + "loss": 2.5634, + "step": 12145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997437317940825, + "loss": 2.4223, + "step": 12150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997435208388465, + "loss": 2.21, + "step": 12155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997433097968307, + "loss": 2.3197, + "step": 12160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999743098668034, + "loss": 2.3689, + "step": 12165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997428874524568, + "loss": 2.3294, + "step": 12170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997426761500997, + "loss": 2.2847, + "step": 12175 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999742464760962, + "loss": 2.5656, + "step": 12180 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997422532850442, + "loss": 2.4576, + "step": 12185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997420417223463, + "loss": 2.5989, + "step": 12190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997418300728682, + "loss": 2.323, + "step": 12195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997416183366093, + "loss": 2.3168, + "step": 12200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997414065135708, + "loss": 2.224, + "step": 12205 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999741194603752, + "loss": 2.5972, + "step": 12210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997409826071527, + "loss": 2.5446, + "step": 12215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997407705237737, + "loss": 2.4893, + "step": 12220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999740558353614, + "loss": 2.5806, + "step": 12225 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999740346096675, + "loss": 2.4046, + "step": 12230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997401337529553, + "loss": 2.3519, + "step": 12235 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997399213224557, + "loss": 2.1366, + "step": 12240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999739708805176, + "loss": 2.3766, + "step": 12245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997394962011165, + "loss": 2.6111, + "step": 12250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997392835102766, + "loss": 2.2581, + "step": 12255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997390707326572, + "loss": 2.5957, + "step": 12260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997388578682574, + "loss": 2.5943, + "step": 12265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997386449170778, + "loss": 2.7057, + "step": 12270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997384318791182, + "loss": 2.3825, + "step": 12275 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997382187543787, + "loss": 2.5424, + "step": 12280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997380055428594, + "loss": 2.5994, + "step": 12285 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199973779224456, + "loss": 2.4017, + "step": 12290 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999737578859481, + "loss": 2.4752, + "step": 12295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997373653876218, + "loss": 2.5205, + "step": 12300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999737151828983, + "loss": 2.3572, + "step": 12305 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997369381835644, + "loss": 2.5049, + "step": 12310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999736724451366, + "loss": 2.3371, + "step": 12315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997365106323877, + "loss": 2.3831, + "step": 12320 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199973629672663, + "loss": 2.3459, + "step": 12325 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999736082734092, + "loss": 2.6217, + "step": 12330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997358686547746, + "loss": 2.597, + "step": 12335 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997356544886776, + "loss": 2.4469, + "step": 12340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997354402358008, + "loss": 2.4035, + "step": 12345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997352258961445, + "loss": 2.4356, + "step": 12350 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999735011469708, + "loss": 2.6113, + "step": 12355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997347969564927, + "loss": 2.5397, + "step": 12360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999734582356497, + "loss": 2.5205, + "step": 12365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997343676697222, + "loss": 2.525, + "step": 12370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997341528961676, + "loss": 2.4736, + "step": 12375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997339380358335, + "loss": 2.4842, + "step": 12380 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199973372308872, + "loss": 2.5738, + "step": 12385 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999733508054827, + "loss": 2.4422, + "step": 12390 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997332929341544, + "loss": 2.286, + "step": 12395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997330777267024, + "loss": 2.5369, + "step": 12400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997328624324706, + "loss": 2.325, + "step": 12405 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997326470514598, + "loss": 2.5605, + "step": 12410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997324315836697, + "loss": 2.4599, + "step": 12415 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997322160290998, + "loss": 2.514, + "step": 12420 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997320003877506, + "loss": 2.2743, + "step": 12425 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999731784659622, + "loss": 2.4482, + "step": 12430 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997315688447142, + "loss": 2.308, + "step": 12435 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999731352943027, + "loss": 2.2427, + "step": 12440 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997311369545604, + "loss": 2.1984, + "step": 12445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997309208793146, + "loss": 2.368, + "step": 12450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997307047172895, + "loss": 2.2944, + "step": 12455 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999730488468485, + "loss": 2.4126, + "step": 12460 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997302721329015, + "loss": 2.3809, + "step": 12465 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999730055710539, + "loss": 2.4451, + "step": 12470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997298392013967, + "loss": 2.4656, + "step": 12475 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997296226054756, + "loss": 2.53, + "step": 12480 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997294059227754, + "loss": 2.561, + "step": 12485 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999729189153296, + "loss": 2.2128, + "step": 12490 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997289722970373, + "loss": 2.2274, + "step": 12495 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997287553539996, + "loss": 2.5228, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999728538324183, + "loss": 2.3405, + "step": 12505 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999728321207587, + "loss": 2.6303, + "step": 12510 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997281040042122, + "loss": 2.482, + "step": 12515 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997278867140583, + "loss": 2.6349, + "step": 12520 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997276693371253, + "loss": 2.4412, + "step": 12525 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997274518734136, + "loss": 2.4151, + "step": 12530 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997272343229226, + "loss": 2.4361, + "step": 12535 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999727016685653, + "loss": 2.2396, + "step": 12540 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997267989616042, + "loss": 2.5266, + "step": 12545 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997265811507763, + "loss": 2.4731, + "step": 12550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997263632531698, + "loss": 2.4637, + "step": 12555 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997261452687844, + "loss": 2.4655, + "step": 12560 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997259271976203, + "loss": 2.2793, + "step": 12565 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999725709039677, + "loss": 2.4703, + "step": 12570 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997254907949552, + "loss": 2.3971, + "step": 12575 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997252724634546, + "loss": 2.4912, + "step": 12580 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997250540451747, + "loss": 2.6492, + "step": 12585 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997248355401167, + "loss": 2.6137, + "step": 12590 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997246169482796, + "loss": 2.2331, + "step": 12595 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999724398269664, + "loss": 2.4599, + "step": 12600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997241795042696, + "loss": 2.6955, + "step": 12605 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997239606520966, + "loss": 2.4918, + "step": 12610 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997237417131446, + "loss": 2.4573, + "step": 12615 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997235226874141, + "loss": 2.5273, + "step": 12620 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997233035749052, + "loss": 2.5456, + "step": 12625 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997230843756173, + "loss": 2.4297, + "step": 12630 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997228650895513, + "loss": 2.5807, + "step": 12635 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997226457167062, + "loss": 2.6845, + "step": 12640 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999722426257083, + "loss": 2.4075, + "step": 12645 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999722206710681, + "loss": 2.4821, + "step": 12650 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997219870775005, + "loss": 2.3636, + "step": 12655 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997217673575418, + "loss": 2.3911, + "step": 12660 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997215475508042, + "loss": 2.5498, + "step": 12665 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997213276572883, + "loss": 2.2082, + "step": 12670 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999721107676994, + "loss": 2.5635, + "step": 12675 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999720887609921, + "loss": 2.2998, + "step": 12680 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199972066745607, + "loss": 2.2889, + "step": 12685 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997204472154407, + "loss": 2.5273, + "step": 12690 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997202268880329, + "loss": 2.4649, + "step": 12695 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997200064738463, + "loss": 2.5811, + "step": 12700 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999719785972882, + "loss": 2.4679, + "step": 12705 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999719565385139, + "loss": 2.5025, + "step": 12710 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997193447106182, + "loss": 2.3796, + "step": 12715 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997191239493186, + "loss": 2.1805, + "step": 12720 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999718903101241, + "loss": 2.5732, + "step": 12725 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997186821663852, + "loss": 2.3069, + "step": 12730 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999718461144751, + "loss": 2.4859, + "step": 12735 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997182400363387, + "loss": 2.6011, + "step": 12740 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997180188411484, + "loss": 2.5648, + "step": 12745 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997177975591797, + "loss": 2.4831, + "step": 12750 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997175761904333, + "loss": 2.3341, + "step": 12755 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997173547349083, + "loss": 2.6718, + "step": 12760 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997171331926055, + "loss": 2.4975, + "step": 12765 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999716911563524, + "loss": 2.6542, + "step": 12770 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997166898476652, + "loss": 2.3267, + "step": 12775 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997164680450282, + "loss": 2.2833, + "step": 12780 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997162461556133, + "loss": 2.4378, + "step": 12785 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199971602417942, + "loss": 2.1509, + "step": 12790 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997158021164488, + "loss": 2.743, + "step": 12795 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019997155799667, + "loss": 2.3859, + "step": 12800 + } + ], + "logging_steps": 5, + "max_steps": 1685984, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 200, + "total_flos": 3.8338610650381025e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}