| { | |
| "best_metric": 0.5966796875, | |
| "best_model_checkpoint": "./results/checkpoint-10662", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 14216, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0028137310073157004, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.8827, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005627462014631401, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.9839, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008441193021947102, | |
| "grad_norm": 224.25678800007668, | |
| "learning_rate": 4.2e-06, | |
| "loss": 2.9641, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011254924029262802, | |
| "grad_norm": 139.77573172893386, | |
| "learning_rate": 1.02e-05, | |
| "loss": 2.3895, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014068655036578503, | |
| "grad_norm": 110.31193690169778, | |
| "learning_rate": 1.6199999999999997e-05, | |
| "loss": 1.55, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.016882386043894203, | |
| "grad_norm": 23.291664316032804, | |
| "learning_rate": 2.2199999999999998e-05, | |
| "loss": 2.1463, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.019696117051209903, | |
| "grad_norm": 78.12962991651972, | |
| "learning_rate": 2.8199999999999998e-05, | |
| "loss": 2.6968, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.022509848058525603, | |
| "grad_norm": 145.88009990286014, | |
| "learning_rate": 3.42e-05, | |
| "loss": 3.5582, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.025323579065841307, | |
| "grad_norm": 126.24036573153992, | |
| "learning_rate": 4.02e-05, | |
| "loss": 2.7401, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.028137310073157007, | |
| "grad_norm": 7.932821466299223, | |
| "learning_rate": 4.62e-05, | |
| "loss": 1.4496, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.030951041080472707, | |
| "grad_norm": 51.39094381445175, | |
| "learning_rate": 5.2199999999999995e-05, | |
| "loss": 1.2416, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03376477208778841, | |
| "grad_norm": 52.627137222293655, | |
| "learning_rate": 5.82e-05, | |
| "loss": 0.9829, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03657850309510411, | |
| "grad_norm": 83.67118972227, | |
| "learning_rate": 6.419999999999999e-05, | |
| "loss": 1.8664, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03939223410241981, | |
| "grad_norm": 254.8820062247784, | |
| "learning_rate": 7.02e-05, | |
| "loss": 1.3919, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04220596510973551, | |
| "grad_norm": 95.05590819779509, | |
| "learning_rate": 7.62e-05, | |
| "loss": 1.8913, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04501969611705121, | |
| "grad_norm": 96.52932022551686, | |
| "learning_rate": 8.22e-05, | |
| "loss": 2.9941, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04783342712436691, | |
| "grad_norm": 152.73209597084838, | |
| "learning_rate": 8.819999999999999e-05, | |
| "loss": 1.9007, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.050647158131682614, | |
| "grad_norm": 38.961577452090374, | |
| "learning_rate": 9.419999999999999e-05, | |
| "loss": 1.3572, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05346088913899831, | |
| "grad_norm": 8.365359526839624, | |
| "learning_rate": 0.0001002, | |
| "loss": 0.8396, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.056274620146314014, | |
| "grad_norm": 27.415167430506084, | |
| "learning_rate": 0.00010619999999999998, | |
| "loss": 1.6339, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05908835115362971, | |
| "grad_norm": 4.809093819846915, | |
| "learning_rate": 0.00011219999999999999, | |
| "loss": 0.7721, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.061902082160945414, | |
| "grad_norm": 25.63960863733989, | |
| "learning_rate": 0.0001182, | |
| "loss": 0.8201, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06471581316826111, | |
| "grad_norm": 81.6209161855533, | |
| "learning_rate": 0.00012419999999999998, | |
| "loss": 1.3382, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06752954417557681, | |
| "grad_norm": 25.965053380742912, | |
| "learning_rate": 0.0001302, | |
| "loss": 1.2083, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07034327518289252, | |
| "grad_norm": 49.9863607853443, | |
| "learning_rate": 0.0001362, | |
| "loss": 1.3294, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07315700619020822, | |
| "grad_norm": 20.007567654071906, | |
| "learning_rate": 0.0001422, | |
| "loss": 0.9669, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07597073719752391, | |
| "grad_norm": 29.047164184009052, | |
| "learning_rate": 0.0001482, | |
| "loss": 0.8447, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07878446820483961, | |
| "grad_norm": 23.644879858956426, | |
| "learning_rate": 0.00015419999999999998, | |
| "loss": 1.141, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08159819921215532, | |
| "grad_norm": 46.372111281936895, | |
| "learning_rate": 0.0001602, | |
| "loss": 1.2533, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08441193021947102, | |
| "grad_norm": 11.20178962457438, | |
| "learning_rate": 0.0001662, | |
| "loss": 0.825, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08722566122678672, | |
| "grad_norm": 48.90453331100731, | |
| "learning_rate": 0.00017219999999999998, | |
| "loss": 1.1152, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09003939223410241, | |
| "grad_norm": 0.091122566845799, | |
| "learning_rate": 0.00017819999999999997, | |
| "loss": 0.877, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09285312324141812, | |
| "grad_norm": 44.370694350506966, | |
| "learning_rate": 0.00018419999999999998, | |
| "loss": 2.8161, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.09566685424873382, | |
| "grad_norm": 26.012340742157125, | |
| "learning_rate": 0.0001902, | |
| "loss": 1.8136, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09848058525604952, | |
| "grad_norm": 4.391781057112832, | |
| "learning_rate": 0.0001962, | |
| "loss": 0.8146, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10129431626336523, | |
| "grad_norm": 9.187263907428804, | |
| "learning_rate": 0.0002022, | |
| "loss": 1.9734, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.10410804727068092, | |
| "grad_norm": 146.14669734330562, | |
| "learning_rate": 0.00020819999999999996, | |
| "loss": 2.3596, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.10692177827799662, | |
| "grad_norm": 103.03855355929782, | |
| "learning_rate": 0.00021419999999999998, | |
| "loss": 4.1036, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.10973550928531232, | |
| "grad_norm": 64.30913047008124, | |
| "learning_rate": 0.00022019999999999999, | |
| "loss": 1.337, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.11254924029262803, | |
| "grad_norm": 309.69819080980943, | |
| "learning_rate": 0.00022559999999999998, | |
| "loss": 1.7713, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11536297129994373, | |
| "grad_norm": 1.9356075644481516, | |
| "learning_rate": 0.0002316, | |
| "loss": 1.7203, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.11817670230725942, | |
| "grad_norm": 79.09050048639865, | |
| "learning_rate": 0.0002376, | |
| "loss": 3.0218, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12099043331457512, | |
| "grad_norm": 24.669088958893436, | |
| "learning_rate": 0.00024359999999999999, | |
| "loss": 2.8796, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.12380416432189083, | |
| "grad_norm": 35.50057015666331, | |
| "learning_rate": 0.00024959999999999994, | |
| "loss": 1.3042, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.12661789532920653, | |
| "grad_norm": 63.35643345487432, | |
| "learning_rate": 0.0002556, | |
| "loss": 1.0222, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12943162633652222, | |
| "grad_norm": 44.8309413245288, | |
| "learning_rate": 0.00026159999999999996, | |
| "loss": 2.2135, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.13224535734383794, | |
| "grad_norm": 38.21235063708972, | |
| "learning_rate": 0.0002676, | |
| "loss": 1.9759, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.13505908835115363, | |
| "grad_norm": 42.502230547826144, | |
| "learning_rate": 0.0002736, | |
| "loss": 1.3122, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.13787281935846932, | |
| "grad_norm": 36.78039561983335, | |
| "learning_rate": 0.00027959999999999997, | |
| "loss": 0.9201, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14068655036578503, | |
| "grad_norm": 31.432740474500267, | |
| "learning_rate": 0.00028559999999999995, | |
| "loss": 0.7877, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14350028137310072, | |
| "grad_norm": 89.40921662924484, | |
| "learning_rate": 0.0002916, | |
| "loss": 1.5383, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.14631401238041644, | |
| "grad_norm": 52.21924604041036, | |
| "learning_rate": 0.00029759999999999997, | |
| "loss": 2.0496, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.14912774338773213, | |
| "grad_norm": 26.377094972604038, | |
| "learning_rate": 0.00029986876640419944, | |
| "loss": 1.0984, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.15194147439504782, | |
| "grad_norm": 12.678479550625854, | |
| "learning_rate": 0.0002996500437445319, | |
| "loss": 0.7538, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.15475520540236354, | |
| "grad_norm": 39.15053950048559, | |
| "learning_rate": 0.0002994313210848644, | |
| "loss": 0.8266, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.15756893640967923, | |
| "grad_norm": 8.686627630645642, | |
| "learning_rate": 0.00029921259842519685, | |
| "loss": 0.6999, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.16038266741699495, | |
| "grad_norm": 15.686342705824982, | |
| "learning_rate": 0.00029899387576552927, | |
| "loss": 0.7207, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.16319639842431063, | |
| "grad_norm": 5.239119778234183, | |
| "learning_rate": 0.00029877515310586174, | |
| "loss": 0.979, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.16601012943162632, | |
| "grad_norm": 3.898189968863333, | |
| "learning_rate": 0.0002985564304461942, | |
| "loss": 0.7798, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.16882386043894204, | |
| "grad_norm": 82.36259815094716, | |
| "learning_rate": 0.0002983377077865267, | |
| "loss": 1.3224, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.17163759144625773, | |
| "grad_norm": 78.13332901615998, | |
| "learning_rate": 0.0002981189851268591, | |
| "loss": 1.5312, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.17445132245357345, | |
| "grad_norm": 19.326937621272116, | |
| "learning_rate": 0.00029790026246719157, | |
| "loss": 2.1933, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.17726505346088914, | |
| "grad_norm": 63.14152039967042, | |
| "learning_rate": 0.00029768153980752404, | |
| "loss": 1.356, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.18007878446820483, | |
| "grad_norm": 80.26072923404266, | |
| "learning_rate": 0.0002974628171478565, | |
| "loss": 2.3056, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.18289251547552055, | |
| "grad_norm": 10.103427618986666, | |
| "learning_rate": 0.0002972440944881889, | |
| "loss": 1.2462, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.18570624648283623, | |
| "grad_norm": 51.23631739417244, | |
| "learning_rate": 0.0002970253718285214, | |
| "loss": 1.1639, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.18851997749015195, | |
| "grad_norm": 6.157960830396006, | |
| "learning_rate": 0.00029680664916885386, | |
| "loss": 0.7786, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.19133370849746764, | |
| "grad_norm": 10.247301558325885, | |
| "learning_rate": 0.00029658792650918633, | |
| "loss": 0.7196, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.19414743950478333, | |
| "grad_norm": 43.71975555313302, | |
| "learning_rate": 0.0002963692038495188, | |
| "loss": 1.1545, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.19696117051209905, | |
| "grad_norm": 7.9395865721238446, | |
| "learning_rate": 0.0002961504811898512, | |
| "loss": 0.8198, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.19977490151941474, | |
| "grad_norm": 10.481113571999062, | |
| "learning_rate": 0.0002959317585301837, | |
| "loss": 0.7158, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.20258863252673046, | |
| "grad_norm": 12.378546853564645, | |
| "learning_rate": 0.00029571303587051616, | |
| "loss": 0.7685, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.20540236353404615, | |
| "grad_norm": 47.604153129845606, | |
| "learning_rate": 0.00029549431321084863, | |
| "loss": 0.7332, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.20821609454136183, | |
| "grad_norm": 25.527299782292648, | |
| "learning_rate": 0.0002952755905511811, | |
| "loss": 0.7475, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.21102982554867755, | |
| "grad_norm": 45.25744483228148, | |
| "learning_rate": 0.00029505686789151357, | |
| "loss": 0.743, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.21384355655599324, | |
| "grad_norm": 37.835568741114564, | |
| "learning_rate": 0.000294838145231846, | |
| "loss": 0.7685, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.21665728756330896, | |
| "grad_norm": 27.87886782853722, | |
| "learning_rate": 0.00029461942257217845, | |
| "loss": 0.9676, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.21947101857062465, | |
| "grad_norm": 23.79947674931416, | |
| "learning_rate": 0.0002944006999125109, | |
| "loss": 0.7742, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.22228474957794034, | |
| "grad_norm": 13.066512959590527, | |
| "learning_rate": 0.0002941819772528434, | |
| "loss": 0.7118, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.22509848058525606, | |
| "grad_norm": 25.847486935286263, | |
| "learning_rate": 0.0002939632545931758, | |
| "loss": 0.7724, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.22791221159257175, | |
| "grad_norm": 35.49445839081954, | |
| "learning_rate": 0.0002937445319335083, | |
| "loss": 0.7618, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.23072594259988746, | |
| "grad_norm": 49.49529219586411, | |
| "learning_rate": 0.00029352580927384075, | |
| "loss": 0.7084, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.23353967360720315, | |
| "grad_norm": 37.565688190537166, | |
| "learning_rate": 0.00029330708661417317, | |
| "loss": 0.8741, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.23635340461451884, | |
| "grad_norm": 8.413460164151891, | |
| "learning_rate": 0.00029308836395450564, | |
| "loss": 1.0024, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.23916713562183456, | |
| "grad_norm": 32.940098975528464, | |
| "learning_rate": 0.0002928696412948381, | |
| "loss": 0.9008, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24198086662915025, | |
| "grad_norm": 41.72472308400113, | |
| "learning_rate": 0.0002926509186351706, | |
| "loss": 0.6943, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.24479459763646597, | |
| "grad_norm": 5.973403122009343, | |
| "learning_rate": 0.00029243219597550305, | |
| "loss": 0.8841, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.24760832864378166, | |
| "grad_norm": 19.107388950348, | |
| "learning_rate": 0.0002922134733158355, | |
| "loss": 0.7404, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2504220596510974, | |
| "grad_norm": 66.51613947405258, | |
| "learning_rate": 0.00029199475065616793, | |
| "loss": 1.5743, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.25323579065841306, | |
| "grad_norm": 35.550471084963576, | |
| "learning_rate": 0.0002917760279965004, | |
| "loss": 0.9394, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.25604952166572875, | |
| "grad_norm": 36.649681372911445, | |
| "learning_rate": 0.00029155730533683287, | |
| "loss": 0.7641, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.25886325267304444, | |
| "grad_norm": 12.270656532801103, | |
| "learning_rate": 0.00029133858267716534, | |
| "loss": 0.9354, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.26167698368036013, | |
| "grad_norm": 18.4202269481449, | |
| "learning_rate": 0.0002911198600174978, | |
| "loss": 0.8232, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2644907146876759, | |
| "grad_norm": 70.47960911214764, | |
| "learning_rate": 0.0002909011373578303, | |
| "loss": 0.8628, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.26730444569499157, | |
| "grad_norm": 25.90531145228859, | |
| "learning_rate": 0.0002906824146981627, | |
| "loss": 0.8182, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.27011817670230726, | |
| "grad_norm": 29.63336083779562, | |
| "learning_rate": 0.00029046369203849517, | |
| "loss": 0.7766, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.27293190770962295, | |
| "grad_norm": 16.666638960939466, | |
| "learning_rate": 0.00029024496937882764, | |
| "loss": 0.7988, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.27574563871693863, | |
| "grad_norm": 20.07806754771967, | |
| "learning_rate": 0.0002900262467191601, | |
| "loss": 0.7784, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2785593697242544, | |
| "grad_norm": 12.36951117312153, | |
| "learning_rate": 0.0002898075240594925, | |
| "loss": 0.8476, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.28137310073157007, | |
| "grad_norm": 22.1125299804219, | |
| "learning_rate": 0.000289588801399825, | |
| "loss": 0.719, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.28418683173888576, | |
| "grad_norm": 3.109040322200588, | |
| "learning_rate": 0.00028937007874015746, | |
| "loss": 0.7406, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.28700056274620145, | |
| "grad_norm": 28.351720853512052, | |
| "learning_rate": 0.0002891513560804899, | |
| "loss": 0.8295, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.28981429375351714, | |
| "grad_norm": 8.61987445057803, | |
| "learning_rate": 0.00028893263342082235, | |
| "loss": 0.681, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2926280247608329, | |
| "grad_norm": 11.532817025226382, | |
| "learning_rate": 0.0002887139107611548, | |
| "loss": 0.7396, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2954417557681486, | |
| "grad_norm": 6.165669988575859, | |
| "learning_rate": 0.0002884951881014873, | |
| "loss": 0.7601, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.29825548677546426, | |
| "grad_norm": 5.860155976144423, | |
| "learning_rate": 0.00028827646544181976, | |
| "loss": 0.7554, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.30106921778277995, | |
| "grad_norm": 19.345458106174757, | |
| "learning_rate": 0.0002880577427821522, | |
| "loss": 0.7848, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.30388294879009564, | |
| "grad_norm": 19.436003760130507, | |
| "learning_rate": 0.00028783902012248464, | |
| "loss": 0.6703, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3066966797974114, | |
| "grad_norm": 3.958053947843868, | |
| "learning_rate": 0.0002876202974628171, | |
| "loss": 0.7141, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3095104108047271, | |
| "grad_norm": 9.441658402935863, | |
| "learning_rate": 0.0002874015748031496, | |
| "loss": 0.6783, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.31232414181204277, | |
| "grad_norm": 18.815776769084255, | |
| "learning_rate": 0.00028718285214348205, | |
| "loss": 0.8264, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.31513787281935846, | |
| "grad_norm": 18.750079475373475, | |
| "learning_rate": 0.0002869641294838145, | |
| "loss": 0.702, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.31795160382667415, | |
| "grad_norm": 40.59299255876535, | |
| "learning_rate": 0.000286745406824147, | |
| "loss": 0.7972, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3207653348339899, | |
| "grad_norm": 25.46547229422401, | |
| "learning_rate": 0.0002865266841644794, | |
| "loss": 0.7, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.3235790658413056, | |
| "grad_norm": 13.988940584670248, | |
| "learning_rate": 0.0002863079615048119, | |
| "loss": 0.6672, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.32639279684862127, | |
| "grad_norm": 4.209523057541857, | |
| "learning_rate": 0.00028608923884514435, | |
| "loss": 0.6089, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.32920652785593696, | |
| "grad_norm": 21.621479938817654, | |
| "learning_rate": 0.0002858705161854768, | |
| "loss": 0.6678, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.33202025886325265, | |
| "grad_norm": 27.115608784965413, | |
| "learning_rate": 0.00028565179352580924, | |
| "loss": 0.656, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3348339898705684, | |
| "grad_norm": 9.058371119623647, | |
| "learning_rate": 0.0002854330708661417, | |
| "loss": 0.8967, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3376477208778841, | |
| "grad_norm": 23.7560047514354, | |
| "learning_rate": 0.0002852143482064742, | |
| "loss": 0.8101, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3404614518851998, | |
| "grad_norm": 35.20987445808512, | |
| "learning_rate": 0.0002849956255468066, | |
| "loss": 0.7738, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.34327518289251546, | |
| "grad_norm": 11.698215716101412, | |
| "learning_rate": 0.00028477690288713906, | |
| "loss": 0.8432, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.34608891389983115, | |
| "grad_norm": 22.50137501429176, | |
| "learning_rate": 0.00028455818022747153, | |
| "loss": 0.7033, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3489026449071469, | |
| "grad_norm": 19.821463032004264, | |
| "learning_rate": 0.000284339457567804, | |
| "loss": 0.7943, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3517163759144626, | |
| "grad_norm": 33.67996416219415, | |
| "learning_rate": 0.00028412073490813647, | |
| "loss": 0.9042, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3545301069217783, | |
| "grad_norm": 10.3108845588795, | |
| "learning_rate": 0.0002839020122484689, | |
| "loss": 0.6961, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.35734383792909397, | |
| "grad_norm": 13.932358160677255, | |
| "learning_rate": 0.00028368328958880136, | |
| "loss": 0.6584, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.36015756893640966, | |
| "grad_norm": 6.665964382062972, | |
| "learning_rate": 0.00028346456692913383, | |
| "loss": 0.7388, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3629712999437254, | |
| "grad_norm": 10.05896951436482, | |
| "learning_rate": 0.0002832458442694663, | |
| "loss": 0.7343, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3657850309510411, | |
| "grad_norm": 11.0866654634556, | |
| "learning_rate": 0.00028302712160979877, | |
| "loss": 0.6406, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3685987619583568, | |
| "grad_norm": 16.53331909071421, | |
| "learning_rate": 0.00028280839895013124, | |
| "loss": 0.7217, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.37141249296567247, | |
| "grad_norm": 29.64720675209545, | |
| "learning_rate": 0.00028258967629046365, | |
| "loss": 0.6978, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.37422622397298816, | |
| "grad_norm": 28.455785874506656, | |
| "learning_rate": 0.0002823709536307961, | |
| "loss": 0.7681, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3770399549803039, | |
| "grad_norm": 22.981131743639594, | |
| "learning_rate": 0.0002821522309711286, | |
| "loss": 0.6291, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3798536859876196, | |
| "grad_norm": 29.59061751823822, | |
| "learning_rate": 0.00028193350831146106, | |
| "loss": 0.8084, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.3826674169949353, | |
| "grad_norm": 5.6091230275760475, | |
| "learning_rate": 0.0002817147856517935, | |
| "loss": 0.7371, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.385481148002251, | |
| "grad_norm": 5.883998486444261, | |
| "learning_rate": 0.00028149606299212595, | |
| "loss": 0.7251, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.38829487900956666, | |
| "grad_norm": 9.040960893323161, | |
| "learning_rate": 0.0002812773403324584, | |
| "loss": 0.6948, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.3911086100168824, | |
| "grad_norm": 16.807114962722785, | |
| "learning_rate": 0.0002810586176727909, | |
| "loss": 0.7303, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.3939223410241981, | |
| "grad_norm": 13.0879100690685, | |
| "learning_rate": 0.0002808398950131233, | |
| "loss": 0.6484, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.3967360720315138, | |
| "grad_norm": 16.044210764032027, | |
| "learning_rate": 0.0002806211723534558, | |
| "loss": 0.6612, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.3995498030388295, | |
| "grad_norm": 37.0843541394152, | |
| "learning_rate": 0.00028040244969378825, | |
| "loss": 0.7218, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.40236353404614517, | |
| "grad_norm": 44.12879697805232, | |
| "learning_rate": 0.0002801837270341207, | |
| "loss": 1.2958, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4051772650534609, | |
| "grad_norm": 28.017644530703276, | |
| "learning_rate": 0.0002799650043744532, | |
| "loss": 0.989, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.4079909960607766, | |
| "grad_norm": 48.95451227633847, | |
| "learning_rate": 0.0002797462817147856, | |
| "loss": 0.7852, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4108047270680923, | |
| "grad_norm": 13.750288764403155, | |
| "learning_rate": 0.00027952755905511807, | |
| "loss": 1.0036, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.413618458075408, | |
| "grad_norm": 12.62751471781883, | |
| "learning_rate": 0.00027930883639545054, | |
| "loss": 0.7137, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.41643218908272367, | |
| "grad_norm": 27.20810519301277, | |
| "learning_rate": 0.000279090113735783, | |
| "loss": 0.6905, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.4192459200900394, | |
| "grad_norm": 28.107277824414965, | |
| "learning_rate": 0.0002788713910761155, | |
| "loss": 0.6837, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4220596510973551, | |
| "grad_norm": 5.416144983374891, | |
| "learning_rate": 0.00027865266841644795, | |
| "loss": 0.7208, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4248733821046708, | |
| "grad_norm": 11.589744326535003, | |
| "learning_rate": 0.00027843394575678037, | |
| "loss": 0.6227, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4276871131119865, | |
| "grad_norm": 8.906957133772503, | |
| "learning_rate": 0.00027821522309711284, | |
| "loss": 0.7367, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4305008441193022, | |
| "grad_norm": 32.87231457665042, | |
| "learning_rate": 0.0002779965004374453, | |
| "loss": 0.7919, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4333145751266179, | |
| "grad_norm": 26.00340134223748, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 0.676, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.4361283061339336, | |
| "grad_norm": 34.53119270440781, | |
| "learning_rate": 0.0002775590551181102, | |
| "loss": 0.8778, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4389420371412493, | |
| "grad_norm": 6.460463403838006, | |
| "learning_rate": 0.00027734033245844266, | |
| "loss": 0.5865, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.441755768148565, | |
| "grad_norm": 11.404929871459034, | |
| "learning_rate": 0.00027712160979877513, | |
| "loss": 0.6955, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4445694991558807, | |
| "grad_norm": 16.213324604997545, | |
| "learning_rate": 0.0002769028871391076, | |
| "loss": 0.6287, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.4473832301631964, | |
| "grad_norm": 23.82912364680576, | |
| "learning_rate": 0.00027668416447944, | |
| "loss": 1.0301, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4501969611705121, | |
| "grad_norm": 13.424863094947291, | |
| "learning_rate": 0.0002764654418197725, | |
| "loss": 0.7109, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4530106921778278, | |
| "grad_norm": 20.253487976081246, | |
| "learning_rate": 0.00027624671916010496, | |
| "loss": 0.5895, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4558244231851435, | |
| "grad_norm": 12.145349601064195, | |
| "learning_rate": 0.00027602799650043743, | |
| "loss": 0.6832, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4586381541924592, | |
| "grad_norm": 15.611833511231971, | |
| "learning_rate": 0.0002758092738407699, | |
| "loss": 0.6678, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.4614518851997749, | |
| "grad_norm": 14.283717293563125, | |
| "learning_rate": 0.0002755905511811023, | |
| "loss": 0.596, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.4642656162070906, | |
| "grad_norm": 20.447345294591308, | |
| "learning_rate": 0.0002753718285214348, | |
| "loss": 0.6593, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4670793472144063, | |
| "grad_norm": 7.225334907859718, | |
| "learning_rate": 0.00027515310586176726, | |
| "loss": 0.7062, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.469893078221722, | |
| "grad_norm": 16.228475676453073, | |
| "learning_rate": 0.0002749343832020997, | |
| "loss": 0.68, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.4727068092290377, | |
| "grad_norm": 15.345865551250505, | |
| "learning_rate": 0.0002747156605424322, | |
| "loss": 0.6377, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.47552054023635343, | |
| "grad_norm": 13.116990150980092, | |
| "learning_rate": 0.00027449693788276467, | |
| "loss": 0.555, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4783342712436691, | |
| "grad_norm": 7.523456579032664, | |
| "learning_rate": 0.0002742782152230971, | |
| "loss": 0.7089, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4811480022509848, | |
| "grad_norm": 15.62034181204981, | |
| "learning_rate": 0.00027405949256342955, | |
| "loss": 0.5955, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4839617332583005, | |
| "grad_norm": 33.72794816539747, | |
| "learning_rate": 0.000273840769903762, | |
| "loss": 0.7967, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.4867754642656162, | |
| "grad_norm": 8.371501278758954, | |
| "learning_rate": 0.0002736220472440945, | |
| "loss": 0.6281, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.48958919527293193, | |
| "grad_norm": 14.674093397655396, | |
| "learning_rate": 0.0002734033245844269, | |
| "loss": 0.7373, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.4924029262802476, | |
| "grad_norm": 4.743062155600575, | |
| "learning_rate": 0.0002731846019247594, | |
| "loss": 0.6782, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.4952166572875633, | |
| "grad_norm": 45.76318589779893, | |
| "learning_rate": 0.00027296587926509185, | |
| "loss": 0.6851, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.498030388294879, | |
| "grad_norm": 7.008897310409392, | |
| "learning_rate": 0.00027274715660542426, | |
| "loss": 0.7031, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5008441193021947, | |
| "grad_norm": 12.188963127648035, | |
| "learning_rate": 0.00027252843394575673, | |
| "loss": 0.7553, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5036578503095104, | |
| "grad_norm": 8.04659950296303, | |
| "learning_rate": 0.0002723097112860892, | |
| "loss": 0.6391, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5064715813168261, | |
| "grad_norm": 23.403833885375786, | |
| "learning_rate": 0.00027209098862642167, | |
| "loss": 0.6564, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5092853123241418, | |
| "grad_norm": 19.61248291110157, | |
| "learning_rate": 0.00027187226596675414, | |
| "loss": 0.5736, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5120990433314575, | |
| "grad_norm": 7.232723854059021, | |
| "learning_rate": 0.00027165354330708656, | |
| "loss": 0.7036, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5149127743387732, | |
| "grad_norm": 13.467653622805527, | |
| "learning_rate": 0.00027143482064741903, | |
| "loss": 0.8533, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5177265053460889, | |
| "grad_norm": 24.167342861487832, | |
| "learning_rate": 0.0002712160979877515, | |
| "loss": 0.7531, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5205402363534046, | |
| "grad_norm": 17.840804581591108, | |
| "learning_rate": 0.00027099737532808397, | |
| "loss": 0.6141, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5233539673607203, | |
| "grad_norm": 6.905072707920589, | |
| "learning_rate": 0.00027077865266841644, | |
| "loss": 0.6358, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.526167698368036, | |
| "grad_norm": 14.15349419929909, | |
| "learning_rate": 0.0002705599300087489, | |
| "loss": 0.7491, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5289814293753518, | |
| "grad_norm": 10.6411502042627, | |
| "learning_rate": 0.0002703412073490814, | |
| "loss": 0.6761, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5317951603826674, | |
| "grad_norm": 12.526381470822352, | |
| "learning_rate": 0.0002701224846894138, | |
| "loss": 0.6767, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5346088913899831, | |
| "grad_norm": 18.982165857200393, | |
| "learning_rate": 0.00026990376202974626, | |
| "loss": 0.6584, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5374226223972988, | |
| "grad_norm": 21.858403045881502, | |
| "learning_rate": 0.00026968503937007873, | |
| "loss": 0.7442, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5402363534046145, | |
| "grad_norm": 17.69501842397575, | |
| "learning_rate": 0.0002694663167104112, | |
| "loss": 0.6945, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5430500844119303, | |
| "grad_norm": 22.834715228106134, | |
| "learning_rate": 0.0002692475940507436, | |
| "loss": 0.7521, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5458638154192459, | |
| "grad_norm": 26.467656611768952, | |
| "learning_rate": 0.0002690288713910761, | |
| "loss": 0.6719, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5486775464265616, | |
| "grad_norm": 29.568622749960294, | |
| "learning_rate": 0.00026881014873140856, | |
| "loss": 1.0309, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5514912774338773, | |
| "grad_norm": 9.2347635939369, | |
| "learning_rate": 0.000268591426071741, | |
| "loss": 0.7166, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.554305008441193, | |
| "grad_norm": 19.460279560031523, | |
| "learning_rate": 0.00026837270341207345, | |
| "loss": 0.7064, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5571187394485088, | |
| "grad_norm": 5.903067753944877, | |
| "learning_rate": 0.0002681539807524059, | |
| "loss": 0.7376, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5599324704558244, | |
| "grad_norm": 17.371144674890022, | |
| "learning_rate": 0.0002679352580927384, | |
| "loss": 0.6147, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5627462014631401, | |
| "grad_norm": 5.4409380839404875, | |
| "learning_rate": 0.00026771653543307086, | |
| "loss": 0.5593, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5655599324704558, | |
| "grad_norm": 43.10864314052242, | |
| "learning_rate": 0.00026749781277340327, | |
| "loss": 0.7555, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5683736634777715, | |
| "grad_norm": 7.489455426282972, | |
| "learning_rate": 0.00026727909011373574, | |
| "loss": 0.7477, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5711873944850873, | |
| "grad_norm": 6.66426392264251, | |
| "learning_rate": 0.0002670603674540682, | |
| "loss": 0.6787, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5740011254924029, | |
| "grad_norm": 15.342229369129983, | |
| "learning_rate": 0.0002668416447944007, | |
| "loss": 0.6353, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5768148564997186, | |
| "grad_norm": 17.180941723078337, | |
| "learning_rate": 0.00026662292213473315, | |
| "loss": 0.6054, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5796285875070343, | |
| "grad_norm": 35.277901006510085, | |
| "learning_rate": 0.0002664041994750656, | |
| "loss": 0.7408, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.58244231851435, | |
| "grad_norm": 11.27450226311033, | |
| "learning_rate": 0.00026618547681539804, | |
| "loss": 0.6806, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5852560495216658, | |
| "grad_norm": 58.49843655029433, | |
| "learning_rate": 0.0002659667541557305, | |
| "loss": 0.994, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5880697805289814, | |
| "grad_norm": 13.809131175890833, | |
| "learning_rate": 0.000265748031496063, | |
| "loss": 0.609, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5908835115362971, | |
| "grad_norm": 20.009880236869453, | |
| "learning_rate": 0.00026552930883639545, | |
| "loss": 0.588, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5936972425436128, | |
| "grad_norm": 7.191101525174044, | |
| "learning_rate": 0.0002653105861767279, | |
| "loss": 0.6758, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5965109735509285, | |
| "grad_norm": 36.929986801209665, | |
| "learning_rate": 0.00026509186351706033, | |
| "loss": 0.7098, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5993247045582443, | |
| "grad_norm": 35.8301224810954, | |
| "learning_rate": 0.0002648731408573928, | |
| "loss": 0.5812, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6021384355655599, | |
| "grad_norm": 22.332697021851985, | |
| "learning_rate": 0.0002646544181977253, | |
| "loss": 0.578, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6049521665728756, | |
| "grad_norm": 7.695082610709639, | |
| "learning_rate": 0.0002644356955380577, | |
| "loss": 0.5778, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6077658975801913, | |
| "grad_norm": 17.046853464953895, | |
| "learning_rate": 0.00026421697287839016, | |
| "loss": 0.7022, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.610579628587507, | |
| "grad_norm": 41.35650306806981, | |
| "learning_rate": 0.00026399825021872263, | |
| "loss": 1.036, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6133933595948228, | |
| "grad_norm": 8.7115876639432, | |
| "learning_rate": 0.0002637795275590551, | |
| "loss": 0.7931, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6162070906021384, | |
| "grad_norm": 34.58819046075799, | |
| "learning_rate": 0.00026356080489938757, | |
| "loss": 0.643, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6190208216094542, | |
| "grad_norm": 10.263602247101797, | |
| "learning_rate": 0.00026334208223972, | |
| "loss": 0.6436, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6218345526167698, | |
| "grad_norm": 46.6429192571408, | |
| "learning_rate": 0.00026312335958005246, | |
| "loss": 0.6991, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6246482836240855, | |
| "grad_norm": 11.878255221017628, | |
| "learning_rate": 0.0002629046369203849, | |
| "loss": 0.6504, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6274620146314013, | |
| "grad_norm": 6.479822059289286, | |
| "learning_rate": 0.0002626859142607174, | |
| "loss": 0.5869, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6302757456387169, | |
| "grad_norm": 12.174115126023809, | |
| "learning_rate": 0.00026246719160104987, | |
| "loss": 0.8186, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6330894766460327, | |
| "grad_norm": 14.12631818540897, | |
| "learning_rate": 0.00026224846894138234, | |
| "loss": 0.7347, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6359032076533483, | |
| "grad_norm": 32.041943287347785, | |
| "learning_rate": 0.00026202974628171475, | |
| "loss": 0.7152, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.638716938660664, | |
| "grad_norm": 5.4013176531081655, | |
| "learning_rate": 0.0002618110236220472, | |
| "loss": 0.6162, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6415306696679798, | |
| "grad_norm": 13.504260914807004, | |
| "learning_rate": 0.0002615923009623797, | |
| "loss": 0.7279, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6443444006752954, | |
| "grad_norm": 9.620376593872086, | |
| "learning_rate": 0.00026137357830271216, | |
| "loss": 0.6628, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6471581316826112, | |
| "grad_norm": 22.427874242699758, | |
| "learning_rate": 0.0002611548556430446, | |
| "loss": 0.6983, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6499718626899268, | |
| "grad_norm": 41.554954362999574, | |
| "learning_rate": 0.00026093613298337705, | |
| "loss": 1.0201, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6527855936972425, | |
| "grad_norm": 7.036254242716845, | |
| "learning_rate": 0.0002607174103237095, | |
| "loss": 0.8125, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6555993247045583, | |
| "grad_norm": 13.380680950268676, | |
| "learning_rate": 0.000260498687664042, | |
| "loss": 0.6904, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6584130557118739, | |
| "grad_norm": 22.884181519089868, | |
| "learning_rate": 0.0002602799650043744, | |
| "loss": 1.0037, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6612267867191897, | |
| "grad_norm": 14.511141210797714, | |
| "learning_rate": 0.00026006124234470687, | |
| "loss": 0.7414, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6640405177265053, | |
| "grad_norm": 13.431978101097688, | |
| "learning_rate": 0.00025984251968503934, | |
| "loss": 0.7078, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.666854248733821, | |
| "grad_norm": 29.24656191831114, | |
| "learning_rate": 0.0002596237970253718, | |
| "loss": 0.5655, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6696679797411368, | |
| "grad_norm": 6.150879349284207, | |
| "learning_rate": 0.0002594050743657043, | |
| "loss": 0.64, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6724817107484524, | |
| "grad_norm": 6.533201611095304, | |
| "learning_rate": 0.0002591863517060367, | |
| "loss": 0.6766, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6752954417557682, | |
| "grad_norm": 11.440054847870906, | |
| "learning_rate": 0.00025896762904636917, | |
| "loss": 0.5998, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6781091727630838, | |
| "grad_norm": 31.143749184180702, | |
| "learning_rate": 0.00025874890638670164, | |
| "loss": 0.5311, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6809229037703995, | |
| "grad_norm": 22.852634622577742, | |
| "learning_rate": 0.0002585301837270341, | |
| "loss": 0.5925, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6837366347777153, | |
| "grad_norm": 24.588223403495608, | |
| "learning_rate": 0.0002583114610673666, | |
| "loss": 0.6734, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6865503657850309, | |
| "grad_norm": 28.048648610791382, | |
| "learning_rate": 0.00025809273840769905, | |
| "loss": 0.618, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6893640967923467, | |
| "grad_norm": 10.406220663733844, | |
| "learning_rate": 0.00025787401574803146, | |
| "loss": 0.5913, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6921778277996623, | |
| "grad_norm": 6.811024400622029, | |
| "learning_rate": 0.00025765529308836393, | |
| "loss": 0.6074, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.694991558806978, | |
| "grad_norm": 12.155916590641617, | |
| "learning_rate": 0.0002574365704286964, | |
| "loss": 0.5704, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6978052898142938, | |
| "grad_norm": 9.031515529220442, | |
| "learning_rate": 0.0002572178477690289, | |
| "loss": 0.6423, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7006190208216094, | |
| "grad_norm": 14.205953959415192, | |
| "learning_rate": 0.0002569991251093613, | |
| "loss": 0.6407, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7034327518289252, | |
| "grad_norm": 48.32963976504197, | |
| "learning_rate": 0.00025678040244969376, | |
| "loss": 0.6504, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7062464828362408, | |
| "grad_norm": 23.896138968892455, | |
| "learning_rate": 0.00025656167979002623, | |
| "loss": 0.6352, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7090602138435566, | |
| "grad_norm": 11.036690380167714, | |
| "learning_rate": 0.00025634295713035865, | |
| "loss": 0.7513, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7118739448508723, | |
| "grad_norm": 38.96438150155598, | |
| "learning_rate": 0.0002561242344706911, | |
| "loss": 0.8657, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7146876758581879, | |
| "grad_norm": 42.72643454806835, | |
| "learning_rate": 0.0002559055118110236, | |
| "loss": 0.9091, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7175014068655037, | |
| "grad_norm": 16.610380140185, | |
| "learning_rate": 0.00025568678915135606, | |
| "loss": 0.7069, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7203151378728193, | |
| "grad_norm": 22.40904574778732, | |
| "learning_rate": 0.0002554680664916885, | |
| "loss": 0.5857, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7231288688801351, | |
| "grad_norm": 14.389267092163761, | |
| "learning_rate": 0.000255249343832021, | |
| "loss": 0.6018, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7259425998874508, | |
| "grad_norm": 13.683335764917064, | |
| "learning_rate": 0.0002550306211723534, | |
| "loss": 0.5358, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7287563308947664, | |
| "grad_norm": 30.498280377063637, | |
| "learning_rate": 0.0002548118985126859, | |
| "loss": 0.9904, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7315700619020822, | |
| "grad_norm": 8.683287013020767, | |
| "learning_rate": 0.00025459317585301835, | |
| "loss": 0.71, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7343837929093978, | |
| "grad_norm": 7.103289048611902, | |
| "learning_rate": 0.0002543744531933508, | |
| "loss": 0.8194, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7371975239167136, | |
| "grad_norm": 6.281668196582603, | |
| "learning_rate": 0.0002541557305336833, | |
| "loss": 0.7173, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7400112549240293, | |
| "grad_norm": 5.39806094595311, | |
| "learning_rate": 0.00025393700787401576, | |
| "loss": 0.7826, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7428249859313449, | |
| "grad_norm": 32.09155901494012, | |
| "learning_rate": 0.0002537182852143482, | |
| "loss": 0.6226, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7456387169386607, | |
| "grad_norm": 8.519967935941418, | |
| "learning_rate": 0.00025349956255468065, | |
| "loss": 0.7454, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7484524479459763, | |
| "grad_norm": 42.035625193177715, | |
| "learning_rate": 0.0002532808398950131, | |
| "loss": 0.5172, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7512661789532921, | |
| "grad_norm": 9.967889876277471, | |
| "learning_rate": 0.0002530621172353456, | |
| "loss": 1.0016, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7540799099606078, | |
| "grad_norm": 26.068769304112433, | |
| "learning_rate": 0.000252843394575678, | |
| "loss": 0.7935, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7568936409679234, | |
| "grad_norm": 20.87767388709777, | |
| "learning_rate": 0.0002526246719160105, | |
| "loss": 0.8831, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7597073719752392, | |
| "grad_norm": 23.861680316972155, | |
| "learning_rate": 0.00025240594925634294, | |
| "loss": 0.7566, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7625211029825548, | |
| "grad_norm": 19.606881060581557, | |
| "learning_rate": 0.00025218722659667536, | |
| "loss": 0.6166, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7653348339898706, | |
| "grad_norm": 23.231679663361476, | |
| "learning_rate": 0.00025196850393700783, | |
| "loss": 0.7444, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7681485649971863, | |
| "grad_norm": 14.475225232701424, | |
| "learning_rate": 0.0002517497812773403, | |
| "loss": 0.6132, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.770962296004502, | |
| "grad_norm": 10.196976505426665, | |
| "learning_rate": 0.00025153105861767277, | |
| "loss": 0.6414, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7737760270118177, | |
| "grad_norm": 12.26153672415283, | |
| "learning_rate": 0.00025131233595800524, | |
| "loss": 0.6497, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7765897580191333, | |
| "grad_norm": 11.399029351648554, | |
| "learning_rate": 0.00025109361329833766, | |
| "loss": 0.6397, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7794034890264491, | |
| "grad_norm": 29.29036307836923, | |
| "learning_rate": 0.0002508748906386701, | |
| "loss": 0.6448, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7822172200337648, | |
| "grad_norm": 6.5421868930376315, | |
| "learning_rate": 0.0002506561679790026, | |
| "loss": 0.619, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7850309510410804, | |
| "grad_norm": 28.964608214250898, | |
| "learning_rate": 0.00025043744531933507, | |
| "loss": 0.6457, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.7878446820483962, | |
| "grad_norm": 19.105194261412365, | |
| "learning_rate": 0.00025021872265966754, | |
| "loss": 0.8734, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7906584130557118, | |
| "grad_norm": 21.86681738792712, | |
| "learning_rate": 0.00025, | |
| "loss": 0.6627, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7934721440630276, | |
| "grad_norm": 21.420403379202583, | |
| "learning_rate": 0.0002497812773403325, | |
| "loss": 0.6947, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.7962858750703433, | |
| "grad_norm": 51.77141397970572, | |
| "learning_rate": 0.0002495625546806649, | |
| "loss": 0.6478, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.799099606077659, | |
| "grad_norm": 19.60546919995034, | |
| "learning_rate": 0.00024934383202099736, | |
| "loss": 0.6738, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8019133370849747, | |
| "grad_norm": 5.930058860961108, | |
| "learning_rate": 0.00024912510936132983, | |
| "loss": 0.676, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8047270680922903, | |
| "grad_norm": 9.793830622150702, | |
| "learning_rate": 0.0002489063867016623, | |
| "loss": 0.5543, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8075407990996061, | |
| "grad_norm": 30.643668253643902, | |
| "learning_rate": 0.0002486876640419947, | |
| "loss": 0.6535, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8103545301069218, | |
| "grad_norm": 18.840891754548007, | |
| "learning_rate": 0.0002484689413823272, | |
| "loss": 0.7703, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8131682611142375, | |
| "grad_norm": 11.630930014907443, | |
| "learning_rate": 0.00024825021872265966, | |
| "loss": 0.6506, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8159819921215532, | |
| "grad_norm": 11.371539982197872, | |
| "learning_rate": 0.00024803149606299207, | |
| "loss": 0.6467, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8187957231288688, | |
| "grad_norm": 10.427236030304023, | |
| "learning_rate": 0.00024781277340332454, | |
| "loss": 0.8076, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8216094541361846, | |
| "grad_norm": 36.87576985444582, | |
| "learning_rate": 0.000247594050743657, | |
| "loss": 0.6055, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8244231851435003, | |
| "grad_norm": 35.48244372566825, | |
| "learning_rate": 0.0002473753280839895, | |
| "loss": 0.8045, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.827236916150816, | |
| "grad_norm": 24.817299467837056, | |
| "learning_rate": 0.00024715660542432195, | |
| "loss": 0.6533, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8300506471581317, | |
| "grad_norm": 9.141011529069573, | |
| "learning_rate": 0.00024693788276465437, | |
| "loss": 0.6856, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8328643781654473, | |
| "grad_norm": 16.064568145247428, | |
| "learning_rate": 0.00024671916010498684, | |
| "loss": 0.6118, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8356781091727631, | |
| "grad_norm": 14.088534153379833, | |
| "learning_rate": 0.0002465004374453193, | |
| "loss": 0.6359, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8384918401800788, | |
| "grad_norm": 10.800798513388331, | |
| "learning_rate": 0.0002462817147856518, | |
| "loss": 0.4701, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8413055711873945, | |
| "grad_norm": 23.57379674968355, | |
| "learning_rate": 0.00024606299212598425, | |
| "loss": 0.8891, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8441193021947102, | |
| "grad_norm": 19.087599267026963, | |
| "learning_rate": 0.0002458442694663167, | |
| "loss": 0.6812, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8469330332020258, | |
| "grad_norm": 5.851382871921484, | |
| "learning_rate": 0.00024562554680664913, | |
| "loss": 0.5135, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8497467642093416, | |
| "grad_norm": 32.51696153998222, | |
| "learning_rate": 0.0002454068241469816, | |
| "loss": 0.6505, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8525604952166573, | |
| "grad_norm": 19.264326332382478, | |
| "learning_rate": 0.0002451881014873141, | |
| "loss": 0.7129, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.855374226223973, | |
| "grad_norm": 16.974285473343233, | |
| "learning_rate": 0.00024496937882764654, | |
| "loss": 0.7383, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8581879572312887, | |
| "grad_norm": 26.864035695775446, | |
| "learning_rate": 0.00024475065616797896, | |
| "loss": 0.3988, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8610016882386043, | |
| "grad_norm": 13.174415289969117, | |
| "learning_rate": 0.00024453193350831143, | |
| "loss": 0.7362, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8638154192459201, | |
| "grad_norm": 17.500378614825863, | |
| "learning_rate": 0.0002443132108486439, | |
| "loss": 0.7758, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8666291502532358, | |
| "grad_norm": 9.282760951889967, | |
| "learning_rate": 0.00024409448818897634, | |
| "loss": 0.4896, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8694428812605515, | |
| "grad_norm": 24.683487403388195, | |
| "learning_rate": 0.0002438757655293088, | |
| "loss": 0.6937, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8722566122678672, | |
| "grad_norm": 6.130822987006422, | |
| "learning_rate": 0.00024365704286964128, | |
| "loss": 0.6025, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8750703432751828, | |
| "grad_norm": 41.848473080771385, | |
| "learning_rate": 0.00024343832020997373, | |
| "loss": 0.7405, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8778840742824986, | |
| "grad_norm": 16.807668144029588, | |
| "learning_rate": 0.0002432195975503062, | |
| "loss": 0.8849, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8806978052898143, | |
| "grad_norm": 12.875519609787505, | |
| "learning_rate": 0.00024300087489063867, | |
| "loss": 0.6274, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.88351153629713, | |
| "grad_norm": 17.334020913099007, | |
| "learning_rate": 0.00024278215223097108, | |
| "loss": 0.4966, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.8863252673044457, | |
| "grad_norm": 3.003018774866583, | |
| "learning_rate": 0.00024256342957130355, | |
| "loss": 0.6363, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8891389983117614, | |
| "grad_norm": 21.403704259371647, | |
| "learning_rate": 0.00024234470691163602, | |
| "loss": 0.6202, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.8919527293190771, | |
| "grad_norm": 20.49088520270555, | |
| "learning_rate": 0.0002421259842519685, | |
| "loss": 0.6555, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.8947664603263928, | |
| "grad_norm": 3.2248953164538157, | |
| "learning_rate": 0.00024190726159230096, | |
| "loss": 0.667, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.8975801913337085, | |
| "grad_norm": 20.43806666316274, | |
| "learning_rate": 0.0002416885389326334, | |
| "loss": 0.8157, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9003939223410242, | |
| "grad_norm": 43.16776399371423, | |
| "learning_rate": 0.00024146981627296585, | |
| "loss": 0.9972, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9032076533483399, | |
| "grad_norm": 7.49046572212332, | |
| "learning_rate": 0.00024125109361329832, | |
| "loss": 0.5636, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9060213843556556, | |
| "grad_norm": 8.741984739159578, | |
| "learning_rate": 0.00024103237095363076, | |
| "loss": 0.5747, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9088351153629713, | |
| "grad_norm": 54.83042946791486, | |
| "learning_rate": 0.00024081364829396323, | |
| "loss": 0.5467, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.911648846370287, | |
| "grad_norm": 5.610010207094356, | |
| "learning_rate": 0.0002405949256342957, | |
| "loss": 0.8028, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9144625773776027, | |
| "grad_norm": 19.20818992178154, | |
| "learning_rate": 0.00024037620297462817, | |
| "loss": 0.5641, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9172763083849184, | |
| "grad_norm": 19.18323455494463, | |
| "learning_rate": 0.0002401574803149606, | |
| "loss": 0.8983, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9200900393922341, | |
| "grad_norm": 7.863834695625984, | |
| "learning_rate": 0.00023993875765529306, | |
| "loss": 0.5333, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9229037703995498, | |
| "grad_norm": 11.037689804640895, | |
| "learning_rate": 0.00023972003499562553, | |
| "loss": 0.5062, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9257175014068655, | |
| "grad_norm": 14.170498863161551, | |
| "learning_rate": 0.000239501312335958, | |
| "loss": 0.4058, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9285312324141812, | |
| "grad_norm": 8.276743998093925, | |
| "learning_rate": 0.00023928258967629044, | |
| "loss": 0.8532, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9313449634214969, | |
| "grad_norm": 31.54134403213458, | |
| "learning_rate": 0.0002390638670166229, | |
| "loss": 0.7638, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9341586944288126, | |
| "grad_norm": 21.816033120921777, | |
| "learning_rate": 0.00023884514435695538, | |
| "loss": 0.6246, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.9369724254361284, | |
| "grad_norm": 18.097032537484406, | |
| "learning_rate": 0.0002386264216972878, | |
| "loss": 0.5869, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.939786156443444, | |
| "grad_norm": 30.83082131815264, | |
| "learning_rate": 0.00023840769903762027, | |
| "loss": 0.6802, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.9425998874507597, | |
| "grad_norm": 47.924062613651145, | |
| "learning_rate": 0.00023818897637795274, | |
| "loss": 0.7447, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9454136184580754, | |
| "grad_norm": 9.599329887116856, | |
| "learning_rate": 0.0002379702537182852, | |
| "loss": 0.4587, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9482273494653911, | |
| "grad_norm": 24.233656237412927, | |
| "learning_rate": 0.00023775153105861765, | |
| "loss": 1.0523, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9510410804727069, | |
| "grad_norm": 8.951427533475114, | |
| "learning_rate": 0.00023753280839895012, | |
| "loss": 0.6428, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9538548114800225, | |
| "grad_norm": 7.42291585951858, | |
| "learning_rate": 0.00023731408573928256, | |
| "loss": 0.5499, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.9566685424873382, | |
| "grad_norm": 15.702658109698685, | |
| "learning_rate": 0.00023709536307961503, | |
| "loss": 0.7035, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9594822734946539, | |
| "grad_norm": 20.19323341543604, | |
| "learning_rate": 0.00023687664041994747, | |
| "loss": 0.6141, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9622960045019696, | |
| "grad_norm": 43.77793213323037, | |
| "learning_rate": 0.00023665791776027994, | |
| "loss": 0.6725, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9651097355092854, | |
| "grad_norm": 32.8128961448371, | |
| "learning_rate": 0.00023643919510061241, | |
| "loss": 0.5943, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.967923466516601, | |
| "grad_norm": 46.81002844829282, | |
| "learning_rate": 0.00023622047244094488, | |
| "loss": 0.7372, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9707371975239167, | |
| "grad_norm": 11.402187714876383, | |
| "learning_rate": 0.0002360017497812773, | |
| "loss": 0.6388, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9735509285312324, | |
| "grad_norm": 18.063184970189784, | |
| "learning_rate": 0.00023578302712160977, | |
| "loss": 0.6867, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9763646595385481, | |
| "grad_norm": 25.879726827027653, | |
| "learning_rate": 0.00023556430446194224, | |
| "loss": 0.5965, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9791783905458639, | |
| "grad_norm": 20.717474130396493, | |
| "learning_rate": 0.0002353455818022747, | |
| "loss": 0.7171, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9819921215531795, | |
| "grad_norm": 18.1608450158541, | |
| "learning_rate": 0.00023512685914260715, | |
| "loss": 0.584, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9848058525604952, | |
| "grad_norm": 10.9801787450404, | |
| "learning_rate": 0.00023490813648293962, | |
| "loss": 0.4611, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9876195835678109, | |
| "grad_norm": 59.845266656605816, | |
| "learning_rate": 0.00023468941382327207, | |
| "loss": 0.6816, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.9904333145751266, | |
| "grad_norm": 22.584791520562405, | |
| "learning_rate": 0.0002344706911636045, | |
| "loss": 0.6226, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.9932470455824424, | |
| "grad_norm": 25.253863357778947, | |
| "learning_rate": 0.00023425196850393698, | |
| "loss": 0.7587, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.996060776589758, | |
| "grad_norm": 13.205996634467093, | |
| "learning_rate": 0.00023403324584426945, | |
| "loss": 0.5234, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.9988745075970737, | |
| "grad_norm": 12.477784447497413, | |
| "learning_rate": 0.00023381452318460192, | |
| "loss": 0.7967, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_0_f1": 0.5288677130044843, | |
| "eval_0_precision": 0.39911167512690354, | |
| "eval_0_recall": 0.7836378737541528, | |
| "eval_1_f1": 0.6981504758484468, | |
| "eval_1_precision": 0.8818326151054661, | |
| "eval_1_recall": 0.5777975925100312, | |
| "eval_accuracy": 0.6320455291671226, | |
| "eval_loss": 0.6142578125, | |
| "eval_runtime": 469.6152, | |
| "eval_samples_per_second": 19.456, | |
| "eval_steps_per_second": 3.243, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 1.0016882386043895, | |
| "grad_norm": 20.289578848031194, | |
| "learning_rate": 0.00023359580052493436, | |
| "loss": 0.5507, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.004501969611705, | |
| "grad_norm": 15.822667712574415, | |
| "learning_rate": 0.00023337707786526683, | |
| "loss": 0.5981, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.0073157006190208, | |
| "grad_norm": 23.851500914988023, | |
| "learning_rate": 0.00023315835520559927, | |
| "loss": 0.5925, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.0101294316263365, | |
| "grad_norm": 28.90154599345354, | |
| "learning_rate": 0.00023293963254593174, | |
| "loss": 0.6938, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.0129431626336522, | |
| "grad_norm": 6.673268351357181, | |
| "learning_rate": 0.0002327209098862642, | |
| "loss": 0.4444, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.015756893640968, | |
| "grad_norm": 24.026678476440093, | |
| "learning_rate": 0.00023250218722659666, | |
| "loss": 0.6206, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.0185706246482835, | |
| "grad_norm": 12.993158134163783, | |
| "learning_rate": 0.00023228346456692913, | |
| "loss": 0.7656, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.0213843556555993, | |
| "grad_norm": 8.661592494763198, | |
| "learning_rate": 0.0002320647419072616, | |
| "loss": 0.5644, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.024198086662915, | |
| "grad_norm": 17.738436058324886, | |
| "learning_rate": 0.000231846019247594, | |
| "loss": 0.6354, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.0270118176702308, | |
| "grad_norm": 8.612642747217874, | |
| "learning_rate": 0.00023162729658792648, | |
| "loss": 0.7017, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.0298255486775465, | |
| "grad_norm": 36.741673298246305, | |
| "learning_rate": 0.00023140857392825895, | |
| "loss": 0.6211, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.032639279684862, | |
| "grad_norm": 12.750982475761448, | |
| "learning_rate": 0.0002311898512685914, | |
| "loss": 0.5828, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.0354530106921778, | |
| "grad_norm": 12.486810558826239, | |
| "learning_rate": 0.00023097112860892387, | |
| "loss": 0.5709, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.0382667416994935, | |
| "grad_norm": 24.920452697969928, | |
| "learning_rate": 0.00023075240594925634, | |
| "loss": 0.5783, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.0410804727068093, | |
| "grad_norm": 6.301604046934106, | |
| "learning_rate": 0.00023053368328958878, | |
| "loss": 0.4701, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.043894203714125, | |
| "grad_norm": 1.554082347905222, | |
| "learning_rate": 0.00023031496062992122, | |
| "loss": 0.5536, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.0467079347214405, | |
| "grad_norm": 22.334353822789094, | |
| "learning_rate": 0.0002300962379702537, | |
| "loss": 0.6868, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.0495216657287563, | |
| "grad_norm": 29.418886082506507, | |
| "learning_rate": 0.00022987751531058616, | |
| "loss": 0.9408, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.052335396736072, | |
| "grad_norm": 20.36838671289186, | |
| "learning_rate": 0.00022965879265091863, | |
| "loss": 0.6802, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.0551491277433878, | |
| "grad_norm": 4.954178463432806, | |
| "learning_rate": 0.00022944006999125107, | |
| "loss": 0.6108, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.0579628587507035, | |
| "grad_norm": 30.711986018263367, | |
| "learning_rate": 0.00022922134733158352, | |
| "loss": 0.5281, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.060776589758019, | |
| "grad_norm": 10.233269531300138, | |
| "learning_rate": 0.000229002624671916, | |
| "loss": 0.5335, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.0635903207653348, | |
| "grad_norm": 63.23746707068614, | |
| "learning_rate": 0.00022878390201224843, | |
| "loss": 0.881, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.0664040517726505, | |
| "grad_norm": 5.131625836660247, | |
| "learning_rate": 0.0002285651793525809, | |
| "loss": 0.9595, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.0692177827799663, | |
| "grad_norm": 13.264342728525087, | |
| "learning_rate": 0.00022834645669291337, | |
| "loss": 0.4729, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.072031513787282, | |
| "grad_norm": 3.92201905465672, | |
| "learning_rate": 0.00022812773403324584, | |
| "loss": 0.6524, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.0748452447945978, | |
| "grad_norm": 22.941903277650525, | |
| "learning_rate": 0.0002279090113735783, | |
| "loss": 0.6657, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.0776589758019133, | |
| "grad_norm": 18.622940780105395, | |
| "learning_rate": 0.00022769028871391073, | |
| "loss": 0.4346, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.080472706809229, | |
| "grad_norm": 10.884115952331454, | |
| "learning_rate": 0.0002274715660542432, | |
| "loss": 0.4555, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.0832864378165448, | |
| "grad_norm": 24.118386588827224, | |
| "learning_rate": 0.00022725284339457567, | |
| "loss": 0.7681, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.0861001688238605, | |
| "grad_norm": 20.031875879534606, | |
| "learning_rate": 0.0002270341207349081, | |
| "loss": 0.7259, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.088913899831176, | |
| "grad_norm": 13.903004192497082, | |
| "learning_rate": 0.00022681539807524058, | |
| "loss": 0.6706, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.0917276308384918, | |
| "grad_norm": 12.51136628868392, | |
| "learning_rate": 0.00022659667541557305, | |
| "loss": 0.6882, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.0945413618458075, | |
| "grad_norm": 15.707389951014497, | |
| "learning_rate": 0.00022637795275590547, | |
| "loss": 0.545, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.0973550928531233, | |
| "grad_norm": 21.377996746325916, | |
| "learning_rate": 0.00022615923009623794, | |
| "loss": 0.5169, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.100168823860439, | |
| "grad_norm": 31.777506839495466, | |
| "learning_rate": 0.0002259405074365704, | |
| "loss": 0.8108, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.1029825548677545, | |
| "grad_norm": 17.123955716971917, | |
| "learning_rate": 0.00022572178477690288, | |
| "loss": 0.646, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.1057962858750703, | |
| "grad_norm": 36.33273283007592, | |
| "learning_rate": 0.00022550306211723535, | |
| "loss": 0.7394, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.108610016882386, | |
| "grad_norm": 18.821091198359895, | |
| "learning_rate": 0.0002252843394575678, | |
| "loss": 0.5749, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.1114237478897018, | |
| "grad_norm": 10.61252309290639, | |
| "learning_rate": 0.00022506561679790023, | |
| "loss": 0.5137, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.1142374788970175, | |
| "grad_norm": 13.982125625637144, | |
| "learning_rate": 0.0002248468941382327, | |
| "loss": 0.6647, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.117051209904333, | |
| "grad_norm": 42.981624236536284, | |
| "learning_rate": 0.00022462817147856514, | |
| "loss": 0.5238, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.1198649409116488, | |
| "grad_norm": 27.54632412072402, | |
| "learning_rate": 0.00022440944881889761, | |
| "loss": 0.6707, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.1226786719189645, | |
| "grad_norm": 23.75601752367408, | |
| "learning_rate": 0.00022419072615923008, | |
| "loss": 0.8241, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.1254924029262803, | |
| "grad_norm": 32.47147919838835, | |
| "learning_rate": 0.00022397200349956255, | |
| "loss": 0.6688, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.128306133933596, | |
| "grad_norm": 18.348598242445995, | |
| "learning_rate": 0.00022375328083989497, | |
| "loss": 0.5989, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.1311198649409118, | |
| "grad_norm": 12.324401214506343, | |
| "learning_rate": 0.00022353455818022744, | |
| "loss": 0.5851, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.1339335959482273, | |
| "grad_norm": 17.172404997190124, | |
| "learning_rate": 0.0002233158355205599, | |
| "loss": 0.5568, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.136747326955543, | |
| "grad_norm": 23.04273982046674, | |
| "learning_rate": 0.00022309711286089238, | |
| "loss": 0.5822, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.1395610579628588, | |
| "grad_norm": 7.802471124937242, | |
| "learning_rate": 0.00022287839020122482, | |
| "loss": 0.4082, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.1423747889701745, | |
| "grad_norm": 11.2591451273763, | |
| "learning_rate": 0.0002226596675415573, | |
| "loss": 0.4935, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.14518851997749, | |
| "grad_norm": 9.837614966281794, | |
| "learning_rate": 0.00022244094488188976, | |
| "loss": 0.7146, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.1480022509848058, | |
| "grad_norm": 10.946277605810202, | |
| "learning_rate": 0.00022222222222222218, | |
| "loss": 0.6566, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.1508159819921215, | |
| "grad_norm": 17.85614689532493, | |
| "learning_rate": 0.00022200349956255465, | |
| "loss": 0.4754, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.1536297129994373, | |
| "grad_norm": 24.06573596597373, | |
| "learning_rate": 0.00022178477690288712, | |
| "loss": 0.6678, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.156443444006753, | |
| "grad_norm": 23.587510010090995, | |
| "learning_rate": 0.0002215660542432196, | |
| "loss": 0.5681, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.1592571750140688, | |
| "grad_norm": 15.082272570896748, | |
| "learning_rate": 0.00022134733158355206, | |
| "loss": 0.4931, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.1620709060213843, | |
| "grad_norm": 34.70870912668241, | |
| "learning_rate": 0.0002211286089238845, | |
| "loss": 0.5661, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.1648846370287, | |
| "grad_norm": 11.548252271349462, | |
| "learning_rate": 0.00022090988626421694, | |
| "loss": 0.4761, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.1676983680360158, | |
| "grad_norm": 28.412960573382996, | |
| "learning_rate": 0.00022069116360454941, | |
| "loss": 0.5887, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.1705120990433315, | |
| "grad_norm": 10.783805303855186, | |
| "learning_rate": 0.00022047244094488186, | |
| "loss": 0.5948, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.173325830050647, | |
| "grad_norm": 24.93639136840195, | |
| "learning_rate": 0.00022025371828521433, | |
| "loss": 0.6058, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.1761395610579628, | |
| "grad_norm": 26.39448202076931, | |
| "learning_rate": 0.0002200349956255468, | |
| "loss": 0.607, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.1789532920652785, | |
| "grad_norm": 14.904096598298732, | |
| "learning_rate": 0.00021981627296587927, | |
| "loss": 0.548, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.1817670230725943, | |
| "grad_norm": 17.73765885454678, | |
| "learning_rate": 0.00021959755030621168, | |
| "loss": 0.7417, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.18458075407991, | |
| "grad_norm": 2.867842719491419, | |
| "learning_rate": 0.00021937882764654415, | |
| "loss": 0.6395, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.1873944850872258, | |
| "grad_norm": 34.56602745611629, | |
| "learning_rate": 0.00021916010498687662, | |
| "loss": 0.5349, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.1902082160945413, | |
| "grad_norm": 22.77844815887848, | |
| "learning_rate": 0.0002189413823272091, | |
| "loss": 0.61, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.193021947101857, | |
| "grad_norm": 42.626450175565964, | |
| "learning_rate": 0.00021872265966754154, | |
| "loss": 0.7502, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.1958356781091728, | |
| "grad_norm": 10.693548169842728, | |
| "learning_rate": 0.000218503937007874, | |
| "loss": 0.4929, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.1986494091164885, | |
| "grad_norm": 34.637471031794966, | |
| "learning_rate": 0.00021828521434820645, | |
| "loss": 0.5426, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.201463140123804, | |
| "grad_norm": 43.976042152968205, | |
| "learning_rate": 0.0002180664916885389, | |
| "loss": 0.4653, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.2042768711311198, | |
| "grad_norm": 6.320882760905354, | |
| "learning_rate": 0.00021784776902887136, | |
| "loss": 0.8992, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.2070906021384356, | |
| "grad_norm": 8.502068954123935, | |
| "learning_rate": 0.00021762904636920383, | |
| "loss": 0.6101, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.2099043331457513, | |
| "grad_norm": 11.429050183991922, | |
| "learning_rate": 0.0002174103237095363, | |
| "loss": 0.5721, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.212718064153067, | |
| "grad_norm": 17.562444133601815, | |
| "learning_rate": 0.00021719160104986874, | |
| "loss": 0.6881, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.2155317951603828, | |
| "grad_norm": 21.19106486102643, | |
| "learning_rate": 0.00021697287839020121, | |
| "loss": 0.5565, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.2183455261676983, | |
| "grad_norm": 12.164118551052857, | |
| "learning_rate": 0.00021675415573053366, | |
| "loss": 0.6187, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.221159257175014, | |
| "grad_norm": 5.033893258856872, | |
| "learning_rate": 0.00021653543307086613, | |
| "loss": 0.2693, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.2239729881823298, | |
| "grad_norm": 1.4732793797472918, | |
| "learning_rate": 0.00021631671041119857, | |
| "loss": 1.0616, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.2267867191896455, | |
| "grad_norm": 8.376633978447819, | |
| "learning_rate": 0.00021609798775153104, | |
| "loss": 0.8353, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.229600450196961, | |
| "grad_norm": 30.38632947822225, | |
| "learning_rate": 0.0002158792650918635, | |
| "loss": 0.7668, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.2324141812042768, | |
| "grad_norm": 20.42829408507086, | |
| "learning_rate": 0.00021566054243219598, | |
| "loss": 0.5452, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.2352279122115926, | |
| "grad_norm": 11.244453757125129, | |
| "learning_rate": 0.0002154418197725284, | |
| "loss": 0.5771, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.2380416432189083, | |
| "grad_norm": 29.01355049880867, | |
| "learning_rate": 0.00021522309711286087, | |
| "loss": 0.7755, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.240855374226224, | |
| "grad_norm": 22.1695788769221, | |
| "learning_rate": 0.00021500437445319334, | |
| "loss": 0.6102, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.2436691052335398, | |
| "grad_norm": 11.814950010874579, | |
| "learning_rate": 0.00021478565179352578, | |
| "loss": 0.5579, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.2464828362408553, | |
| "grad_norm": 25.70339419099322, | |
| "learning_rate": 0.00021456692913385825, | |
| "loss": 0.6162, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.249296567248171, | |
| "grad_norm": 4.252865920700129, | |
| "learning_rate": 0.00021434820647419072, | |
| "loss": 0.4195, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.2521102982554868, | |
| "grad_norm": 38.698082556525144, | |
| "learning_rate": 0.00021412948381452316, | |
| "loss": 0.5526, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.2549240292628026, | |
| "grad_norm": 7.8381650122365025, | |
| "learning_rate": 0.0002139107611548556, | |
| "loss": 0.5447, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.257737760270118, | |
| "grad_norm": 14.386500677754873, | |
| "learning_rate": 0.00021369203849518808, | |
| "loss": 0.5332, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.2605514912774338, | |
| "grad_norm": 10.393563025135272, | |
| "learning_rate": 0.00021347331583552055, | |
| "loss": 0.3905, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.2633652222847496, | |
| "grad_norm": 11.830727306060455, | |
| "learning_rate": 0.00021325459317585302, | |
| "loss": 0.6539, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.2661789532920653, | |
| "grad_norm": 14.042878553076964, | |
| "learning_rate": 0.00021303587051618546, | |
| "loss": 0.7558, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.268992684299381, | |
| "grad_norm": 9.152885609833971, | |
| "learning_rate": 0.0002128171478565179, | |
| "loss": 0.684, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.2718064153066968, | |
| "grad_norm": 29.21214553934626, | |
| "learning_rate": 0.00021262029746281714, | |
| "loss": 0.6722, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.2746201463140123, | |
| "grad_norm": 31.814467115420808, | |
| "learning_rate": 0.00021240157480314958, | |
| "loss": 0.5875, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.277433877321328, | |
| "grad_norm": 11.976587331588098, | |
| "learning_rate": 0.00021218285214348205, | |
| "loss": 0.5788, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.2802476083286438, | |
| "grad_norm": 9.351382128729613, | |
| "learning_rate": 0.00021196412948381452, | |
| "loss": 0.5404, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.2830613393359596, | |
| "grad_norm": 9.993287787206937, | |
| "learning_rate": 0.000211745406824147, | |
| "loss": 0.574, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.285875070343275, | |
| "grad_norm": 0.9168680830567139, | |
| "learning_rate": 0.0002115266841644794, | |
| "loss": 0.812, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.2886888013505908, | |
| "grad_norm": 6.660390815631498, | |
| "learning_rate": 0.00021130796150481187, | |
| "loss": 0.7979, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.2915025323579066, | |
| "grad_norm": 10.67143801901763, | |
| "learning_rate": 0.00021108923884514434, | |
| "loss": 0.5466, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.2943162633652223, | |
| "grad_norm": 17.62423042213442, | |
| "learning_rate": 0.00021087051618547681, | |
| "loss": 0.431, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.297129994372538, | |
| "grad_norm": 15.617571509727133, | |
| "learning_rate": 0.00021065179352580926, | |
| "loss": 0.5231, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.2999437253798538, | |
| "grad_norm": 58.40350186744155, | |
| "learning_rate": 0.00021043307086614173, | |
| "loss": 0.4857, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.3027574563871693, | |
| "grad_norm": 15.519074842077424, | |
| "learning_rate": 0.00021021434820647417, | |
| "loss": 0.4439, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.305571187394485, | |
| "grad_norm": 23.71709936979936, | |
| "learning_rate": 0.0002099956255468066, | |
| "loss": 0.514, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.3083849184018008, | |
| "grad_norm": 14.117780601189649, | |
| "learning_rate": 0.00020977690288713908, | |
| "loss": 0.6811, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.3111986494091166, | |
| "grad_norm": 31.859641559976787, | |
| "learning_rate": 0.00020955818022747155, | |
| "loss": 0.7653, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.314012380416432, | |
| "grad_norm": 4.62858313326057, | |
| "learning_rate": 0.00020933945756780402, | |
| "loss": 0.56, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.3168261114237478, | |
| "grad_norm": 32.35923134160814, | |
| "learning_rate": 0.00020912073490813647, | |
| "loss": 0.581, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.3196398424310636, | |
| "grad_norm": 11.88084068278056, | |
| "learning_rate": 0.0002089020122484689, | |
| "loss": 0.5339, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.3224535734383793, | |
| "grad_norm": 9.520992713167384, | |
| "learning_rate": 0.00020868328958880138, | |
| "loss": 0.782, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.325267304445695, | |
| "grad_norm": 22.853640876872127, | |
| "learning_rate": 0.00020846456692913385, | |
| "loss": 0.6565, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.3280810354530108, | |
| "grad_norm": 3.8605452401376685, | |
| "learning_rate": 0.0002082458442694663, | |
| "loss": 0.5357, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.3308947664603263, | |
| "grad_norm": 39.18854892428108, | |
| "learning_rate": 0.00020802712160979876, | |
| "loss": 0.6124, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.333708497467642, | |
| "grad_norm": 12.900555694355658, | |
| "learning_rate": 0.00020780839895013123, | |
| "loss": 0.5629, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.3365222284749578, | |
| "grad_norm": 30.260254281976717, | |
| "learning_rate": 0.00020758967629046365, | |
| "loss": 0.6766, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.3393359594822736, | |
| "grad_norm": 5.549604555260689, | |
| "learning_rate": 0.00020737095363079612, | |
| "loss": 0.6215, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.342149690489589, | |
| "grad_norm": 9.606804838953646, | |
| "learning_rate": 0.0002071522309711286, | |
| "loss": 0.5282, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.3449634214969048, | |
| "grad_norm": 21.0629637183242, | |
| "learning_rate": 0.00020693350831146106, | |
| "loss": 0.567, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.3477771525042206, | |
| "grad_norm": 35.449435817589865, | |
| "learning_rate": 0.0002067147856517935, | |
| "loss": 0.5674, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.3505908835115363, | |
| "grad_norm": 11.42100394739649, | |
| "learning_rate": 0.00020649606299212597, | |
| "loss": 0.4903, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.353404614518852, | |
| "grad_norm": 46.973771954330346, | |
| "learning_rate": 0.00020627734033245844, | |
| "loss": 0.6514, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.3562183455261678, | |
| "grad_norm": 37.42810605601175, | |
| "learning_rate": 0.00020605861767279088, | |
| "loss": 0.7795, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.3590320765334833, | |
| "grad_norm": 17.86195496240817, | |
| "learning_rate": 0.00020583989501312333, | |
| "loss": 0.6856, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.361845807540799, | |
| "grad_norm": 4.510420970955073, | |
| "learning_rate": 0.0002056211723534558, | |
| "loss": 0.5515, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.3646595385481148, | |
| "grad_norm": 35.14529979476458, | |
| "learning_rate": 0.00020540244969378827, | |
| "loss": 0.8242, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.3674732695554306, | |
| "grad_norm": 12.97076064718709, | |
| "learning_rate": 0.00020518372703412074, | |
| "loss": 0.6027, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.370287000562746, | |
| "grad_norm": 37.34136036844645, | |
| "learning_rate": 0.00020496500437445318, | |
| "loss": 0.9699, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.3731007315700618, | |
| "grad_norm": 10.648747474625555, | |
| "learning_rate": 0.00020474628171478562, | |
| "loss": 0.5742, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.3759144625773776, | |
| "grad_norm": 38.35409796130347, | |
| "learning_rate": 0.0002045275590551181, | |
| "loss": 0.4493, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.3787281935846933, | |
| "grad_norm": 10.275255197278986, | |
| "learning_rate": 0.00020430883639545053, | |
| "loss": 0.5563, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.381541924592009, | |
| "grad_norm": 25.09323166849902, | |
| "learning_rate": 0.000204090113735783, | |
| "loss": 0.6363, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.3843556555993248, | |
| "grad_norm": 13.731524114353487, | |
| "learning_rate": 0.00020387139107611547, | |
| "loss": 0.8071, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.3871693866066404, | |
| "grad_norm": 10.601584528659094, | |
| "learning_rate": 0.00020365266841644794, | |
| "loss": 0.4743, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.389983117613956, | |
| "grad_norm": 20.873005848848994, | |
| "learning_rate": 0.00020343394575678036, | |
| "loss": 0.5681, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.3927968486212718, | |
| "grad_norm": 6.779807648777697, | |
| "learning_rate": 0.00020321522309711283, | |
| "loss": 0.4404, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.3956105796285876, | |
| "grad_norm": 52.49716946373782, | |
| "learning_rate": 0.0002029965004374453, | |
| "loss": 0.5812, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.3984243106359031, | |
| "grad_norm": 10.370627801920397, | |
| "learning_rate": 0.00020277777777777777, | |
| "loss": 0.6843, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.4012380416432189, | |
| "grad_norm": 22.239749529632242, | |
| "learning_rate": 0.0002025590551181102, | |
| "loss": 0.6977, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.4040517726505346, | |
| "grad_norm": 14.1275030867891, | |
| "learning_rate": 0.00020234033245844268, | |
| "loss": 0.4539, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.4068655036578503, | |
| "grad_norm": 8.99633615310471, | |
| "learning_rate": 0.00020212160979877513, | |
| "loss": 0.6492, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.409679234665166, | |
| "grad_norm": 22.805411368810102, | |
| "learning_rate": 0.0002019028871391076, | |
| "loss": 0.6462, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.4124929656724818, | |
| "grad_norm": 6.381125315859451, | |
| "learning_rate": 0.00020168416447944004, | |
| "loss": 0.6981, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.4153066966797974, | |
| "grad_norm": 7.54030708950237, | |
| "learning_rate": 0.0002014654418197725, | |
| "loss": 0.4331, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.418120427687113, | |
| "grad_norm": 12.137237315522457, | |
| "learning_rate": 0.00020124671916010498, | |
| "loss": 0.5546, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.4209341586944289, | |
| "grad_norm": 59.47494361525208, | |
| "learning_rate": 0.00020102799650043745, | |
| "loss": 0.5772, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.4237478897017446, | |
| "grad_norm": 31.495786286643714, | |
| "learning_rate": 0.0002008092738407699, | |
| "loss": 0.6932, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.4265616207090601, | |
| "grad_norm": 18.486318084665708, | |
| "learning_rate": 0.00020059055118110234, | |
| "loss": 0.4995, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.4293753517163759, | |
| "grad_norm": 10.238292416097469, | |
| "learning_rate": 0.0002003718285214348, | |
| "loss": 0.6652, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.4321890827236916, | |
| "grad_norm": 4.579110553754593, | |
| "learning_rate": 0.00020015310586176725, | |
| "loss": 0.5336, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.4350028137310074, | |
| "grad_norm": 34.098434311756876, | |
| "learning_rate": 0.00019993438320209972, | |
| "loss": 0.765, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.437816544738323, | |
| "grad_norm": 35.700128715881476, | |
| "learning_rate": 0.0001997156605424322, | |
| "loss": 0.727, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.4406302757456388, | |
| "grad_norm": 28.138298504559835, | |
| "learning_rate": 0.00019949693788276466, | |
| "loss": 0.695, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.4434440067529544, | |
| "grad_norm": 23.026654117472113, | |
| "learning_rate": 0.00019927821522309707, | |
| "loss": 0.5303, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.4462577377602701, | |
| "grad_norm": 9.046788588294012, | |
| "learning_rate": 0.00019905949256342954, | |
| "loss": 0.4019, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.4490714687675859, | |
| "grad_norm": 21.468136979947417, | |
| "learning_rate": 0.00019884076990376201, | |
| "loss": 0.2962, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.4518851997749016, | |
| "grad_norm": 26.34398373401709, | |
| "learning_rate": 0.00019862204724409448, | |
| "loss": 1.0659, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.4546989307822171, | |
| "grad_norm": 17.669274446566238, | |
| "learning_rate": 0.00019840332458442693, | |
| "loss": 0.5564, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.4575126617895329, | |
| "grad_norm": 3.3651916727576987, | |
| "learning_rate": 0.0001981846019247594, | |
| "loss": 0.4807, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.4603263927968486, | |
| "grad_norm": 11.603663088020909, | |
| "learning_rate": 0.00019796587926509184, | |
| "loss": 0.5503, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.4631401238041644, | |
| "grad_norm": 23.63460879726596, | |
| "learning_rate": 0.00019774715660542428, | |
| "loss": 0.7177, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.46595385481148, | |
| "grad_norm": 2.9074863920622134, | |
| "learning_rate": 0.00019752843394575675, | |
| "loss": 0.3413, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.4687675858187959, | |
| "grad_norm": 31.091134376352294, | |
| "learning_rate": 0.00019730971128608922, | |
| "loss": 0.7108, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.4715813168261114, | |
| "grad_norm": 22.73781393986795, | |
| "learning_rate": 0.0001970909886264217, | |
| "loss": 0.7106, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.4743950478334271, | |
| "grad_norm": 10.733654149323753, | |
| "learning_rate": 0.00019687226596675416, | |
| "loss": 0.3555, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.4772087788407429, | |
| "grad_norm": 43.418587910591356, | |
| "learning_rate": 0.00019665354330708658, | |
| "loss": 0.6709, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.4800225098480584, | |
| "grad_norm": 10.924870366111936, | |
| "learning_rate": 0.00019643482064741905, | |
| "loss": 0.5829, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.4828362408553741, | |
| "grad_norm": 4.141398446252563, | |
| "learning_rate": 0.00019621609798775152, | |
| "loss": 0.4006, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.4856499718626899, | |
| "grad_norm": 25.642802616147556, | |
| "learning_rate": 0.00019599737532808396, | |
| "loss": 0.8251, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.4884637028700056, | |
| "grad_norm": 27.534126408595263, | |
| "learning_rate": 0.00019577865266841643, | |
| "loss": 1.0336, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.4912774338773214, | |
| "grad_norm": 6.160307363496283, | |
| "learning_rate": 0.0001955599300087489, | |
| "loss": 0.7243, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.4940911648846371, | |
| "grad_norm": 13.152914687437683, | |
| "learning_rate": 0.00019534120734908137, | |
| "loss": 0.6427, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.4969048958919529, | |
| "grad_norm": 9.301055295352276, | |
| "learning_rate": 0.0001951224846894138, | |
| "loss": 0.6152, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.4997186268992684, | |
| "grad_norm": 6.722778731476633, | |
| "learning_rate": 0.00019490376202974626, | |
| "loss": 0.5331, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.5025323579065841, | |
| "grad_norm": 8.186600974279003, | |
| "learning_rate": 0.00019468503937007873, | |
| "loss": 0.4851, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.5053460889138999, | |
| "grad_norm": 12.233978539104966, | |
| "learning_rate": 0.0001944663167104112, | |
| "loss": 0.5694, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.5081598199212154, | |
| "grad_norm": 35.97067266871921, | |
| "learning_rate": 0.00019424759405074364, | |
| "loss": 0.7976, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.5109735509285311, | |
| "grad_norm": 16.314895195522084, | |
| "learning_rate": 0.0001940288713910761, | |
| "loss": 0.5439, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.5137872819358469, | |
| "grad_norm": 16.9947029531932, | |
| "learning_rate": 0.00019381014873140855, | |
| "loss": 0.4797, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.5166010129431626, | |
| "grad_norm": 22.886764769826087, | |
| "learning_rate": 0.000193591426071741, | |
| "loss": 0.5191, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.5194147439504784, | |
| "grad_norm": 5.870017090348409, | |
| "learning_rate": 0.00019337270341207347, | |
| "loss": 0.5474, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.5222284749577941, | |
| "grad_norm": 28.06407341443698, | |
| "learning_rate": 0.00019315398075240594, | |
| "loss": 0.69, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.5250422059651099, | |
| "grad_norm": 14.781385104588194, | |
| "learning_rate": 0.0001929352580927384, | |
| "loss": 0.5645, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.5278559369724254, | |
| "grad_norm": 5.855766754825115, | |
| "learning_rate": 0.00019271653543307085, | |
| "loss": 0.4795, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.5306696679797411, | |
| "grad_norm": 22.918135069417044, | |
| "learning_rate": 0.0001924978127734033, | |
| "loss": 0.5786, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.5334833989870569, | |
| "grad_norm": 17.207456518107474, | |
| "learning_rate": 0.00019227909011373576, | |
| "loss": 0.651, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.5362971299943724, | |
| "grad_norm": 5.184301427212219, | |
| "learning_rate": 0.00019206036745406823, | |
| "loss": 0.415, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.5391108610016881, | |
| "grad_norm": 16.094276621452206, | |
| "learning_rate": 0.00019184164479440067, | |
| "loss": 0.5126, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.541924592009004, | |
| "grad_norm": 27.587401306674103, | |
| "learning_rate": 0.00019162292213473314, | |
| "loss": 0.5171, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.5447383230163196, | |
| "grad_norm": 36.812039328705936, | |
| "learning_rate": 0.00019140419947506561, | |
| "loss": 0.573, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.5475520540236354, | |
| "grad_norm": 18.270164053028488, | |
| "learning_rate": 0.00019118547681539803, | |
| "loss": 0.4896, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.5503657850309511, | |
| "grad_norm": 21.200268676017966, | |
| "learning_rate": 0.0001909667541557305, | |
| "loss": 0.5086, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.5531795160382669, | |
| "grad_norm": 2.634500985821002, | |
| "learning_rate": 0.00019074803149606297, | |
| "loss": 0.5153, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.5559932470455824, | |
| "grad_norm": 31.303796019116458, | |
| "learning_rate": 0.00019052930883639544, | |
| "loss": 0.5879, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.5588069780528981, | |
| "grad_norm": 13.767169050681202, | |
| "learning_rate": 0.0001903105861767279, | |
| "loss": 0.6354, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.5616207090602139, | |
| "grad_norm": 20.81439861452622, | |
| "learning_rate": 0.00019009186351706035, | |
| "loss": 0.5908, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.5644344400675294, | |
| "grad_norm": 24.25248324077591, | |
| "learning_rate": 0.00018987314085739282, | |
| "loss": 0.568, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.5672481710748452, | |
| "grad_norm": 20.3474642773289, | |
| "learning_rate": 0.000189676290463692, | |
| "loss": 0.6771, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.570061902082161, | |
| "grad_norm": 27.420648385460662, | |
| "learning_rate": 0.00018945756780402447, | |
| "loss": 0.7918, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.5728756330894766, | |
| "grad_norm": 12.762719969507081, | |
| "learning_rate": 0.00018923884514435694, | |
| "loss": 0.6914, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.5756893640967924, | |
| "grad_norm": 12.90335156682279, | |
| "learning_rate": 0.0001890201224846894, | |
| "loss": 0.6175, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.5785030951041081, | |
| "grad_norm": 21.08111096971608, | |
| "learning_rate": 0.00018880139982502188, | |
| "loss": 0.4938, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.5813168261114239, | |
| "grad_norm": 10.112944768654387, | |
| "learning_rate": 0.0001885826771653543, | |
| "loss": 0.5957, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.5841305571187394, | |
| "grad_norm": 11.817780491387401, | |
| "learning_rate": 0.00018836395450568677, | |
| "loss": 0.616, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.5869442881260551, | |
| "grad_norm": 13.413123522838792, | |
| "learning_rate": 0.00018814523184601924, | |
| "loss": 0.5436, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.589758019133371, | |
| "grad_norm": 8.5007708126369, | |
| "learning_rate": 0.00018792650918635168, | |
| "loss": 0.5707, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.5925717501406864, | |
| "grad_norm": 6.355914825325111, | |
| "learning_rate": 0.00018770778652668415, | |
| "loss": 0.5345, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.5953854811480022, | |
| "grad_norm": 18.871338179625443, | |
| "learning_rate": 0.00018748906386701662, | |
| "loss": 0.5581, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.598199212155318, | |
| "grad_norm": 22.113665794555953, | |
| "learning_rate": 0.00018727034120734904, | |
| "loss": 0.3793, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.6010129431626337, | |
| "grad_norm": 18.640229312889087, | |
| "learning_rate": 0.0001870516185476815, | |
| "loss": 0.6424, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.6038266741699494, | |
| "grad_norm": 9.013049101470614, | |
| "learning_rate": 0.00018683289588801398, | |
| "loss": 0.6761, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.6066404051772651, | |
| "grad_norm": 24.66632774615283, | |
| "learning_rate": 0.00018661417322834645, | |
| "loss": 0.4636, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.6094541361845809, | |
| "grad_norm": 21.359311361155275, | |
| "learning_rate": 0.00018639545056867892, | |
| "loss": 0.3643, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.6122678671918964, | |
| "grad_norm": 17.552845045440993, | |
| "learning_rate": 0.00018617672790901136, | |
| "loss": 0.7695, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.6150815981992122, | |
| "grad_norm": 14.750302688475113, | |
| "learning_rate": 0.0001859580052493438, | |
| "loss": 0.7166, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.617895329206528, | |
| "grad_norm": 14.017729808491103, | |
| "learning_rate": 0.00018573928258967627, | |
| "loss": 0.5689, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.6207090602138434, | |
| "grad_norm": 27.853805455956927, | |
| "learning_rate": 0.00018552055993000872, | |
| "loss": 0.6315, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.6235227912211592, | |
| "grad_norm": 16.4717416832815, | |
| "learning_rate": 0.0001853018372703412, | |
| "loss": 0.4896, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.626336522228475, | |
| "grad_norm": 11.48773947806387, | |
| "learning_rate": 0.00018508311461067366, | |
| "loss": 0.5196, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.6291502532357907, | |
| "grad_norm": 55.37757053824189, | |
| "learning_rate": 0.00018486439195100613, | |
| "loss": 0.6459, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.6319639842431064, | |
| "grad_norm": 22.81165151193899, | |
| "learning_rate": 0.00018464566929133857, | |
| "loss": 0.5926, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.6347777152504221, | |
| "grad_norm": 19.036953260995485, | |
| "learning_rate": 0.000184426946631671, | |
| "loss": 0.9008, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.637591446257738, | |
| "grad_norm": 25.57500606412806, | |
| "learning_rate": 0.00018420822397200348, | |
| "loss": 0.5999, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.6404051772650534, | |
| "grad_norm": 8.891606826403597, | |
| "learning_rate": 0.00018398950131233595, | |
| "loss": 0.5492, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.6432189082723692, | |
| "grad_norm": 7.0897653575377975, | |
| "learning_rate": 0.0001837707786526684, | |
| "loss": 0.4375, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.646032639279685, | |
| "grad_norm": 16.82282152611567, | |
| "learning_rate": 0.00018355205599300087, | |
| "loss": 0.6416, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.6488463702870004, | |
| "grad_norm": 26.076012233816623, | |
| "learning_rate": 0.00018333333333333334, | |
| "loss": 0.7995, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.6516601012943162, | |
| "grad_norm": 6.103373372823494, | |
| "learning_rate": 0.00018311461067366575, | |
| "loss": 0.513, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.654473832301632, | |
| "grad_norm": 7.46141704246519, | |
| "learning_rate": 0.00018289588801399822, | |
| "loss": 0.442, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.6572875633089477, | |
| "grad_norm": 21.657859712145655, | |
| "learning_rate": 0.0001826771653543307, | |
| "loss": 0.6058, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.6601012943162634, | |
| "grad_norm": 23.56206415921756, | |
| "learning_rate": 0.00018245844269466316, | |
| "loss": 0.609, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.6629150253235792, | |
| "grad_norm": 11.96355285804545, | |
| "learning_rate": 0.00018223972003499563, | |
| "loss": 0.4169, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.665728756330895, | |
| "grad_norm": 15.80001057748199, | |
| "learning_rate": 0.00018202099737532807, | |
| "loss": 0.7119, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.6685424873382104, | |
| "grad_norm": 24.01734519933029, | |
| "learning_rate": 0.00018180227471566052, | |
| "loss": 0.6546, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.6713562183455262, | |
| "grad_norm": 12.082586359258165, | |
| "learning_rate": 0.000181583552055993, | |
| "loss": 0.7743, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.674169949352842, | |
| "grad_norm": 16.8076808139855, | |
| "learning_rate": 0.00018136482939632543, | |
| "loss": 0.5819, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.6769836803601574, | |
| "grad_norm": 16.864221341224397, | |
| "learning_rate": 0.0001811461067366579, | |
| "loss": 0.6483, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.6797974113674732, | |
| "grad_norm": 11.102468101320996, | |
| "learning_rate": 0.00018092738407699037, | |
| "loss": 0.5152, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.682611142374789, | |
| "grad_norm": 17.6010512401763, | |
| "learning_rate": 0.00018070866141732284, | |
| "loss": 0.5134, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.6854248733821047, | |
| "grad_norm": 8.25091098683039, | |
| "learning_rate": 0.00018048993875765526, | |
| "loss": 0.49, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.6882386043894204, | |
| "grad_norm": 7.5344372075509884, | |
| "learning_rate": 0.00018027121609798773, | |
| "loss": 0.4619, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.6910523353967362, | |
| "grad_norm": 23.21545471999833, | |
| "learning_rate": 0.0001800524934383202, | |
| "loss": 0.8264, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.693866066404052, | |
| "grad_norm": 15.393641748407818, | |
| "learning_rate": 0.00017983377077865267, | |
| "loss": 0.6024, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.6966797974113674, | |
| "grad_norm": 12.417067525367335, | |
| "learning_rate": 0.0001796150481189851, | |
| "loss": 0.6584, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.6994935284186832, | |
| "grad_norm": 15.042896501382003, | |
| "learning_rate": 0.00017939632545931758, | |
| "loss": 0.4492, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.702307259425999, | |
| "grad_norm": 9.115061298735506, | |
| "learning_rate": 0.00017917760279965005, | |
| "loss": 0.4221, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.7051209904333144, | |
| "grad_norm": 0.6607374478724659, | |
| "learning_rate": 0.00017895888013998246, | |
| "loss": 0.434, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.7079347214406302, | |
| "grad_norm": 33.803698820392704, | |
| "learning_rate": 0.00017878390201224846, | |
| "loss": 1.9177, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.710748452447946, | |
| "grad_norm": 361.56918934904206, | |
| "learning_rate": 0.00017856517935258093, | |
| "loss": 2.013, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.7135621834552617, | |
| "grad_norm": 44.98806827034684, | |
| "learning_rate": 0.00017834645669291338, | |
| "loss": 1.6814, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.7163759144625774, | |
| "grad_norm": 22.283635215772854, | |
| "learning_rate": 0.00017812773403324582, | |
| "loss": 0.795, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.7191896454698932, | |
| "grad_norm": 105.85751003748128, | |
| "learning_rate": 0.0001779090113735783, | |
| "loss": 2.4138, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.722003376477209, | |
| "grad_norm": 85.01552368225332, | |
| "learning_rate": 0.00017769028871391076, | |
| "loss": 3.8929, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.7248171074845244, | |
| "grad_norm": 44.282971732965535, | |
| "learning_rate": 0.0001774715660542432, | |
| "loss": 0.7031, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.7276308384918402, | |
| "grad_norm": 298.69778969364853, | |
| "learning_rate": 0.00017725284339457567, | |
| "loss": 0.5441, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.730444569499156, | |
| "grad_norm": 41.66809813265777, | |
| "learning_rate": 0.00017703412073490814, | |
| "loss": 0.7863, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.7332583005064714, | |
| "grad_norm": 34.03455804697322, | |
| "learning_rate": 0.00017681539807524056, | |
| "loss": 0.9071, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.7360720315137872, | |
| "grad_norm": 137.98952284030946, | |
| "learning_rate": 0.00017659667541557303, | |
| "loss": 1.5913, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.738885762521103, | |
| "grad_norm": 54.35499220435977, | |
| "learning_rate": 0.0001763779527559055, | |
| "loss": 1.4096, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.7416994935284187, | |
| "grad_norm": 72.22077387735027, | |
| "learning_rate": 0.00017615923009623797, | |
| "loss": 0.6111, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.7445132245357344, | |
| "grad_norm": 11.271321807307686, | |
| "learning_rate": 0.0001759405074365704, | |
| "loss": 0.8519, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.7473269555430502, | |
| "grad_norm": 50.02675742399026, | |
| "learning_rate": 0.00017572178477690288, | |
| "loss": 0.9388, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.750140686550366, | |
| "grad_norm": 30.543850975892273, | |
| "learning_rate": 0.00017550306211723532, | |
| "loss": 0.6496, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.7529544175576814, | |
| "grad_norm": 17.096512987881336, | |
| "learning_rate": 0.0001752843394575678, | |
| "loss": 0.8429, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.7557681485649972, | |
| "grad_norm": 10.875495203297126, | |
| "learning_rate": 0.00017506561679790024, | |
| "loss": 0.424, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.758581879572313, | |
| "grad_norm": 28.472277481379553, | |
| "learning_rate": 0.0001748468941382327, | |
| "loss": 0.5346, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.7613956105796285, | |
| "grad_norm": 84.611446382734, | |
| "learning_rate": 0.00017462817147856518, | |
| "loss": 0.7309, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.7642093415869442, | |
| "grad_norm": 83.93394745603818, | |
| "learning_rate": 0.00017440944881889765, | |
| "loss": 0.7314, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.76702307259426, | |
| "grad_norm": 29.72369442712257, | |
| "learning_rate": 0.00017419072615923006, | |
| "loss": 0.4404, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.7698368036015757, | |
| "grad_norm": 33.47447474767712, | |
| "learning_rate": 0.00017397200349956253, | |
| "loss": 0.5264, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.7726505346088914, | |
| "grad_norm": 112.25470154565467, | |
| "learning_rate": 0.000173753280839895, | |
| "loss": 0.5341, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.7754642656162072, | |
| "grad_norm": 5.004631103885064, | |
| "learning_rate": 0.00017353455818022744, | |
| "loss": 0.7944, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.778277996623523, | |
| "grad_norm": 36.206284557597996, | |
| "learning_rate": 0.00017331583552055991, | |
| "loss": 0.6088, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.7810917276308385, | |
| "grad_norm": 114.83303534732538, | |
| "learning_rate": 0.00017309711286089238, | |
| "loss": 0.8535, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.7839054586381542, | |
| "grad_norm": 39.25126961762459, | |
| "learning_rate": 0.00017287839020122485, | |
| "loss": 0.4341, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.78671918964547, | |
| "grad_norm": 38.887489483647045, | |
| "learning_rate": 0.00017265966754155727, | |
| "loss": 0.6262, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.7895329206527855, | |
| "grad_norm": 14.662335403344557, | |
| "learning_rate": 0.00017244094488188974, | |
| "loss": 0.7171, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.7923466516601012, | |
| "grad_norm": 12.888841929949086, | |
| "learning_rate": 0.0001722222222222222, | |
| "loss": 0.5094, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.795160382667417, | |
| "grad_norm": 22.26070054592782, | |
| "learning_rate": 0.00017200349956255468, | |
| "loss": 0.4261, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.7979741136747327, | |
| "grad_norm": 23.038642054175508, | |
| "learning_rate": 0.00017178477690288712, | |
| "loss": 0.4987, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.8007878446820484, | |
| "grad_norm": 9.474105949765265, | |
| "learning_rate": 0.0001715660542432196, | |
| "loss": 0.5878, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.8036015756893642, | |
| "grad_norm": 70.27189577371828, | |
| "learning_rate": 0.00017134733158355204, | |
| "loss": 0.4774, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.80641530669668, | |
| "grad_norm": 26.61930756765317, | |
| "learning_rate": 0.00017112860892388448, | |
| "loss": 0.598, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.8092290377039955, | |
| "grad_norm": 12.533144473520764, | |
| "learning_rate": 0.00017090988626421695, | |
| "loss": 0.6024, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.8120427687113112, | |
| "grad_norm": 48.14804877192819, | |
| "learning_rate": 0.00017069116360454942, | |
| "loss": 0.9674, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.814856499718627, | |
| "grad_norm": 15.22684827666546, | |
| "learning_rate": 0.0001704724409448819, | |
| "loss": 0.7041, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.8176702307259425, | |
| "grad_norm": 43.53579267992454, | |
| "learning_rate": 0.00017025371828521436, | |
| "loss": 0.64, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.8204839617332582, | |
| "grad_norm": 41.19355041508803, | |
| "learning_rate": 0.00017003499562554677, | |
| "loss": 0.4662, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.823297692740574, | |
| "grad_norm": 44.036889353364195, | |
| "learning_rate": 0.00016981627296587924, | |
| "loss": 0.58, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.8261114237478897, | |
| "grad_norm": 3.448573380443346, | |
| "learning_rate": 0.00016959755030621171, | |
| "loss": 0.8725, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.8289251547552055, | |
| "grad_norm": 27.321094827902026, | |
| "learning_rate": 0.00016937882764654416, | |
| "loss": 0.5894, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.8317388857625212, | |
| "grad_norm": 11.550339390724506, | |
| "learning_rate": 0.00016916010498687663, | |
| "loss": 1.0566, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.834552616769837, | |
| "grad_norm": 29.635894647284605, | |
| "learning_rate": 0.0001689413823272091, | |
| "loss": 0.5362, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.8373663477771525, | |
| "grad_norm": 28.87624464201189, | |
| "learning_rate": 0.0001687226596675415, | |
| "loss": 0.4639, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.8401800787844682, | |
| "grad_norm": 20.63490125951859, | |
| "learning_rate": 0.00016850393700787398, | |
| "loss": 0.5236, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.842993809791784, | |
| "grad_norm": 24.50339308909374, | |
| "learning_rate": 0.00016828521434820645, | |
| "loss": 0.6794, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.8458075407990995, | |
| "grad_norm": 22.43711891156182, | |
| "learning_rate": 0.00016806649168853892, | |
| "loss": 0.6718, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.8486212718064152, | |
| "grad_norm": 12.312381142318516, | |
| "learning_rate": 0.0001678477690288714, | |
| "loss": 0.546, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.851435002813731, | |
| "grad_norm": 52.91882297397753, | |
| "learning_rate": 0.00016762904636920384, | |
| "loss": 0.6186, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.8542487338210467, | |
| "grad_norm": 16.248616548482175, | |
| "learning_rate": 0.0001674103237095363, | |
| "loss": 0.4443, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.8570624648283625, | |
| "grad_norm": 46.586186471184554, | |
| "learning_rate": 0.00016719160104986875, | |
| "loss": 0.6858, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.8598761958356782, | |
| "grad_norm": 19.395899136066642, | |
| "learning_rate": 0.0001669728783902012, | |
| "loss": 0.7021, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.862689926842994, | |
| "grad_norm": 18.858764154991857, | |
| "learning_rate": 0.00016675415573053366, | |
| "loss": 0.5572, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.8655036578503095, | |
| "grad_norm": 34.85053822034739, | |
| "learning_rate": 0.00016653543307086613, | |
| "loss": 0.5491, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.8683173888576252, | |
| "grad_norm": 153.58844319020815, | |
| "learning_rate": 0.0001663167104111986, | |
| "loss": 1.0728, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.871131119864941, | |
| "grad_norm": 35.47908415964911, | |
| "learning_rate": 0.00016609798775153105, | |
| "loss": 0.7507, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.8739448508722565, | |
| "grad_norm": 25.27011106317989, | |
| "learning_rate": 0.0001658792650918635, | |
| "loss": 0.4367, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.8767585818795722, | |
| "grad_norm": 44.64115963656757, | |
| "learning_rate": 0.00016566054243219596, | |
| "loss": 0.4281, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.879572312886888, | |
| "grad_norm": 12.745753520758505, | |
| "learning_rate": 0.00016544181977252843, | |
| "loss": 0.459, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.8823860438942037, | |
| "grad_norm": 44.33709000085202, | |
| "learning_rate": 0.00016522309711286087, | |
| "loss": 0.6107, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.8851997749015195, | |
| "grad_norm": 133.11619488605578, | |
| "learning_rate": 0.00016500437445319334, | |
| "loss": 0.7659, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.8880135059088352, | |
| "grad_norm": 31.166131234712104, | |
| "learning_rate": 0.0001647856517935258, | |
| "loss": 0.2729, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.890827236916151, | |
| "grad_norm": 254.4189742635797, | |
| "learning_rate": 0.00016456692913385823, | |
| "loss": 0.8195, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.8936409679234665, | |
| "grad_norm": 39.51199032081583, | |
| "learning_rate": 0.0001643482064741907, | |
| "loss": 0.6009, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.8964546989307822, | |
| "grad_norm": 39.900138315281346, | |
| "learning_rate": 0.00016412948381452317, | |
| "loss": 0.5433, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.899268429938098, | |
| "grad_norm": 29.49439914115921, | |
| "learning_rate": 0.00016391076115485564, | |
| "loss": 0.5698, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.9020821609454135, | |
| "grad_norm": 12.369579350171918, | |
| "learning_rate": 0.0001636920384951881, | |
| "loss": 0.5124, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.9048958919527292, | |
| "grad_norm": 35.497941038034284, | |
| "learning_rate": 0.00016347331583552055, | |
| "loss": 0.5609, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.907709622960045, | |
| "grad_norm": 19.91481826563306, | |
| "learning_rate": 0.000163254593175853, | |
| "loss": 0.5812, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.9105233539673607, | |
| "grad_norm": 14.286913944349674, | |
| "learning_rate": 0.00016303587051618546, | |
| "loss": 0.5505, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.9133370849746765, | |
| "grad_norm": 45.55623816014948, | |
| "learning_rate": 0.0001628171478565179, | |
| "loss": 0.5946, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.9161508159819922, | |
| "grad_norm": 37.12930100826606, | |
| "learning_rate": 0.00016259842519685038, | |
| "loss": 0.6553, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.918964546989308, | |
| "grad_norm": 5.951242571048711, | |
| "learning_rate": 0.00016237970253718285, | |
| "loss": 0.4606, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.9217782779966235, | |
| "grad_norm": 15.667646539342769, | |
| "learning_rate": 0.00016216097987751532, | |
| "loss": 0.5596, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.9245920090039392, | |
| "grad_norm": 31.888221180998954, | |
| "learning_rate": 0.00016194225721784776, | |
| "loss": 0.665, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.927405740011255, | |
| "grad_norm": 20.015849211223937, | |
| "learning_rate": 0.0001617235345581802, | |
| "loss": 0.4471, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.9302194710185705, | |
| "grad_norm": 19.065880675790694, | |
| "learning_rate": 0.00016150481189851267, | |
| "loss": 0.5152, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.9330332020258862, | |
| "grad_norm": 97.42577987957829, | |
| "learning_rate": 0.00016128608923884514, | |
| "loss": 0.719, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.935846933033202, | |
| "grad_norm": 21.461313685288744, | |
| "learning_rate": 0.00016106736657917758, | |
| "loss": 0.4803, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.9386606640405177, | |
| "grad_norm": 31.746099523443103, | |
| "learning_rate": 0.00016084864391951005, | |
| "loss": 0.672, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.9414743950478335, | |
| "grad_norm": 11.063287518374715, | |
| "learning_rate": 0.00016062992125984252, | |
| "loss": 0.6651, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.9442881260551492, | |
| "grad_norm": 1.3011595771705704, | |
| "learning_rate": 0.00016041119860017494, | |
| "loss": 0.3245, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.947101857062465, | |
| "grad_norm": 11.00869364626475, | |
| "learning_rate": 0.0001601924759405074, | |
| "loss": 0.6375, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.9499155880697805, | |
| "grad_norm": 95.25621911518874, | |
| "learning_rate": 0.00015997375328083988, | |
| "loss": 0.6515, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.9527293190770962, | |
| "grad_norm": 980.764196522124, | |
| "learning_rate": 0.00015975503062117235, | |
| "loss": 0.7356, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.955543050084412, | |
| "grad_norm": 37.87200039766416, | |
| "learning_rate": 0.0001595363079615048, | |
| "loss": 0.5817, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.9583567810917275, | |
| "grad_norm": 17.35128744114319, | |
| "learning_rate": 0.00015931758530183726, | |
| "loss": 0.7061, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.9611705120990433, | |
| "grad_norm": 74.26952030970506, | |
| "learning_rate": 0.0001590988626421697, | |
| "loss": 0.6526, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.963984243106359, | |
| "grad_norm": 67.6583202864629, | |
| "learning_rate": 0.00015888013998250218, | |
| "loss": 0.5742, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.9667979741136747, | |
| "grad_norm": 78.23948101480053, | |
| "learning_rate": 0.00015866141732283462, | |
| "loss": 0.5176, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.9696117051209905, | |
| "grad_norm": 20.77366817098103, | |
| "learning_rate": 0.0001584426946631671, | |
| "loss": 0.6506, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.9724254361283062, | |
| "grad_norm": 48.56847115116187, | |
| "learning_rate": 0.00015822397200349956, | |
| "loss": 0.6096, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.975239167135622, | |
| "grad_norm": 162.60631212883658, | |
| "learning_rate": 0.00015800524934383203, | |
| "loss": 0.8333, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.9780528981429375, | |
| "grad_norm": 89.91859486527336, | |
| "learning_rate": 0.00015778652668416444, | |
| "loss": 0.7861, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.9808666291502532, | |
| "grad_norm": 25.62581876051493, | |
| "learning_rate": 0.00015756780402449691, | |
| "loss": 0.7207, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.983680360157569, | |
| "grad_norm": 49.05293632646501, | |
| "learning_rate": 0.00015734908136482938, | |
| "loss": 0.6649, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.9864940911648845, | |
| "grad_norm": 29.01417189468632, | |
| "learning_rate": 0.00015713035870516183, | |
| "loss": 0.7179, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.9893078221722003, | |
| "grad_norm": 39.04202893773362, | |
| "learning_rate": 0.0001569116360454943, | |
| "loss": 0.4848, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.992121553179516, | |
| "grad_norm": 31.47398318556384, | |
| "learning_rate": 0.00015669291338582677, | |
| "loss": 0.4892, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.9949352841868317, | |
| "grad_norm": 12.299132356823057, | |
| "learning_rate": 0.00015647419072615924, | |
| "loss": 0.501, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.9977490151941475, | |
| "grad_norm": 7.597881461387406, | |
| "learning_rate": 0.00015625546806649165, | |
| "loss": 0.4163, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_0_f1": 0.5831303288672351, | |
| "eval_0_precision": 0.46033653846153844, | |
| "eval_0_recall": 0.795265780730897, | |
| "eval_1_f1": 0.7661028532376558, | |
| "eval_1_precision": 0.9009443439823187, | |
| "eval_1_recall": 0.6663694456828652, | |
| "eval_accuracy": 0.7003392798511546, | |
| "eval_loss": 0.6884765625, | |
| "eval_runtime": 468.4626, | |
| "eval_samples_per_second": 19.504, | |
| "eval_steps_per_second": 3.251, | |
| "step": 7108 | |
| }, | |
| { | |
| "epoch": 2.0005627462014632, | |
| "grad_norm": 14.396603604626321, | |
| "learning_rate": 0.00015603674540682412, | |
| "loss": 0.5533, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.003376477208779, | |
| "grad_norm": 2.8030111262625117, | |
| "learning_rate": 0.0001558180227471566, | |
| "loss": 0.7694, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.0061902082160947, | |
| "grad_norm": 1.4709503361365215, | |
| "learning_rate": 0.00015559930008748906, | |
| "loss": 0.654, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.00900393922341, | |
| "grad_norm": 89.697298372, | |
| "learning_rate": 0.0001553805774278215, | |
| "loss": 1.0665, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.0118176702307258, | |
| "grad_norm": 33.50349679176105, | |
| "learning_rate": 0.00015516185476815398, | |
| "loss": 0.6788, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.0146314012380415, | |
| "grad_norm": 17.68602683534024, | |
| "learning_rate": 0.00015494313210848642, | |
| "loss": 0.7757, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.0174451322453573, | |
| "grad_norm": 9.545453345027244, | |
| "learning_rate": 0.00015472440944881886, | |
| "loss": 0.6453, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.020258863252673, | |
| "grad_norm": 35.59753729386904, | |
| "learning_rate": 0.00015450568678915133, | |
| "loss": 0.3854, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.0230725942599888, | |
| "grad_norm": 9.072270826783758, | |
| "learning_rate": 0.0001542869641294838, | |
| "loss": 0.6609, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.0258863252673045, | |
| "grad_norm": 47.950374988048274, | |
| "learning_rate": 0.00015406824146981627, | |
| "loss": 1.2295, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.0287000562746202, | |
| "grad_norm": 20.65584618351164, | |
| "learning_rate": 0.00015384951881014874, | |
| "loss": 0.638, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.031513787281936, | |
| "grad_norm": 56.42081760798516, | |
| "learning_rate": 0.00015363079615048116, | |
| "loss": 0.6909, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.0343275182892517, | |
| "grad_norm": 40.50031891450003, | |
| "learning_rate": 0.00015341207349081363, | |
| "loss": 0.5031, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.037141249296567, | |
| "grad_norm": 17.00567280211469, | |
| "learning_rate": 0.0001531933508311461, | |
| "loss": 0.3856, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.039954980303883, | |
| "grad_norm": 4.489135688247525, | |
| "learning_rate": 0.00015297462817147854, | |
| "loss": 0.2859, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.0427687113111985, | |
| "grad_norm": 14.775510153378368, | |
| "learning_rate": 0.000152755905511811, | |
| "loss": 0.4759, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.0455824423185143, | |
| "grad_norm": 76.39163767252063, | |
| "learning_rate": 0.00015253718285214348, | |
| "loss": 0.6392, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.04839617332583, | |
| "grad_norm": 22.821590093980884, | |
| "learning_rate": 0.00015231846019247592, | |
| "loss": 0.7293, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.0512099043331458, | |
| "grad_norm": 9.836491539631897, | |
| "learning_rate": 0.00015209973753280837, | |
| "loss": 0.7348, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.0540236353404615, | |
| "grad_norm": 6.630565485108728, | |
| "learning_rate": 0.00015188101487314084, | |
| "loss": 0.5144, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.0568373663477773, | |
| "grad_norm": 34.675432308942774, | |
| "learning_rate": 0.0001516622922134733, | |
| "loss": 0.4596, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.059651097355093, | |
| "grad_norm": 7.181607771013236, | |
| "learning_rate": 0.00015144356955380578, | |
| "loss": 0.4084, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.0624648283624087, | |
| "grad_norm": 66.82180898278047, | |
| "learning_rate": 0.00015122484689413822, | |
| "loss": 0.5495, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.065278559369724, | |
| "grad_norm": 1.5514777421730275, | |
| "learning_rate": 0.0001510061242344707, | |
| "loss": 1.051, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.06809229037704, | |
| "grad_norm": 283.1577772960893, | |
| "learning_rate": 0.00015078740157480313, | |
| "loss": 0.6516, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.0709060213843555, | |
| "grad_norm": 3.7731035338477232, | |
| "learning_rate": 0.00015056867891513558, | |
| "loss": 0.4129, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.0737197523916713, | |
| "grad_norm": 20.19374459118123, | |
| "learning_rate": 0.00015034995625546805, | |
| "loss": 0.6187, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.076533483398987, | |
| "grad_norm": 78.73050551341473, | |
| "learning_rate": 0.00015013123359580052, | |
| "loss": 1.2969, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.0793472144063028, | |
| "grad_norm": 165.4510143886445, | |
| "learning_rate": 0.00014991251093613296, | |
| "loss": 0.7819, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.0821609454136185, | |
| "grad_norm": 7.206876778950156, | |
| "learning_rate": 0.00014969378827646543, | |
| "loss": 0.5006, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.0849746764209343, | |
| "grad_norm": 30.51391990574, | |
| "learning_rate": 0.0001494750656167979, | |
| "loss": 0.5426, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.08778840742825, | |
| "grad_norm": 82.36403908402391, | |
| "learning_rate": 0.00014925634295713034, | |
| "loss": 0.792, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.0906021384355657, | |
| "grad_norm": 4.553958671951656, | |
| "learning_rate": 0.0001490376202974628, | |
| "loss": 0.5293, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.093415869442881, | |
| "grad_norm": 33.752213518528826, | |
| "learning_rate": 0.00014881889763779525, | |
| "loss": 0.5422, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.096229600450197, | |
| "grad_norm": 113.08178742115065, | |
| "learning_rate": 0.00014860017497812772, | |
| "loss": 0.5121, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.0990433314575125, | |
| "grad_norm": 30.526152576678303, | |
| "learning_rate": 0.00014838145231846017, | |
| "loss": 0.5431, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.1018570624648283, | |
| "grad_norm": 51.53196387319876, | |
| "learning_rate": 0.00014816272965879264, | |
| "loss": 0.5991, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.104670793472144, | |
| "grad_norm": 25.153202967002166, | |
| "learning_rate": 0.0001479440069991251, | |
| "loss": 0.4238, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.1074845244794598, | |
| "grad_norm": 10.632643598102124, | |
| "learning_rate": 0.00014772528433945755, | |
| "loss": 0.4666, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.1102982554867755, | |
| "grad_norm": 25.335057772955818, | |
| "learning_rate": 0.00014750656167979002, | |
| "loss": 0.5208, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.1131119864940913, | |
| "grad_norm": 63.40523065355852, | |
| "learning_rate": 0.0001472878390201225, | |
| "loss": 0.6438, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.115925717501407, | |
| "grad_norm": 23.800767421170576, | |
| "learning_rate": 0.00014706911636045493, | |
| "loss": 0.3728, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.1187394485087228, | |
| "grad_norm": 27.228833850051487, | |
| "learning_rate": 0.00014685039370078738, | |
| "loss": 0.4183, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.121553179516038, | |
| "grad_norm": 27.19548175324042, | |
| "learning_rate": 0.00014663167104111985, | |
| "loss": 0.4994, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.124366910523354, | |
| "grad_norm": 24.460974703930734, | |
| "learning_rate": 0.0001464129483814523, | |
| "loss": 0.5199, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.1271806415306695, | |
| "grad_norm": 38.542816752552284, | |
| "learning_rate": 0.00014619422572178476, | |
| "loss": 0.4282, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.1299943725379853, | |
| "grad_norm": 18.694200296950598, | |
| "learning_rate": 0.00014597550306211723, | |
| "loss": 0.4003, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.132808103545301, | |
| "grad_norm": 47.57626879348759, | |
| "learning_rate": 0.00014575678040244967, | |
| "loss": 0.3651, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.135621834552617, | |
| "grad_norm": 137.96598042768042, | |
| "learning_rate": 0.00014553805774278214, | |
| "loss": 0.362, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.1384355655599325, | |
| "grad_norm": 43.01036785686837, | |
| "learning_rate": 0.0001453193350831146, | |
| "loss": 0.8131, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.1412492965672483, | |
| "grad_norm": 7.717528689632034, | |
| "learning_rate": 0.00014510061242344705, | |
| "loss": 0.4267, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.144063027574564, | |
| "grad_norm": 45.109974058089236, | |
| "learning_rate": 0.00014488188976377952, | |
| "loss": 0.5199, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.1468767585818798, | |
| "grad_norm": 34.364453456078586, | |
| "learning_rate": 0.00014466316710411197, | |
| "loss": 0.5723, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.1496904895891955, | |
| "grad_norm": 38.339990028883506, | |
| "learning_rate": 0.0001444444444444444, | |
| "loss": 1.1828, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.152504220596511, | |
| "grad_norm": 10.007067441938148, | |
| "learning_rate": 0.00014422572178477688, | |
| "loss": 0.749, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.1553179516038266, | |
| "grad_norm": 3.8129734415651444, | |
| "learning_rate": 0.00014400699912510935, | |
| "loss": 0.3419, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.1581316826111423, | |
| "grad_norm": 27.437919735141907, | |
| "learning_rate": 0.00014378827646544182, | |
| "loss": 0.9602, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.160945413618458, | |
| "grad_norm": 16.850020692243806, | |
| "learning_rate": 0.00014356955380577426, | |
| "loss": 0.5824, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.163759144625774, | |
| "grad_norm": 26.123500215108415, | |
| "learning_rate": 0.00014335083114610673, | |
| "loss": 0.5254, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.1665728756330895, | |
| "grad_norm": 7.580469258495237, | |
| "learning_rate": 0.00014313210848643918, | |
| "loss": 0.4178, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.1693866066404053, | |
| "grad_norm": 9.462955308502181, | |
| "learning_rate": 0.00014291338582677165, | |
| "loss": 0.4992, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.172200337647721, | |
| "grad_norm": 23.036715463308244, | |
| "learning_rate": 0.0001426946631671041, | |
| "loss": 0.7331, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.1750140686550368, | |
| "grad_norm": 13.038447468985156, | |
| "learning_rate": 0.00014247594050743656, | |
| "loss": 0.5071, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.177827799662352, | |
| "grad_norm": 47.061880181069775, | |
| "learning_rate": 0.000142257217847769, | |
| "loss": 0.5573, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.180641530669668, | |
| "grad_norm": 22.432526114178756, | |
| "learning_rate": 0.00014203849518810147, | |
| "loss": 0.7032, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.1834552616769836, | |
| "grad_norm": 15.983873087217463, | |
| "learning_rate": 0.00014181977252843394, | |
| "loss": 0.5136, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.1862689926842993, | |
| "grad_norm": 5.098441308324375, | |
| "learning_rate": 0.00014160104986876639, | |
| "loss": 0.3924, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.189082723691615, | |
| "grad_norm": 24.02126615806521, | |
| "learning_rate": 0.00014138232720909886, | |
| "loss": 0.4609, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.191896454698931, | |
| "grad_norm": 22.537388916291963, | |
| "learning_rate": 0.00014116360454943133, | |
| "loss": 0.927, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.1947101857062465, | |
| "grad_norm": 126.37626869176091, | |
| "learning_rate": 0.00014094488188976377, | |
| "loss": 0.5503, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.1975239167135623, | |
| "grad_norm": 16.78141729175572, | |
| "learning_rate": 0.00014072615923009624, | |
| "loss": 0.5167, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 2.200337647720878, | |
| "grad_norm": 11.596014649927676, | |
| "learning_rate": 0.00014050743657042868, | |
| "loss": 0.3368, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 2.2031513787281938, | |
| "grad_norm": 52.42749578186254, | |
| "learning_rate": 0.00014028871391076112, | |
| "loss": 0.7298, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 2.205965109735509, | |
| "grad_norm": 8.422176389308614, | |
| "learning_rate": 0.0001400699912510936, | |
| "loss": 0.6176, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 2.208778840742825, | |
| "grad_norm": 14.525285965374584, | |
| "learning_rate": 0.00013985126859142606, | |
| "loss": 0.3548, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.2115925717501406, | |
| "grad_norm": 74.49968322584152, | |
| "learning_rate": 0.0001396325459317585, | |
| "loss": 0.4476, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 2.2144063027574563, | |
| "grad_norm": 15.833711009373205, | |
| "learning_rate": 0.00013941382327209098, | |
| "loss": 0.5139, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 2.217220033764772, | |
| "grad_norm": 9.502417611259494, | |
| "learning_rate": 0.00013919510061242345, | |
| "loss": 0.3339, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 2.220033764772088, | |
| "grad_norm": 42.80936499484905, | |
| "learning_rate": 0.0001389763779527559, | |
| "loss": 0.8684, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 2.2228474957794035, | |
| "grad_norm": 22.65081394619362, | |
| "learning_rate": 0.00013875765529308836, | |
| "loss": 0.4503, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.2256612267867193, | |
| "grad_norm": 53.929376514239436, | |
| "learning_rate": 0.0001385389326334208, | |
| "loss": 0.631, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 2.228474957794035, | |
| "grad_norm": 22.765753793450298, | |
| "learning_rate": 0.00013832020997375327, | |
| "loss": 0.4681, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 2.231288688801351, | |
| "grad_norm": 20.083204174681672, | |
| "learning_rate": 0.00013810148731408572, | |
| "loss": 0.4727, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 2.234102419808666, | |
| "grad_norm": 41.8348829519395, | |
| "learning_rate": 0.00013788276465441819, | |
| "loss": 0.5225, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 2.236916150815982, | |
| "grad_norm": 24.39047987978454, | |
| "learning_rate": 0.00013766404199475066, | |
| "loss": 0.5678, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.2397298818232976, | |
| "grad_norm": 5.829038461480086, | |
| "learning_rate": 0.0001374453193350831, | |
| "loss": 0.5068, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 2.2425436128306133, | |
| "grad_norm": 3.39378744630721, | |
| "learning_rate": 0.00013722659667541557, | |
| "loss": 0.6194, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 2.245357343837929, | |
| "grad_norm": 5.237893149202979, | |
| "learning_rate": 0.00013700787401574804, | |
| "loss": 0.5441, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 2.248171074845245, | |
| "grad_norm": 35.256946231031435, | |
| "learning_rate": 0.00013678915135608048, | |
| "loss": 0.8805, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 2.2509848058525606, | |
| "grad_norm": 6.339404320662685, | |
| "learning_rate": 0.00013657042869641292, | |
| "loss": 0.3679, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.2537985368598763, | |
| "grad_norm": 39.61705527700101, | |
| "learning_rate": 0.0001363517060367454, | |
| "loss": 0.6514, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 2.256612267867192, | |
| "grad_norm": 4.860545258191048, | |
| "learning_rate": 0.00013613298337707784, | |
| "loss": 0.4431, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 2.259425998874508, | |
| "grad_norm": 23.334033297076132, | |
| "learning_rate": 0.0001359142607174103, | |
| "loss": 0.2908, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 2.2622397298818235, | |
| "grad_norm": 2.255539515554214, | |
| "learning_rate": 0.00013569553805774278, | |
| "loss": 0.5004, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 2.265053460889139, | |
| "grad_norm": 4.512388168148573, | |
| "learning_rate": 0.00013547681539807522, | |
| "loss": 0.3133, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 2.2678671918964546, | |
| "grad_norm": 88.81343772870977, | |
| "learning_rate": 0.0001352580927384077, | |
| "loss": 0.7748, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 2.2706809229037703, | |
| "grad_norm": 16.622608833874658, | |
| "learning_rate": 0.00013503937007874016, | |
| "loss": 0.4579, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 2.273494653911086, | |
| "grad_norm": 24.28677195401668, | |
| "learning_rate": 0.00013484251968503937, | |
| "loss": 1.008, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 2.276308384918402, | |
| "grad_norm": 12.088925730126462, | |
| "learning_rate": 0.0001346237970253718, | |
| "loss": 0.7886, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 2.2791221159257176, | |
| "grad_norm": 9.060718509259697, | |
| "learning_rate": 0.00013440507436570428, | |
| "loss": 0.395, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.2819358469330333, | |
| "grad_norm": 10.115823041010223, | |
| "learning_rate": 0.00013418635170603672, | |
| "loss": 0.4165, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 2.284749577940349, | |
| "grad_norm": 30.26934802189062, | |
| "learning_rate": 0.0001339676290463692, | |
| "loss": 0.542, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 2.287563308947665, | |
| "grad_norm": 25.66327045675839, | |
| "learning_rate": 0.00013374890638670164, | |
| "loss": 0.4935, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 2.29037703995498, | |
| "grad_norm": 56.781658920637945, | |
| "learning_rate": 0.0001335301837270341, | |
| "loss": 0.8249, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 2.293190770962296, | |
| "grad_norm": 21.68653409329514, | |
| "learning_rate": 0.00013331146106736658, | |
| "loss": 0.5771, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 2.2960045019696116, | |
| "grad_norm": 9.068927621383619, | |
| "learning_rate": 0.00013309273840769902, | |
| "loss": 0.4923, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 2.2988182329769273, | |
| "grad_norm": 54.33234299837627, | |
| "learning_rate": 0.0001328740157480315, | |
| "loss": 0.718, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 2.301631963984243, | |
| "grad_norm": 8.0851611902692, | |
| "learning_rate": 0.00013265529308836396, | |
| "loss": 0.3015, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 2.304445694991559, | |
| "grad_norm": 14.93759192354656, | |
| "learning_rate": 0.0001324365704286964, | |
| "loss": 0.4911, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 2.3072594259988746, | |
| "grad_norm": 67.05525829681581, | |
| "learning_rate": 0.00013221784776902884, | |
| "loss": 0.6472, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.3100731570061903, | |
| "grad_norm": 39.202689322357536, | |
| "learning_rate": 0.00013199912510936131, | |
| "loss": 0.4606, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 2.312886888013506, | |
| "grad_norm": 178.24379134099266, | |
| "learning_rate": 0.00013178040244969378, | |
| "loss": 0.6354, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 2.315700619020822, | |
| "grad_norm": 85.25424593199081, | |
| "learning_rate": 0.00013156167979002623, | |
| "loss": 0.4367, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 2.3185143500281375, | |
| "grad_norm": 53.75533136940712, | |
| "learning_rate": 0.0001313429571303587, | |
| "loss": 0.6239, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 2.321328081035453, | |
| "grad_norm": 12.586951832356146, | |
| "learning_rate": 0.00013112423447069117, | |
| "loss": 0.5565, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 2.3241418120427686, | |
| "grad_norm": 16.833339361466688, | |
| "learning_rate": 0.0001309055118110236, | |
| "loss": 0.5959, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 2.3269555430500843, | |
| "grad_norm": 50.26646442085951, | |
| "learning_rate": 0.00013068678915135608, | |
| "loss": 0.4654, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 2.3297692740574, | |
| "grad_norm": 129.88253348184315, | |
| "learning_rate": 0.00013046806649168852, | |
| "loss": 0.6027, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 2.332583005064716, | |
| "grad_norm": 120.6512466258759, | |
| "learning_rate": 0.000130249343832021, | |
| "loss": 0.6416, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 2.3353967360720316, | |
| "grad_norm": 76.6852745473175, | |
| "learning_rate": 0.00013003062117235344, | |
| "loss": 0.4121, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.3382104670793473, | |
| "grad_norm": 35.47557842382567, | |
| "learning_rate": 0.0001298118985126859, | |
| "loss": 0.5189, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 2.341024198086663, | |
| "grad_norm": 192.0503118081952, | |
| "learning_rate": 0.00012959317585301835, | |
| "loss": 0.8179, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 2.3438379290939784, | |
| "grad_norm": 18.588204667690324, | |
| "learning_rate": 0.00012937445319335082, | |
| "loss": 0.3917, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 2.346651660101294, | |
| "grad_norm": 191.70421960014338, | |
| "learning_rate": 0.0001291557305336833, | |
| "loss": 0.5404, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 2.34946539110861, | |
| "grad_norm": 238.9625701963259, | |
| "learning_rate": 0.00012893700787401573, | |
| "loss": 0.5827, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.3522791221159256, | |
| "grad_norm": 25.743296204281318, | |
| "learning_rate": 0.0001287182852143482, | |
| "loss": 0.3308, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 2.3550928531232413, | |
| "grad_norm": 17.13298864313216, | |
| "learning_rate": 0.00012849956255468065, | |
| "loss": 0.4597, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 2.357906584130557, | |
| "grad_norm": 27.69653969266591, | |
| "learning_rate": 0.00012828083989501312, | |
| "loss": 0.2887, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 2.360720315137873, | |
| "grad_norm": 18.82886891300214, | |
| "learning_rate": 0.00012808398950131232, | |
| "loss": 0.4994, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 2.3635340461451886, | |
| "grad_norm": 46.28194584088359, | |
| "learning_rate": 0.00012786526684164476, | |
| "loss": 0.6073, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.3663477771525043, | |
| "grad_norm": 4.807033074829807, | |
| "learning_rate": 0.00012764654418197723, | |
| "loss": 0.5521, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 2.36916150815982, | |
| "grad_norm": 25.502911733365003, | |
| "learning_rate": 0.0001274278215223097, | |
| "loss": 0.4886, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 2.371975239167136, | |
| "grad_norm": 37.960031508401514, | |
| "learning_rate": 0.00012720909886264215, | |
| "loss": 0.676, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 2.3747889701744516, | |
| "grad_norm": 9.396468446980126, | |
| "learning_rate": 0.00012699037620297462, | |
| "loss": 0.3818, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 2.377602701181767, | |
| "grad_norm": 140.64704015650366, | |
| "learning_rate": 0.0001267716535433071, | |
| "loss": 0.6695, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 2.3804164321890826, | |
| "grad_norm": 704.8913985778679, | |
| "learning_rate": 0.00012655293088363953, | |
| "loss": 0.4278, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 2.3832301631963984, | |
| "grad_norm": 24.070673929018703, | |
| "learning_rate": 0.000126334208223972, | |
| "loss": 0.6819, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 2.386043894203714, | |
| "grad_norm": 49.75637617417452, | |
| "learning_rate": 0.00012611548556430444, | |
| "loss": 0.7224, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 2.38885762521103, | |
| "grad_norm": 25.377596250206288, | |
| "learning_rate": 0.0001258967629046369, | |
| "loss": 0.5522, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 2.3916713562183456, | |
| "grad_norm": 44.21196059010374, | |
| "learning_rate": 0.00012567804024496936, | |
| "loss": 0.552, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.3944850872256613, | |
| "grad_norm": 93.86546608453293, | |
| "learning_rate": 0.00012545931758530183, | |
| "loss": 0.4249, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 2.397298818232977, | |
| "grad_norm": 20.326633023305288, | |
| "learning_rate": 0.0001252405949256343, | |
| "loss": 0.4898, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 2.4001125492402924, | |
| "grad_norm": 6.861362262817904, | |
| "learning_rate": 0.00012502187226596674, | |
| "loss": 0.4746, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 2.402926280247608, | |
| "grad_norm": 5.791391274939596, | |
| "learning_rate": 0.0001248031496062992, | |
| "loss": 0.4244, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 2.405740011254924, | |
| "grad_norm": 31.062706775859727, | |
| "learning_rate": 0.00012460629921259842, | |
| "loss": 0.6831, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.4085537422622396, | |
| "grad_norm": 155.36648204775855, | |
| "learning_rate": 0.00012438757655293089, | |
| "loss": 0.6126, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 2.4113674732695554, | |
| "grad_norm": 54.962311559010956, | |
| "learning_rate": 0.00012416885389326333, | |
| "loss": 0.5716, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 2.414181204276871, | |
| "grad_norm": 118.40831467456086, | |
| "learning_rate": 0.00012395013123359577, | |
| "loss": 0.7426, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 2.416994935284187, | |
| "grad_norm": 810.7603005158664, | |
| "learning_rate": 0.00012373140857392824, | |
| "loss": 0.5642, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 2.4198086662915026, | |
| "grad_norm": 51.79506077875997, | |
| "learning_rate": 0.0001235126859142607, | |
| "loss": 0.3925, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.4226223972988183, | |
| "grad_norm": 57.44045267412865, | |
| "learning_rate": 0.00012329396325459315, | |
| "loss": 0.4651, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 2.425436128306134, | |
| "grad_norm": 10.645743056447664, | |
| "learning_rate": 0.00012307524059492562, | |
| "loss": 0.5592, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 2.42824985931345, | |
| "grad_norm": 39.82888930894237, | |
| "learning_rate": 0.0001228565179352581, | |
| "loss": 0.5235, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 2.4310635903207656, | |
| "grad_norm": 202.40272841895077, | |
| "learning_rate": 0.00012263779527559054, | |
| "loss": 0.6403, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 2.433877321328081, | |
| "grad_norm": 8.768929997416398, | |
| "learning_rate": 0.000122419072615923, | |
| "loss": 0.3559, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.4366910523353966, | |
| "grad_norm": 73.24658593563804, | |
| "learning_rate": 0.00012220034995625545, | |
| "loss": 0.8297, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 2.4395047833427124, | |
| "grad_norm": 9.18198585469821, | |
| "learning_rate": 0.00012198162729658791, | |
| "loss": 0.7252, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 2.442318514350028, | |
| "grad_norm": 364.57870592301464, | |
| "learning_rate": 0.00012176290463692038, | |
| "loss": 0.5064, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 2.445132245357344, | |
| "grad_norm": 21.670796127528217, | |
| "learning_rate": 0.00012154418197725283, | |
| "loss": 0.374, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 2.4479459763646596, | |
| "grad_norm": 17.324860013255872, | |
| "learning_rate": 0.00012132545931758528, | |
| "loss": 0.4137, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.4507597073719753, | |
| "grad_norm": 22.780757919669334, | |
| "learning_rate": 0.00012110673665791775, | |
| "loss": 0.4646, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 2.453573438379291, | |
| "grad_norm": 39.75471721705483, | |
| "learning_rate": 0.00012088801399825022, | |
| "loss": 0.5432, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 2.4563871693866064, | |
| "grad_norm": 49.74115368293155, | |
| "learning_rate": 0.00012066929133858267, | |
| "loss": 0.9697, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 2.459200900393922, | |
| "grad_norm": 15.825907605556525, | |
| "learning_rate": 0.00012045056867891512, | |
| "loss": 0.4463, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 2.462014631401238, | |
| "grad_norm": 17.73048175980216, | |
| "learning_rate": 0.00012023184601924759, | |
| "loss": 0.4398, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.4648283624085536, | |
| "grad_norm": 30.183102182735098, | |
| "learning_rate": 0.00012001312335958004, | |
| "loss": 0.6332, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 2.4676420934158694, | |
| "grad_norm": 25.73335808399061, | |
| "learning_rate": 0.0001197944006999125, | |
| "loss": 0.4339, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 2.470455824423185, | |
| "grad_norm": 18.419862117919163, | |
| "learning_rate": 0.00011957567804024496, | |
| "loss": 0.4134, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 2.473269555430501, | |
| "grad_norm": 30.616274458695887, | |
| "learning_rate": 0.00011935695538057743, | |
| "loss": 0.2893, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 2.4760832864378166, | |
| "grad_norm": 1.356291998114622, | |
| "learning_rate": 0.00011913823272090987, | |
| "loss": 0.7339, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.4788970174451324, | |
| "grad_norm": 21.213031498236028, | |
| "learning_rate": 0.00011891951006124234, | |
| "loss": 0.3974, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 2.481710748452448, | |
| "grad_norm": 16.21040199222578, | |
| "learning_rate": 0.0001187007874015748, | |
| "loss": 0.5217, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 2.484524479459764, | |
| "grad_norm": 13.860914140582063, | |
| "learning_rate": 0.00011848206474190725, | |
| "loss": 0.4859, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 2.4873382104670796, | |
| "grad_norm": 48.496668101430515, | |
| "learning_rate": 0.00011826334208223971, | |
| "loss": 0.6578, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 2.490151941474395, | |
| "grad_norm": 8.89511240692744, | |
| "learning_rate": 0.00011804461942257218, | |
| "loss": 0.4366, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.4929656724817106, | |
| "grad_norm": 22.47093178418468, | |
| "learning_rate": 0.00011782589676290462, | |
| "loss": 0.5115, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 2.4957794034890264, | |
| "grad_norm": 16.50971197997101, | |
| "learning_rate": 0.00011760717410323709, | |
| "loss": 0.4433, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 2.498593134496342, | |
| "grad_norm": 6.399589126360222, | |
| "learning_rate": 0.00011738845144356955, | |
| "loss": 0.3354, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 2.501406865503658, | |
| "grad_norm": 10.640344890543053, | |
| "learning_rate": 0.00011716972878390199, | |
| "loss": 0.9088, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 2.5042205965109736, | |
| "grad_norm": 17.669174903791003, | |
| "learning_rate": 0.00011695100612423446, | |
| "loss": 0.7106, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.5070343275182894, | |
| "grad_norm": 19.417937682945556, | |
| "learning_rate": 0.00011673228346456692, | |
| "loss": 0.5522, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 2.509848058525605, | |
| "grad_norm": 21.275739872323875, | |
| "learning_rate": 0.00011651356080489937, | |
| "loss": 0.5965, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 2.5126617895329204, | |
| "grad_norm": 13.434077583334158, | |
| "learning_rate": 0.00011629483814523183, | |
| "loss": 0.5786, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 2.515475520540236, | |
| "grad_norm": 25.83592797652058, | |
| "learning_rate": 0.0001160761154855643, | |
| "loss": 0.4222, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 2.518289251547552, | |
| "grad_norm": 37.20038618687167, | |
| "learning_rate": 0.00011585739282589676, | |
| "loss": 0.4256, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.5211029825548676, | |
| "grad_norm": 53.97304836312147, | |
| "learning_rate": 0.00011563867016622921, | |
| "loss": 0.4834, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 2.5239167135621834, | |
| "grad_norm": 16.475642718077715, | |
| "learning_rate": 0.00011541994750656167, | |
| "loss": 0.5385, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 2.526730444569499, | |
| "grad_norm": 35.76870275664621, | |
| "learning_rate": 0.00011520122484689414, | |
| "loss": 0.4718, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 2.529544175576815, | |
| "grad_norm": 17.680183575624334, | |
| "learning_rate": 0.00011498250218722658, | |
| "loss": 0.549, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 2.5323579065841306, | |
| "grad_norm": 97.68298591049088, | |
| "learning_rate": 0.00011476377952755905, | |
| "loss": 0.4642, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.5351716375914464, | |
| "grad_norm": 95.91488844225455, | |
| "learning_rate": 0.00011454505686789151, | |
| "loss": 0.4499, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 2.537985368598762, | |
| "grad_norm": 49.98057434380942, | |
| "learning_rate": 0.00011432633420822395, | |
| "loss": 0.4452, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 2.540799099606078, | |
| "grad_norm": 86.94738373288978, | |
| "learning_rate": 0.00011410761154855642, | |
| "loss": 0.654, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 2.5436128306133936, | |
| "grad_norm": 46.27504444954838, | |
| "learning_rate": 0.00011388888888888889, | |
| "loss": 0.8217, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 2.546426561620709, | |
| "grad_norm": 6.8204282978011195, | |
| "learning_rate": 0.00011367016622922133, | |
| "loss": 0.3788, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.5492402926280247, | |
| "grad_norm": 74.45875969586639, | |
| "learning_rate": 0.00011345144356955379, | |
| "loss": 1.7253, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 2.5520540236353404, | |
| "grad_norm": 8.860003682861251, | |
| "learning_rate": 0.00011323272090988626, | |
| "loss": 0.4397, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 2.554867754642656, | |
| "grad_norm": 48.65502795851232, | |
| "learning_rate": 0.0001130139982502187, | |
| "loss": 0.386, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 2.557681485649972, | |
| "grad_norm": 3.2137363317945287, | |
| "learning_rate": 0.00011279527559055117, | |
| "loss": 0.2666, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 2.5604952166572876, | |
| "grad_norm": 124.42119058882817, | |
| "learning_rate": 0.00011257655293088363, | |
| "loss": 0.9019, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.5633089476646034, | |
| "grad_norm": 32.9544365134875, | |
| "learning_rate": 0.00011235783027121609, | |
| "loss": 0.9048, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 2.566122678671919, | |
| "grad_norm": 7.015944851676098, | |
| "learning_rate": 0.00011213910761154854, | |
| "loss": 0.3671, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 2.5689364096792344, | |
| "grad_norm": 36.00750003943152, | |
| "learning_rate": 0.00011192038495188101, | |
| "loss": 0.8046, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 2.57175014068655, | |
| "grad_norm": 24.83730040509871, | |
| "learning_rate": 0.00011170166229221346, | |
| "loss": 0.5702, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 2.574563871693866, | |
| "grad_norm": 5.192263862462742, | |
| "learning_rate": 0.00011148293963254593, | |
| "loss": 0.4801, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.5773776027011817, | |
| "grad_norm": 9.485040925668613, | |
| "learning_rate": 0.00011126421697287838, | |
| "loss": 0.7347, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 2.5801913337084974, | |
| "grad_norm": 188.0655816314744, | |
| "learning_rate": 0.00011104549431321082, | |
| "loss": 0.5629, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 2.583005064715813, | |
| "grad_norm": 4.262160421678828, | |
| "learning_rate": 0.0001108267716535433, | |
| "loss": 0.3597, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 2.585818795723129, | |
| "grad_norm": 48.99676536082116, | |
| "learning_rate": 0.00011060804899387576, | |
| "loss": 0.5096, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 2.5886325267304446, | |
| "grad_norm": 49.32059612461514, | |
| "learning_rate": 0.00011038932633420822, | |
| "loss": 0.8765, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.5914462577377604, | |
| "grad_norm": 11.590855409332988, | |
| "learning_rate": 0.00011017060367454066, | |
| "loss": 0.6143, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 2.594259988745076, | |
| "grad_norm": 38.29167922597077, | |
| "learning_rate": 0.00010995188101487313, | |
| "loss": 0.7939, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 2.597073719752392, | |
| "grad_norm": 32.31895255440478, | |
| "learning_rate": 0.00010973315835520559, | |
| "loss": 0.4451, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 2.5998874507597076, | |
| "grad_norm": 33.42321188287331, | |
| "learning_rate": 0.00010951443569553805, | |
| "loss": 0.5792, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.602701181767023, | |
| "grad_norm": 468.47470275260395, | |
| "learning_rate": 0.0001092957130358705, | |
| "loss": 0.4165, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.6055149127743387, | |
| "grad_norm": 8.786170853391159, | |
| "learning_rate": 0.00010907699037620297, | |
| "loss": 0.3003, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 2.6083286437816544, | |
| "grad_norm": 17.262373356558324, | |
| "learning_rate": 0.00010885826771653542, | |
| "loss": 0.3603, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 2.61114237478897, | |
| "grad_norm": 86.87405146897451, | |
| "learning_rate": 0.00010863954505686789, | |
| "loss": 0.3915, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 2.613956105796286, | |
| "grad_norm": 70.29811492932059, | |
| "learning_rate": 0.00010842082239720034, | |
| "loss": 0.5594, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 2.6167698368036016, | |
| "grad_norm": 102.37891262155871, | |
| "learning_rate": 0.0001082020997375328, | |
| "loss": 0.477, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.6195835678109174, | |
| "grad_norm": 116.93161920779444, | |
| "learning_rate": 0.00010798337707786526, | |
| "loss": 0.8184, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 2.622397298818233, | |
| "grad_norm": 36.178742582782164, | |
| "learning_rate": 0.00010776465441819773, | |
| "loss": 0.7671, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 2.6252110298255484, | |
| "grad_norm": 354.6394442768764, | |
| "learning_rate": 0.00010754593175853017, | |
| "loss": 0.5188, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 2.628024760832864, | |
| "grad_norm": 4.175931338154612, | |
| "learning_rate": 0.00010732720909886263, | |
| "loss": 0.4434, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 2.63083849184018, | |
| "grad_norm": 56.1280866933836, | |
| "learning_rate": 0.0001071084864391951, | |
| "loss": 0.8639, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.6336522228474957, | |
| "grad_norm": 59.20745896569175, | |
| "learning_rate": 0.00010688976377952754, | |
| "loss": 0.3947, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 2.6364659538548114, | |
| "grad_norm": 55.614426780242646, | |
| "learning_rate": 0.00010667104111986001, | |
| "loss": 0.4354, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 2.639279684862127, | |
| "grad_norm": 50.81213904295994, | |
| "learning_rate": 0.00010645231846019246, | |
| "loss": 0.5589, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 2.642093415869443, | |
| "grad_norm": 34.20241547637593, | |
| "learning_rate": 0.00010623359580052492, | |
| "loss": 0.5685, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 2.6449071468767587, | |
| "grad_norm": 17.555635593890102, | |
| "learning_rate": 0.00010601487314085738, | |
| "loss": 0.4639, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.6477208778840744, | |
| "grad_norm": 16.284132923705343, | |
| "learning_rate": 0.00010579615048118985, | |
| "loss": 0.3629, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 2.65053460889139, | |
| "grad_norm": 64.32745908031606, | |
| "learning_rate": 0.00010557742782152229, | |
| "loss": 0.488, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 2.653348339898706, | |
| "grad_norm": 75.65983508131147, | |
| "learning_rate": 0.00010535870516185476, | |
| "loss": 0.4648, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 2.6561620709060216, | |
| "grad_norm": 12.839163573898897, | |
| "learning_rate": 0.00010513998250218722, | |
| "loss": 0.5513, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 2.658975801913337, | |
| "grad_norm": 4.358631397049856, | |
| "learning_rate": 0.00010492125984251969, | |
| "loss": 0.4445, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.6617895329206527, | |
| "grad_norm": 26.10775381519202, | |
| "learning_rate": 0.00010470253718285213, | |
| "loss": 0.4745, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 2.6646032639279684, | |
| "grad_norm": 77.73370762217442, | |
| "learning_rate": 0.0001044838145231846, | |
| "loss": 0.7683, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 2.667416994935284, | |
| "grad_norm": 35.63066051419088, | |
| "learning_rate": 0.00010426509186351706, | |
| "loss": 0.423, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 2.6702307259426, | |
| "grad_norm": 15.36437442788385, | |
| "learning_rate": 0.0001040463692038495, | |
| "loss": 0.6104, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 2.6730444569499157, | |
| "grad_norm": 10.203853452654112, | |
| "learning_rate": 0.00010382764654418197, | |
| "loss": 0.5783, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.6758581879572314, | |
| "grad_norm": 20.8110952561946, | |
| "learning_rate": 0.00010360892388451444, | |
| "loss": 0.3268, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 2.678671918964547, | |
| "grad_norm": 30.832138697360744, | |
| "learning_rate": 0.00010339020122484688, | |
| "loss": 0.4376, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 2.6814856499718625, | |
| "grad_norm": 458.34051462177115, | |
| "learning_rate": 0.00010317147856517934, | |
| "loss": 0.3852, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 2.684299380979178, | |
| "grad_norm": 46.6171709293664, | |
| "learning_rate": 0.00010295275590551181, | |
| "loss": 0.9121, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.687113111986494, | |
| "grad_norm": 272.84120193286054, | |
| "learning_rate": 0.00010273403324584425, | |
| "loss": 0.89, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.6899268429938097, | |
| "grad_norm": 120.77864028855092, | |
| "learning_rate": 0.00010251531058617672, | |
| "loss": 0.3202, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 2.6927405740011254, | |
| "grad_norm": 15.406524257269288, | |
| "learning_rate": 0.00010229658792650918, | |
| "loss": 0.4391, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.695554305008441, | |
| "grad_norm": 19.60890393546223, | |
| "learning_rate": 0.00010207786526684163, | |
| "loss": 0.4306, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 2.698368036015757, | |
| "grad_norm": 43.141776219101715, | |
| "learning_rate": 0.00010185914260717409, | |
| "loss": 0.6398, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 2.7011817670230727, | |
| "grad_norm": 43.13769827492887, | |
| "learning_rate": 0.00010164041994750656, | |
| "loss": 0.3964, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.7039954980303884, | |
| "grad_norm": 3.6195378103594056, | |
| "learning_rate": 0.000101421697287839, | |
| "loss": 0.3933, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 2.706809229037704, | |
| "grad_norm": 37.37053494271392, | |
| "learning_rate": 0.00010120297462817147, | |
| "loss": 0.5938, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 2.70962296004502, | |
| "grad_norm": 14.787523531149347, | |
| "learning_rate": 0.00010098425196850393, | |
| "loss": 0.4718, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.7124366910523356, | |
| "grad_norm": 64.91170649580671, | |
| "learning_rate": 0.00010076552930883637, | |
| "loss": 0.479, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 2.715250422059651, | |
| "grad_norm": 10.027583266140544, | |
| "learning_rate": 0.00010054680664916884, | |
| "loss": 0.3553, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.7180641530669667, | |
| "grad_norm": 37.02256693061005, | |
| "learning_rate": 0.00010032808398950131, | |
| "loss": 0.4199, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.7208778840742824, | |
| "grad_norm": 18.07635825713862, | |
| "learning_rate": 0.00010010936132983376, | |
| "loss": 0.6734, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 2.723691615081598, | |
| "grad_norm": 13.442989922340166, | |
| "learning_rate": 9.989063867016621e-05, | |
| "loss": 0.4984, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 2.726505346088914, | |
| "grad_norm": 5.303880680734515, | |
| "learning_rate": 9.967191601049868e-05, | |
| "loss": 0.3973, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.7293190770962297, | |
| "grad_norm": 29.495823996978398, | |
| "learning_rate": 9.945319335083114e-05, | |
| "loss": 0.4915, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.7321328081035454, | |
| "grad_norm": 130.2447313645269, | |
| "learning_rate": 9.92344706911636e-05, | |
| "loss": 0.6082, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 2.734946539110861, | |
| "grad_norm": 6.49456770331547, | |
| "learning_rate": 9.901574803149605e-05, | |
| "loss": 0.3068, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.7377602701181765, | |
| "grad_norm": 118.57235424251638, | |
| "learning_rate": 9.879702537182852e-05, | |
| "loss": 0.6114, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 2.740574001125492, | |
| "grad_norm": 231.0023016216336, | |
| "learning_rate": 9.857830271216096e-05, | |
| "loss": 0.8726, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 2.743387732132808, | |
| "grad_norm": 31.19265942143221, | |
| "learning_rate": 9.835958005249344e-05, | |
| "loss": 0.541, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.7462014631401237, | |
| "grad_norm": 5.225618741939991, | |
| "learning_rate": 9.814085739282589e-05, | |
| "loss": 0.1824, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 2.7490151941474394, | |
| "grad_norm": 15.212142485160932, | |
| "learning_rate": 9.792213473315835e-05, | |
| "loss": 0.759, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 2.751828925154755, | |
| "grad_norm": 31.73381245582647, | |
| "learning_rate": 9.77034120734908e-05, | |
| "loss": 0.5458, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.754642656162071, | |
| "grad_norm": 0.3819179114787675, | |
| "learning_rate": 9.748468941382327e-05, | |
| "loss": 0.6246, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 2.7574563871693867, | |
| "grad_norm": 24.57005039190513, | |
| "learning_rate": 9.726596675415572e-05, | |
| "loss": 0.809, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.7602701181767024, | |
| "grad_norm": 205.8236592890733, | |
| "learning_rate": 9.704724409448817e-05, | |
| "loss": 0.7747, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.763083849184018, | |
| "grad_norm": 13.399260177045598, | |
| "learning_rate": 9.682852143482064e-05, | |
| "loss": 0.5277, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 2.765897580191334, | |
| "grad_norm": 21.19679766301598, | |
| "learning_rate": 9.660979877515309e-05, | |
| "loss": 0.7542, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 2.7687113111986497, | |
| "grad_norm": 15.049015807925302, | |
| "learning_rate": 9.639107611548556e-05, | |
| "loss": 0.4367, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.771525042205965, | |
| "grad_norm": 34.32401152713521, | |
| "learning_rate": 9.617235345581801e-05, | |
| "loss": 0.4548, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.7743387732132807, | |
| "grad_norm": 3.273022569610971, | |
| "learning_rate": 9.595363079615047e-05, | |
| "loss": 0.5695, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 2.7771525042205965, | |
| "grad_norm": 55.423334541815585, | |
| "learning_rate": 9.573490813648293e-05, | |
| "loss": 0.4559, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.779966235227912, | |
| "grad_norm": 28.16145208485805, | |
| "learning_rate": 9.55161854768154e-05, | |
| "loss": 0.5161, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 2.782779966235228, | |
| "grad_norm": 29.663487490852017, | |
| "learning_rate": 9.529746281714784e-05, | |
| "loss": 0.5348, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 2.7855936972425437, | |
| "grad_norm": 26.125896625555498, | |
| "learning_rate": 9.507874015748031e-05, | |
| "loss": 0.3743, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.7884074282498594, | |
| "grad_norm": 12.642777363424036, | |
| "learning_rate": 9.486001749781277e-05, | |
| "loss": 0.274, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 2.791221159257175, | |
| "grad_norm": 55.49624560948837, | |
| "learning_rate": 9.464129483814524e-05, | |
| "loss": 0.7875, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 2.7940348902644905, | |
| "grad_norm": 49.81266964604724, | |
| "learning_rate": 9.442257217847768e-05, | |
| "loss": 0.5697, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.7968486212718062, | |
| "grad_norm": 19.16263333950446, | |
| "learning_rate": 9.420384951881015e-05, | |
| "loss": 0.5035, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 2.799662352279122, | |
| "grad_norm": 26.980836018843, | |
| "learning_rate": 9.39851268591426e-05, | |
| "loss": 0.6275, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.8024760832864377, | |
| "grad_norm": 3.6601734211863945, | |
| "learning_rate": 9.376640419947505e-05, | |
| "loss": 0.4986, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.8052898142937535, | |
| "grad_norm": 80.76032184024673, | |
| "learning_rate": 9.354768153980752e-05, | |
| "loss": 0.554, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 2.808103545301069, | |
| "grad_norm": 12.4762811742511, | |
| "learning_rate": 9.332895888013999e-05, | |
| "loss": 0.4325, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 2.810917276308385, | |
| "grad_norm": 28.339007190053675, | |
| "learning_rate": 9.311023622047243e-05, | |
| "loss": 0.3323, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.8137310073157007, | |
| "grad_norm": 211.21007128891176, | |
| "learning_rate": 9.289151356080489e-05, | |
| "loss": 0.9021, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.8165447383230164, | |
| "grad_norm": 33.31758409534305, | |
| "learning_rate": 9.267279090113736e-05, | |
| "loss": 0.4361, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 2.819358469330332, | |
| "grad_norm": 38.93754008185466, | |
| "learning_rate": 9.24540682414698e-05, | |
| "loss": 0.6279, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 2.822172200337648, | |
| "grad_norm": 25.529883648136195, | |
| "learning_rate": 9.223534558180227e-05, | |
| "loss": 0.4936, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 2.8249859313449637, | |
| "grad_norm": 54.257511831814064, | |
| "learning_rate": 9.201662292213473e-05, | |
| "loss": 0.5469, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 2.827799662352279, | |
| "grad_norm": 20.90804542506976, | |
| "learning_rate": 9.179790026246718e-05, | |
| "loss": 0.3663, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.8306133933595947, | |
| "grad_norm": 87.21990526473672, | |
| "learning_rate": 9.157917760279964e-05, | |
| "loss": 0.6471, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 2.8334271243669105, | |
| "grad_norm": 4.279230208467407, | |
| "learning_rate": 9.136045494313211e-05, | |
| "loss": 0.4365, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 2.836240855374226, | |
| "grad_norm": 16.216814135083176, | |
| "learning_rate": 9.114173228346455e-05, | |
| "loss": 0.395, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 2.839054586381542, | |
| "grad_norm": 29.69031936910585, | |
| "learning_rate": 9.092300962379702e-05, | |
| "loss": 0.2356, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 2.8418683173888577, | |
| "grad_norm": 1.5595613943954527, | |
| "learning_rate": 9.070428696412948e-05, | |
| "loss": 0.2262, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.8446820483961734, | |
| "grad_norm": 4.525293278789326, | |
| "learning_rate": 9.048556430446192e-05, | |
| "loss": 0.9052, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 2.847495779403489, | |
| "grad_norm": 26.773310344606703, | |
| "learning_rate": 9.026684164479439e-05, | |
| "loss": 0.6461, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 2.8503095104108045, | |
| "grad_norm": 48.70908526560008, | |
| "learning_rate": 9.004811898512685e-05, | |
| "loss": 0.7359, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 2.8531232414181202, | |
| "grad_norm": 84.11864704783623, | |
| "learning_rate": 8.98293963254593e-05, | |
| "loss": 0.4548, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 2.855936972425436, | |
| "grad_norm": 5.524906428491934, | |
| "learning_rate": 8.961067366579176e-05, | |
| "loss": 0.4625, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.8587507034327517, | |
| "grad_norm": 10.319915749419431, | |
| "learning_rate": 8.939195100612423e-05, | |
| "loss": 0.6446, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 2.8615644344400675, | |
| "grad_norm": 22.781369712630177, | |
| "learning_rate": 8.917322834645669e-05, | |
| "loss": 0.353, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 2.864378165447383, | |
| "grad_norm": 35.25984458553167, | |
| "learning_rate": 8.895450568678914e-05, | |
| "loss": 0.6551, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 2.867191896454699, | |
| "grad_norm": 42.157133518741865, | |
| "learning_rate": 8.87357830271216e-05, | |
| "loss": 0.4496, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 2.8700056274620147, | |
| "grad_norm": 22.81314493600198, | |
| "learning_rate": 8.851706036745407e-05, | |
| "loss": 0.5678, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.8728193584693305, | |
| "grad_norm": 36.04659178861918, | |
| "learning_rate": 8.829833770778651e-05, | |
| "loss": 0.4828, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 2.875633089476646, | |
| "grad_norm": 56.67857438617218, | |
| "learning_rate": 8.807961504811898e-05, | |
| "loss": 0.5019, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 2.878446820483962, | |
| "grad_norm": 9.111045425788525, | |
| "learning_rate": 8.786089238845144e-05, | |
| "loss": 0.5607, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 2.8812605514912777, | |
| "grad_norm": 3.505050666852027, | |
| "learning_rate": 8.76421697287839e-05, | |
| "loss": 0.4513, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 2.884074282498593, | |
| "grad_norm": 54.490312257720156, | |
| "learning_rate": 8.742344706911635e-05, | |
| "loss": 0.6762, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.8868880135059087, | |
| "grad_norm": 25.476228006992702, | |
| "learning_rate": 8.720472440944882e-05, | |
| "loss": 0.5411, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.8897017445132245, | |
| "grad_norm": 12.990747730995873, | |
| "learning_rate": 8.698600174978127e-05, | |
| "loss": 0.5335, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 2.8925154755205402, | |
| "grad_norm": 116.48590241626219, | |
| "learning_rate": 8.676727909011372e-05, | |
| "loss": 0.3502, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 2.895329206527856, | |
| "grad_norm": 28.589708094686127, | |
| "learning_rate": 8.654855643044619e-05, | |
| "loss": 0.5962, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 2.8981429375351717, | |
| "grad_norm": 48.20502421441493, | |
| "learning_rate": 8.632983377077864e-05, | |
| "loss": 0.5072, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.9009566685424875, | |
| "grad_norm": 29.045361435820396, | |
| "learning_rate": 8.61111111111111e-05, | |
| "loss": 0.5328, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 2.903770399549803, | |
| "grad_norm": 23.463753067966675, | |
| "learning_rate": 8.589238845144356e-05, | |
| "loss": 0.4669, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 2.9065841305571185, | |
| "grad_norm": 8.94339841328865, | |
| "learning_rate": 8.567366579177602e-05, | |
| "loss": 0.6852, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 2.9093978615644343, | |
| "grad_norm": 13.126501900027074, | |
| "learning_rate": 8.545494313210847e-05, | |
| "loss": 0.5224, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 2.91221159257175, | |
| "grad_norm": 11.322296130692187, | |
| "learning_rate": 8.523622047244094e-05, | |
| "loss": 0.4298, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.9150253235790657, | |
| "grad_norm": 3.9331354922682498, | |
| "learning_rate": 8.501749781277339e-05, | |
| "loss": 0.3009, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 2.9178390545863815, | |
| "grad_norm": 2.3186540408341734, | |
| "learning_rate": 8.479877515310586e-05, | |
| "loss": 0.4631, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 2.9206527855936972, | |
| "grad_norm": 33.11162361117131, | |
| "learning_rate": 8.458005249343831e-05, | |
| "loss": 0.3775, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 2.923466516601013, | |
| "grad_norm": 12.497923893181124, | |
| "learning_rate": 8.436132983377076e-05, | |
| "loss": 0.5401, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 2.9262802476083287, | |
| "grad_norm": 9.707752939333481, | |
| "learning_rate": 8.414260717410323e-05, | |
| "loss": 0.5099, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.9290939786156445, | |
| "grad_norm": 33.075796904013835, | |
| "learning_rate": 8.39238845144357e-05, | |
| "loss": 0.4239, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 2.93190770962296, | |
| "grad_norm": 27.030408601399838, | |
| "learning_rate": 8.370516185476815e-05, | |
| "loss": 0.3516, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 2.934721440630276, | |
| "grad_norm": 40.90648498933933, | |
| "learning_rate": 8.34864391951006e-05, | |
| "loss": 0.4291, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 2.9375351716375917, | |
| "grad_norm": 43.38996380641155, | |
| "learning_rate": 8.326771653543307e-05, | |
| "loss": 0.7152, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 2.940348902644907, | |
| "grad_norm": 25.52567647846434, | |
| "learning_rate": 8.304899387576552e-05, | |
| "loss": 0.1973, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.9431626336522227, | |
| "grad_norm": 45.972037886947575, | |
| "learning_rate": 8.283027121609798e-05, | |
| "loss": 0.5599, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 2.9459763646595385, | |
| "grad_norm": 23.400081384448004, | |
| "learning_rate": 8.261154855643044e-05, | |
| "loss": 0.5785, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 2.9487900956668542, | |
| "grad_norm": 15.453689858013234, | |
| "learning_rate": 8.23928258967629e-05, | |
| "loss": 0.5648, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 2.95160382667417, | |
| "grad_norm": 23.99708247332893, | |
| "learning_rate": 8.217410323709535e-05, | |
| "loss": 0.6255, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 2.9544175576814857, | |
| "grad_norm": 85.44333249815278, | |
| "learning_rate": 8.195538057742782e-05, | |
| "loss": 0.4824, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.9572312886888015, | |
| "grad_norm": 13.197420910549328, | |
| "learning_rate": 8.173665791776027e-05, | |
| "loss": 0.382, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 2.9600450196961168, | |
| "grad_norm": 24.812200580491094, | |
| "learning_rate": 8.151793525809273e-05, | |
| "loss": 0.4638, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 2.9628587507034325, | |
| "grad_norm": 23.947322941527855, | |
| "learning_rate": 8.129921259842519e-05, | |
| "loss": 0.2828, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.9656724817107483, | |
| "grad_norm": 26.603437638257738, | |
| "learning_rate": 8.108048993875766e-05, | |
| "loss": 0.4689, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 2.968486212718064, | |
| "grad_norm": 25.162149919783538, | |
| "learning_rate": 8.08617672790901e-05, | |
| "loss": 0.7301, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.9712999437253798, | |
| "grad_norm": 1390.431135363237, | |
| "learning_rate": 8.064304461942257e-05, | |
| "loss": 0.5298, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 2.9741136747326955, | |
| "grad_norm": 51.62357269235231, | |
| "learning_rate": 8.042432195975503e-05, | |
| "loss": 0.3141, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 2.9769274057400112, | |
| "grad_norm": 21.428468158450375, | |
| "learning_rate": 8.020559930008747e-05, | |
| "loss": 0.5803, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 2.979741136747327, | |
| "grad_norm": 12.693813240141665, | |
| "learning_rate": 7.998687664041994e-05, | |
| "loss": 0.7488, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 2.9825548677546427, | |
| "grad_norm": 67.35584313661865, | |
| "learning_rate": 7.97681539807524e-05, | |
| "loss": 0.3862, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.9853685987619585, | |
| "grad_norm": 75.47237178728545, | |
| "learning_rate": 7.954943132108485e-05, | |
| "loss": 0.5021, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 2.9881823297692742, | |
| "grad_norm": 3.6925131359371934, | |
| "learning_rate": 7.933070866141731e-05, | |
| "loss": 0.4324, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 2.99099606077659, | |
| "grad_norm": 11.919767665974996, | |
| "learning_rate": 7.911198600174978e-05, | |
| "loss": 0.4497, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 2.9938097917839057, | |
| "grad_norm": 84.62823094746291, | |
| "learning_rate": 7.889326334208222e-05, | |
| "loss": 0.7873, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 2.996623522791221, | |
| "grad_norm": 47.500675839083364, | |
| "learning_rate": 7.867454068241469e-05, | |
| "loss": 0.4138, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.9994372537985368, | |
| "grad_norm": 91.2796382898892, | |
| "learning_rate": 7.845581802274715e-05, | |
| "loss": 0.8051, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_0_f1": 0.6165994034041059, | |
| "eval_0_precision": 0.5338802795502887, | |
| "eval_0_recall": 0.7296511627906976, | |
| "eval_1_f1": 0.8262425447316105, | |
| "eval_1_precision": 0.8886418063633253, | |
| "eval_1_recall": 0.772031505424283, | |
| "eval_accuracy": 0.7608624274926125, | |
| "eval_loss": 0.5966796875, | |
| "eval_runtime": 467.0404, | |
| "eval_samples_per_second": 19.564, | |
| "eval_steps_per_second": 3.261, | |
| "step": 10662 | |
| }, | |
| { | |
| "epoch": 3.0022509848058525, | |
| "grad_norm": 7.707584508187591, | |
| "learning_rate": 7.823709536307962e-05, | |
| "loss": 0.2622, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 3.0050647158131683, | |
| "grad_norm": 51.54450574912668, | |
| "learning_rate": 7.801837270341206e-05, | |
| "loss": 0.4806, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 3.007878446820484, | |
| "grad_norm": 48.99869202794937, | |
| "learning_rate": 7.779965004374453e-05, | |
| "loss": 0.5045, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 3.0106921778277997, | |
| "grad_norm": 18.76026822260351, | |
| "learning_rate": 7.758092738407699e-05, | |
| "loss": 0.5507, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 3.0135059088351155, | |
| "grad_norm": 53.4574294020042, | |
| "learning_rate": 7.736220472440943e-05, | |
| "loss": 0.3936, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 3.0163196398424312, | |
| "grad_norm": 126.24362236004032, | |
| "learning_rate": 7.71434820647419e-05, | |
| "loss": 0.3933, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 3.019133370849747, | |
| "grad_norm": 124.38545215336664, | |
| "learning_rate": 7.692475940507437e-05, | |
| "loss": 0.6389, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 3.0219471018570623, | |
| "grad_norm": 55.57968201814324, | |
| "learning_rate": 7.670603674540681e-05, | |
| "loss": 0.5718, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 3.024760832864378, | |
| "grad_norm": 48.963769100707765, | |
| "learning_rate": 7.648731408573927e-05, | |
| "loss": 0.6291, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 3.0275745638716938, | |
| "grad_norm": 12.119240877657461, | |
| "learning_rate": 7.626859142607174e-05, | |
| "loss": 0.5235, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 3.0303882948790095, | |
| "grad_norm": 20.915222819776293, | |
| "learning_rate": 7.604986876640418e-05, | |
| "loss": 0.4706, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 3.0332020258863253, | |
| "grad_norm": 9.102773937759299, | |
| "learning_rate": 7.583114610673665e-05, | |
| "loss": 0.232, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 3.036015756893641, | |
| "grad_norm": 10.249613894191867, | |
| "learning_rate": 7.561242344706911e-05, | |
| "loss": 0.2995, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 3.0388294879009567, | |
| "grad_norm": 67.40961792746334, | |
| "learning_rate": 7.539370078740157e-05, | |
| "loss": 0.4895, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 3.0416432189082725, | |
| "grad_norm": 6.032355759360925, | |
| "learning_rate": 7.517497812773402e-05, | |
| "loss": 0.446, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 3.0444569499155882, | |
| "grad_norm": 86.16554645668533, | |
| "learning_rate": 7.495625546806648e-05, | |
| "loss": 0.5506, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 3.047270680922904, | |
| "grad_norm": 41.082998364664704, | |
| "learning_rate": 7.473753280839895e-05, | |
| "loss": 0.4396, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 3.0500844119302193, | |
| "grad_norm": 22.994047192754973, | |
| "learning_rate": 7.45188101487314e-05, | |
| "loss": 0.4426, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 3.052898142937535, | |
| "grad_norm": 52.522206777883255, | |
| "learning_rate": 7.430008748906386e-05, | |
| "loss": 0.1551, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 3.0557118739448508, | |
| "grad_norm": 27.72295078584995, | |
| "learning_rate": 7.408136482939632e-05, | |
| "loss": 0.4259, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 3.0585256049521665, | |
| "grad_norm": 2.357706254274233, | |
| "learning_rate": 7.386264216972878e-05, | |
| "loss": 0.4119, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 3.0613393359594823, | |
| "grad_norm": 6.85515022950724, | |
| "learning_rate": 7.364391951006125e-05, | |
| "loss": 0.5523, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 3.064153066966798, | |
| "grad_norm": 34.30181906133321, | |
| "learning_rate": 7.342519685039369e-05, | |
| "loss": 0.2721, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 3.0669667979741138, | |
| "grad_norm": 54.82144297390585, | |
| "learning_rate": 7.320647419072614e-05, | |
| "loss": 0.9236, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 3.0697805289814295, | |
| "grad_norm": 129.72620772003393, | |
| "learning_rate": 7.298775153105861e-05, | |
| "loss": 0.5392, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 3.0725942599887452, | |
| "grad_norm": 3.2339209805746716, | |
| "learning_rate": 7.276902887139107e-05, | |
| "loss": 0.488, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 3.0754079909960605, | |
| "grad_norm": 10.02255512138656, | |
| "learning_rate": 7.255030621172353e-05, | |
| "loss": 0.5758, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 3.0782217220033763, | |
| "grad_norm": 18.329541476019806, | |
| "learning_rate": 7.233158355205598e-05, | |
| "loss": 0.4897, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 3.081035453010692, | |
| "grad_norm": 24.650839029351474, | |
| "learning_rate": 7.211286089238844e-05, | |
| "loss": 0.6129, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 3.083849184018008, | |
| "grad_norm": 3.945394920205831, | |
| "learning_rate": 7.189413823272091e-05, | |
| "loss": 0.2788, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 3.0866629150253235, | |
| "grad_norm": 8.209532211869098, | |
| "learning_rate": 7.167541557305337e-05, | |
| "loss": 0.6177, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 3.0894766460326393, | |
| "grad_norm": 42.404772064384424, | |
| "learning_rate": 7.145669291338582e-05, | |
| "loss": 0.5027, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 3.092290377039955, | |
| "grad_norm": 71.08207219724257, | |
| "learning_rate": 7.123797025371828e-05, | |
| "loss": 0.4492, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 3.0951041080472708, | |
| "grad_norm": 24.630629898005367, | |
| "learning_rate": 7.101924759405074e-05, | |
| "loss": 0.6953, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.0979178390545865, | |
| "grad_norm": 28.59624153496924, | |
| "learning_rate": 7.080052493438319e-05, | |
| "loss": 0.4651, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 3.1007315700619023, | |
| "grad_norm": 10.750146726227943, | |
| "learning_rate": 7.058180227471566e-05, | |
| "loss": 0.4746, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 3.103545301069218, | |
| "grad_norm": 1.182063914178294, | |
| "learning_rate": 7.036307961504812e-05, | |
| "loss": 0.3329, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 3.1063590320765333, | |
| "grad_norm": 60.111489378012585, | |
| "learning_rate": 7.014435695538056e-05, | |
| "loss": 0.5148, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 3.109172763083849, | |
| "grad_norm": 9.162581119984912, | |
| "learning_rate": 6.992563429571303e-05, | |
| "loss": 0.5404, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 3.111986494091165, | |
| "grad_norm": 630.9338889817419, | |
| "learning_rate": 6.970691163604549e-05, | |
| "loss": 0.454, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 3.1148002250984805, | |
| "grad_norm": 423.243815972294, | |
| "learning_rate": 6.948818897637794e-05, | |
| "loss": 0.6842, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 3.1176139561057963, | |
| "grad_norm": 11.73241000732919, | |
| "learning_rate": 6.92694663167104e-05, | |
| "loss": 0.1829, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 3.120427687113112, | |
| "grad_norm": 13.659308573313247, | |
| "learning_rate": 6.905074365704286e-05, | |
| "loss": 0.6033, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 3.1232414181204278, | |
| "grad_norm": 4.2358714754973805, | |
| "learning_rate": 6.883202099737533e-05, | |
| "loss": 0.589, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 3.1260551491277435, | |
| "grad_norm": 0.783313539057735, | |
| "learning_rate": 6.861329833770778e-05, | |
| "loss": 0.2935, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 3.1288688801350593, | |
| "grad_norm": 66.7036473639581, | |
| "learning_rate": 6.839457567804024e-05, | |
| "loss": 0.7288, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 3.1316826111423746, | |
| "grad_norm": 49.44587487081624, | |
| "learning_rate": 6.81758530183727e-05, | |
| "loss": 0.5051, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 3.1344963421496903, | |
| "grad_norm": 82.08430399312728, | |
| "learning_rate": 6.795713035870515e-05, | |
| "loss": 0.7624, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 3.137310073157006, | |
| "grad_norm": 41.54423962771268, | |
| "learning_rate": 6.773840769903761e-05, | |
| "loss": 0.6681, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 3.140123804164322, | |
| "grad_norm": 6.054984593639571, | |
| "learning_rate": 6.751968503937008e-05, | |
| "loss": 0.4486, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 3.1429375351716375, | |
| "grad_norm": 42.03326946639902, | |
| "learning_rate": 6.730096237970254e-05, | |
| "loss": 0.3373, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 3.1457512661789533, | |
| "grad_norm": 61.9766842778273, | |
| "learning_rate": 6.708223972003498e-05, | |
| "loss": 0.8549, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 3.148564997186269, | |
| "grad_norm": 31.696383718846477, | |
| "learning_rate": 6.686351706036745e-05, | |
| "loss": 0.8351, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 3.1513787281935848, | |
| "grad_norm": 40.35191568698288, | |
| "learning_rate": 6.66447944006999e-05, | |
| "loss": 0.3437, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 3.1541924592009005, | |
| "grad_norm": 10.679496275076508, | |
| "learning_rate": 6.642607174103236e-05, | |
| "loss": 0.4696, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 3.1570061902082163, | |
| "grad_norm": 15.537348649384192, | |
| "learning_rate": 6.620734908136482e-05, | |
| "loss": 0.4651, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 3.159819921215532, | |
| "grad_norm": 1.5590651269600222, | |
| "learning_rate": 6.598862642169728e-05, | |
| "loss": 0.355, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 3.1626336522228473, | |
| "grad_norm": 46.340907086190306, | |
| "learning_rate": 6.576990376202975e-05, | |
| "loss": 0.3964, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 3.165447383230163, | |
| "grad_norm": 62.12320109474248, | |
| "learning_rate": 6.55511811023622e-05, | |
| "loss": 0.6063, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 3.168261114237479, | |
| "grad_norm": 19.69816239773773, | |
| "learning_rate": 6.533245844269466e-05, | |
| "loss": 0.3146, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 3.1710748452447945, | |
| "grad_norm": 73.39996557832582, | |
| "learning_rate": 6.511373578302711e-05, | |
| "loss": 0.5348, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 3.1738885762521103, | |
| "grad_norm": 26.160849500666586, | |
| "learning_rate": 6.489501312335957e-05, | |
| "loss": 0.2218, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 3.176702307259426, | |
| "grad_norm": 11.032717942104254, | |
| "learning_rate": 6.467629046369203e-05, | |
| "loss": 0.4841, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 3.179516038266742, | |
| "grad_norm": 22.122247659427618, | |
| "learning_rate": 6.44575678040245e-05, | |
| "loss": 0.3955, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 3.1823297692740575, | |
| "grad_norm": 17.431813666502595, | |
| "learning_rate": 6.423884514435695e-05, | |
| "loss": 0.4135, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 3.1851435002813733, | |
| "grad_norm": 20.230459201101173, | |
| "learning_rate": 6.402012248468941e-05, | |
| "loss": 0.3017, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 3.1879572312886886, | |
| "grad_norm": 0.40726803235691345, | |
| "learning_rate": 6.380139982502187e-05, | |
| "loss": 0.5075, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 3.1907709622960043, | |
| "grad_norm": 15.28283361018702, | |
| "learning_rate": 6.358267716535432e-05, | |
| "loss": 0.6175, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 3.19358469330332, | |
| "grad_norm": 15.820899507911468, | |
| "learning_rate": 6.33639545056868e-05, | |
| "loss": 0.7891, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 3.196398424310636, | |
| "grad_norm": 55.444795130680475, | |
| "learning_rate": 6.314523184601924e-05, | |
| "loss": 0.5612, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 3.1992121553179516, | |
| "grad_norm": 32.00507189372659, | |
| "learning_rate": 6.292650918635169e-05, | |
| "loss": 0.3554, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 3.2020258863252673, | |
| "grad_norm": 2.0628646280824503, | |
| "learning_rate": 6.270778652668416e-05, | |
| "loss": 0.4491, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 3.204839617332583, | |
| "grad_norm": 29.489592086070243, | |
| "learning_rate": 6.248906386701662e-05, | |
| "loss": 0.7655, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 3.207653348339899, | |
| "grad_norm": 1.6061570305998563, | |
| "learning_rate": 6.227034120734908e-05, | |
| "loss": 0.3726, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 3.2104670793472145, | |
| "grad_norm": 17.153820015143744, | |
| "learning_rate": 6.205161854768153e-05, | |
| "loss": 0.63, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 3.2132808103545303, | |
| "grad_norm": 12.877483190952468, | |
| "learning_rate": 6.183289588801399e-05, | |
| "loss": 0.2199, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 3.216094541361846, | |
| "grad_norm": 284.41612018123254, | |
| "learning_rate": 6.161417322834645e-05, | |
| "loss": 0.7118, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 3.2189082723691613, | |
| "grad_norm": 88.85724386333004, | |
| "learning_rate": 6.139545056867892e-05, | |
| "loss": 0.6572, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 3.221722003376477, | |
| "grad_norm": 116.43335458089302, | |
| "learning_rate": 6.119860017497812e-05, | |
| "loss": 0.3925, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 3.224535734383793, | |
| "grad_norm": 9.34218826766074, | |
| "learning_rate": 6.0979877515310585e-05, | |
| "loss": 0.4467, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 3.2273494653911086, | |
| "grad_norm": 5.47990408045989, | |
| "learning_rate": 6.0761154855643035e-05, | |
| "loss": 0.6459, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 3.2301631963984243, | |
| "grad_norm": 12.032870993467688, | |
| "learning_rate": 6.0542432195975505e-05, | |
| "loss": 0.4638, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 3.23297692740574, | |
| "grad_norm": 10.978821779199087, | |
| "learning_rate": 6.0323709536307955e-05, | |
| "loss": 0.4797, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 3.235790658413056, | |
| "grad_norm": 26.344873524302695, | |
| "learning_rate": 6.010498687664041e-05, | |
| "loss": 0.4618, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.2386043894203715, | |
| "grad_norm": 36.71335853765133, | |
| "learning_rate": 5.9886264216972874e-05, | |
| "loss": 0.4282, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 3.2414181204276873, | |
| "grad_norm": 64.57144789900413, | |
| "learning_rate": 5.966754155730533e-05, | |
| "loss": 0.3277, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 3.2442318514350026, | |
| "grad_norm": 1.4007879828021592, | |
| "learning_rate": 5.944881889763779e-05, | |
| "loss": 0.4915, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 3.2470455824423183, | |
| "grad_norm": 3.33308990516012, | |
| "learning_rate": 5.923009623797025e-05, | |
| "loss": 0.6223, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 3.249859313449634, | |
| "grad_norm": 13.935577439593432, | |
| "learning_rate": 5.901137357830271e-05, | |
| "loss": 0.2424, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 3.25267304445695, | |
| "grad_norm": 31.282567929182168, | |
| "learning_rate": 5.879265091863516e-05, | |
| "loss": 0.4164, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 3.2554867754642656, | |
| "grad_norm": 172.63519093501742, | |
| "learning_rate": 5.8573928258967627e-05, | |
| "loss": 0.6778, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 3.2583005064715813, | |
| "grad_norm": 97.99842872138487, | |
| "learning_rate": 5.835520559930008e-05, | |
| "loss": 0.8454, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 3.261114237478897, | |
| "grad_norm": 35.317874766103294, | |
| "learning_rate": 5.8136482939632546e-05, | |
| "loss": 0.695, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 3.263927968486213, | |
| "grad_norm": 16.91213310108752, | |
| "learning_rate": 5.793963254593175e-05, | |
| "loss": 0.484, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 3.2667416994935286, | |
| "grad_norm": 128.9963496379245, | |
| "learning_rate": 5.772090988626421e-05, | |
| "loss": 0.4257, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 3.2695554305008443, | |
| "grad_norm": 193.3853393236727, | |
| "learning_rate": 5.750218722659667e-05, | |
| "loss": 0.5854, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 3.27236916150816, | |
| "grad_norm": 35.31919731163349, | |
| "learning_rate": 5.728346456692913e-05, | |
| "loss": 0.9169, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 3.2751828925154753, | |
| "grad_norm": 156.87027960130746, | |
| "learning_rate": 5.706474190726159e-05, | |
| "loss": 0.7004, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 3.277996623522791, | |
| "grad_norm": 23.682213809912607, | |
| "learning_rate": 5.684601924759405e-05, | |
| "loss": 0.3195, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 3.280810354530107, | |
| "grad_norm": 132.4746326275145, | |
| "learning_rate": 5.66272965879265e-05, | |
| "loss": 0.6326, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 3.2836240855374226, | |
| "grad_norm": 63.40352213008167, | |
| "learning_rate": 5.640857392825897e-05, | |
| "loss": 0.347, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 3.2864378165447383, | |
| "grad_norm": 9.625739657480374, | |
| "learning_rate": 5.618985126859142e-05, | |
| "loss": 0.7811, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 3.289251547552054, | |
| "grad_norm": 13.280340033380412, | |
| "learning_rate": 5.5971128608923875e-05, | |
| "loss": 0.8523, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 3.29206527855937, | |
| "grad_norm": 13.726951029125418, | |
| "learning_rate": 5.575240594925634e-05, | |
| "loss": 0.5483, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 3.2948790095666856, | |
| "grad_norm": 3.853407952070311, | |
| "learning_rate": 5.5533683289588794e-05, | |
| "loss": 0.635, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 3.2976927405740013, | |
| "grad_norm": 32.63263843171223, | |
| "learning_rate": 5.531496062992125e-05, | |
| "loss": 0.2759, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 3.3005064715813166, | |
| "grad_norm": 17.37594206746597, | |
| "learning_rate": 5.5096237970253714e-05, | |
| "loss": 0.3167, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 3.3033202025886323, | |
| "grad_norm": 10.92450992851185, | |
| "learning_rate": 5.487751531058617e-05, | |
| "loss": 0.2056, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 3.306133933595948, | |
| "grad_norm": 18.621614545321687, | |
| "learning_rate": 5.4658792650918634e-05, | |
| "loss": 0.2418, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 3.308947664603264, | |
| "grad_norm": 365.3094150018103, | |
| "learning_rate": 5.444006999125109e-05, | |
| "loss": 0.8144, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 3.3117613956105796, | |
| "grad_norm": 0.31282059170952675, | |
| "learning_rate": 5.422134733158355e-05, | |
| "loss": 0.5051, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 3.3145751266178953, | |
| "grad_norm": 34.65838459619827, | |
| "learning_rate": 5.400262467191601e-05, | |
| "loss": 1.258, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 3.317388857625211, | |
| "grad_norm": 62.48964602346488, | |
| "learning_rate": 5.380577427821522e-05, | |
| "loss": 1.1, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 3.320202588632527, | |
| "grad_norm": 126.25946649997921, | |
| "learning_rate": 5.358705161854768e-05, | |
| "loss": 0.4886, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 3.3230163196398426, | |
| "grad_norm": 94.42597920438025, | |
| "learning_rate": 5.3368328958880136e-05, | |
| "loss": 0.6764, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 3.3258300506471583, | |
| "grad_norm": 29.13735782010915, | |
| "learning_rate": 5.314960629921259e-05, | |
| "loss": 1.008, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 3.328643781654474, | |
| "grad_norm": 8.634773437316992, | |
| "learning_rate": 5.2930883639545056e-05, | |
| "loss": 0.2147, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 3.3314575126617894, | |
| "grad_norm": 14.39857109864753, | |
| "learning_rate": 5.271216097987751e-05, | |
| "loss": 0.4097, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 3.334271243669105, | |
| "grad_norm": 23.035243755638188, | |
| "learning_rate": 5.249343832020997e-05, | |
| "loss": 0.6312, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 3.337084974676421, | |
| "grad_norm": 43.17451612005898, | |
| "learning_rate": 5.227471566054243e-05, | |
| "loss": 0.7843, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 3.3398987056837366, | |
| "grad_norm": 15.873553878518269, | |
| "learning_rate": 5.205599300087488e-05, | |
| "loss": 0.3799, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 3.3427124366910523, | |
| "grad_norm": 5.309526682904749, | |
| "learning_rate": 5.183727034120735e-05, | |
| "loss": 0.2318, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 3.345526167698368, | |
| "grad_norm": 1.3966056606002777, | |
| "learning_rate": 5.16185476815398e-05, | |
| "loss": 0.3297, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 3.348339898705684, | |
| "grad_norm": 3.8353966809516478, | |
| "learning_rate": 5.139982502187226e-05, | |
| "loss": 0.2181, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 3.3511536297129996, | |
| "grad_norm": 32.67366702302119, | |
| "learning_rate": 5.118110236220472e-05, | |
| "loss": 0.7795, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 3.3539673607203153, | |
| "grad_norm": 9.654076908853929, | |
| "learning_rate": 5.096237970253718e-05, | |
| "loss": 0.9812, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 3.3567810917276306, | |
| "grad_norm": 10.000700109531257, | |
| "learning_rate": 5.0743657042869634e-05, | |
| "loss": 0.4223, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 3.3595948227349464, | |
| "grad_norm": 34.13418422325413, | |
| "learning_rate": 5.05249343832021e-05, | |
| "loss": 0.4949, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 3.362408553742262, | |
| "grad_norm": 1.0985807347140457, | |
| "learning_rate": 5.0306211723534554e-05, | |
| "loss": 0.4653, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 3.365222284749578, | |
| "grad_norm": 168.6850360069934, | |
| "learning_rate": 5.008748906386701e-05, | |
| "loss": 0.6093, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 3.3680360157568936, | |
| "grad_norm": 12.961068610872767, | |
| "learning_rate": 4.9868766404199474e-05, | |
| "loss": 0.5953, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 3.3708497467642093, | |
| "grad_norm": 18.333361541841942, | |
| "learning_rate": 4.965004374453193e-05, | |
| "loss": 0.3427, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 3.373663477771525, | |
| "grad_norm": 15.018142235150822, | |
| "learning_rate": 4.9431321084864386e-05, | |
| "loss": 0.4499, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 3.376477208778841, | |
| "grad_norm": 58.35352085477518, | |
| "learning_rate": 4.921259842519685e-05, | |
| "loss": 0.2707, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.3792909397861566, | |
| "grad_norm": 27.84061313602778, | |
| "learning_rate": 4.8993875765529306e-05, | |
| "loss": 0.4568, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 3.3821046707934723, | |
| "grad_norm": 2.221696017278666, | |
| "learning_rate": 4.877515310586177e-05, | |
| "loss": 0.286, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 3.384918401800788, | |
| "grad_norm": 5.977444422857166, | |
| "learning_rate": 4.855643044619422e-05, | |
| "loss": 0.5759, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 3.3877321328081034, | |
| "grad_norm": 1.1311358791589952, | |
| "learning_rate": 4.8337707786526676e-05, | |
| "loss": 0.5304, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 3.390545863815419, | |
| "grad_norm": 16.413270716064826, | |
| "learning_rate": 4.811898512685914e-05, | |
| "loss": 0.608, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 3.393359594822735, | |
| "grad_norm": 76.93565566008341, | |
| "learning_rate": 4.7900262467191595e-05, | |
| "loss": 0.4058, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 3.3961733258300506, | |
| "grad_norm": 106.30305951256041, | |
| "learning_rate": 4.768153980752405e-05, | |
| "loss": 0.3392, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 3.3989870568373663, | |
| "grad_norm": 94.06687107005396, | |
| "learning_rate": 4.7462817147856515e-05, | |
| "loss": 0.5494, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 3.401800787844682, | |
| "grad_norm": 25.01577867392826, | |
| "learning_rate": 4.724409448818897e-05, | |
| "loss": 0.5204, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 3.404614518851998, | |
| "grad_norm": 4.99525052635641, | |
| "learning_rate": 4.702537182852143e-05, | |
| "loss": 0.4441, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 3.4074282498593136, | |
| "grad_norm": 16.108946786215625, | |
| "learning_rate": 4.680664916885389e-05, | |
| "loss": 0.3515, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 3.4102419808666293, | |
| "grad_norm": 99.7096719318533, | |
| "learning_rate": 4.658792650918635e-05, | |
| "loss": 0.4423, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 3.4130557118739446, | |
| "grad_norm": 85.07580217985975, | |
| "learning_rate": 4.636920384951881e-05, | |
| "loss": 0.7414, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 3.4158694428812604, | |
| "grad_norm": 81.73261731795073, | |
| "learning_rate": 4.615048118985127e-05, | |
| "loss": 0.6783, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 3.418683173888576, | |
| "grad_norm": 1172.4065465322012, | |
| "learning_rate": 4.5931758530183724e-05, | |
| "loss": 0.68, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 3.421496904895892, | |
| "grad_norm": 57.09784884634665, | |
| "learning_rate": 4.571303587051619e-05, | |
| "loss": 0.6683, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 3.4243106359032076, | |
| "grad_norm": 30.70562258358342, | |
| "learning_rate": 4.549431321084864e-05, | |
| "loss": 0.5569, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 3.4271243669105234, | |
| "grad_norm": 3.4088793578308696, | |
| "learning_rate": 4.527559055118109e-05, | |
| "loss": 0.6287, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 3.429938097917839, | |
| "grad_norm": 4.84948894552461, | |
| "learning_rate": 4.5056867891513556e-05, | |
| "loss": 0.4114, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 3.432751828925155, | |
| "grad_norm": 1.1825455646656198, | |
| "learning_rate": 4.483814523184601e-05, | |
| "loss": 1.039, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 3.4355655599324706, | |
| "grad_norm": 16.863474594043463, | |
| "learning_rate": 4.461942257217847e-05, | |
| "loss": 0.6146, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 3.4383792909397863, | |
| "grad_norm": 3.912209944210823, | |
| "learning_rate": 4.440069991251093e-05, | |
| "loss": 0.2781, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 3.441193021947102, | |
| "grad_norm": 161.80969730924826, | |
| "learning_rate": 4.418197725284339e-05, | |
| "loss": 0.5749, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 3.4440067529544174, | |
| "grad_norm": 10.500387209468151, | |
| "learning_rate": 4.3963254593175845e-05, | |
| "loss": 0.5682, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 3.446820483961733, | |
| "grad_norm": 84.92953757043959, | |
| "learning_rate": 4.374453193350831e-05, | |
| "loss": 0.4613, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 3.449634214969049, | |
| "grad_norm": 5.437611277992704, | |
| "learning_rate": 4.3525809273840765e-05, | |
| "loss": 0.7144, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 3.4524479459763646, | |
| "grad_norm": 28.101996466451755, | |
| "learning_rate": 4.330708661417323e-05, | |
| "loss": 0.5975, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 3.4552616769836804, | |
| "grad_norm": 57.3104056471748, | |
| "learning_rate": 4.3088363954505685e-05, | |
| "loss": 0.6467, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 3.458075407990996, | |
| "grad_norm": 31.907644355337986, | |
| "learning_rate": 4.286964129483814e-05, | |
| "loss": 0.3168, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 3.460889138998312, | |
| "grad_norm": 121.91686564959656, | |
| "learning_rate": 4.2650918635170604e-05, | |
| "loss": 0.8983, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 3.4637028700056276, | |
| "grad_norm": 44.536124621836905, | |
| "learning_rate": 4.243219597550306e-05, | |
| "loss": 0.6855, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 3.4665166010129433, | |
| "grad_norm": 6.7326531100755505, | |
| "learning_rate": 4.221347331583551e-05, | |
| "loss": 0.2299, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 3.4693303320202586, | |
| "grad_norm": 15.971843953513892, | |
| "learning_rate": 4.199475065616798e-05, | |
| "loss": 0.5481, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 3.4721440630275744, | |
| "grad_norm": 17.685542251021793, | |
| "learning_rate": 4.177602799650043e-05, | |
| "loss": 0.3566, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 3.47495779403489, | |
| "grad_norm": 0.7679243984887517, | |
| "learning_rate": 4.155730533683289e-05, | |
| "loss": 0.929, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 3.477771525042206, | |
| "grad_norm": 35.31835874044769, | |
| "learning_rate": 4.133858267716535e-05, | |
| "loss": 0.3099, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 3.4805852560495216, | |
| "grad_norm": 2.1302391276294474, | |
| "learning_rate": 4.1119860017497806e-05, | |
| "loss": 0.7195, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 3.4833989870568374, | |
| "grad_norm": 30.66376411827359, | |
| "learning_rate": 4.090113735783027e-05, | |
| "loss": 0.5912, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 3.486212718064153, | |
| "grad_norm": 2.7815220772073475, | |
| "learning_rate": 4.0682414698162726e-05, | |
| "loss": 0.311, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 3.489026449071469, | |
| "grad_norm": 10.446918971739123, | |
| "learning_rate": 4.046369203849518e-05, | |
| "loss": 0.3615, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 3.4918401800787846, | |
| "grad_norm": 4.63253449800804, | |
| "learning_rate": 4.0244969378827646e-05, | |
| "loss": 0.6542, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 3.4946539110861004, | |
| "grad_norm": 29.433364869299208, | |
| "learning_rate": 4.00262467191601e-05, | |
| "loss": 0.4585, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 3.497467642093416, | |
| "grad_norm": 49.618623837965174, | |
| "learning_rate": 3.980752405949256e-05, | |
| "loss": 0.5086, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 3.500281373100732, | |
| "grad_norm": 1.2525886950971519, | |
| "learning_rate": 3.958880139982502e-05, | |
| "loss": 0.2, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 3.503095104108047, | |
| "grad_norm": 35.560120749370476, | |
| "learning_rate": 3.937007874015748e-05, | |
| "loss": 0.9585, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 3.505908835115363, | |
| "grad_norm": 76.4695471070044, | |
| "learning_rate": 3.9151356080489935e-05, | |
| "loss": 0.6961, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 3.5087225661226786, | |
| "grad_norm": 17.129864527344232, | |
| "learning_rate": 3.89326334208224e-05, | |
| "loss": 0.3217, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 3.5115362971299944, | |
| "grad_norm": 5.729130422882109, | |
| "learning_rate": 3.871391076115485e-05, | |
| "loss": 0.7064, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 3.51435002813731, | |
| "grad_norm": 235.83586490561873, | |
| "learning_rate": 3.849518810148732e-05, | |
| "loss": 0.3649, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 3.517163759144626, | |
| "grad_norm": 2.2017595700585457, | |
| "learning_rate": 3.827646544181977e-05, | |
| "loss": 0.4434, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.5199774901519416, | |
| "grad_norm": 6.576317534322007, | |
| "learning_rate": 3.8057742782152224e-05, | |
| "loss": 0.5095, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 3.522791221159257, | |
| "grad_norm": 1.6164548073339011, | |
| "learning_rate": 3.783902012248469e-05, | |
| "loss": 0.6829, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 3.5256049521665727, | |
| "grad_norm": 3.480112918705946, | |
| "learning_rate": 3.7620297462817144e-05, | |
| "loss": 0.9486, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 3.5284186831738884, | |
| "grad_norm": 92.6860952958962, | |
| "learning_rate": 3.740157480314961e-05, | |
| "loss": 0.4618, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 3.531232414181204, | |
| "grad_norm": 1.8127678229329505, | |
| "learning_rate": 3.718285214348206e-05, | |
| "loss": 0.5044, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 3.53404614518852, | |
| "grad_norm": 11.49210167806751, | |
| "learning_rate": 3.696412948381452e-05, | |
| "loss": 0.7462, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 3.5368598761958356, | |
| "grad_norm": 22.188651197414536, | |
| "learning_rate": 3.6745406824146976e-05, | |
| "loss": 0.3461, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 3.5396736072031514, | |
| "grad_norm": 31.842658386387633, | |
| "learning_rate": 3.652668416447944e-05, | |
| "loss": 0.3388, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 3.542487338210467, | |
| "grad_norm": 203.1349459704412, | |
| "learning_rate": 3.6307961504811896e-05, | |
| "loss": 0.3437, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 3.545301069217783, | |
| "grad_norm": 10.252277397482308, | |
| "learning_rate": 3.608923884514435e-05, | |
| "loss": 0.1386, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 3.5481148002250986, | |
| "grad_norm": 355.9201349404873, | |
| "learning_rate": 3.5870516185476816e-05, | |
| "loss": 0.2346, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 3.5509285312324144, | |
| "grad_norm": 48.7713276890867, | |
| "learning_rate": 3.565179352580927e-05, | |
| "loss": 0.6602, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 3.55374226223973, | |
| "grad_norm": 0.5124599339952361, | |
| "learning_rate": 3.543307086614173e-05, | |
| "loss": 0.4634, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 3.556555993247046, | |
| "grad_norm": 75.75380478169932, | |
| "learning_rate": 3.5214348206474185e-05, | |
| "loss": 0.9834, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 3.559369724254361, | |
| "grad_norm": 5.9723791827847466, | |
| "learning_rate": 3.499562554680665e-05, | |
| "loss": 0.2721, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 3.562183455261677, | |
| "grad_norm": 1.339210154695292, | |
| "learning_rate": 3.4776902887139105e-05, | |
| "loss": 0.5833, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 3.5649971862689926, | |
| "grad_norm": 2.340812474612855, | |
| "learning_rate": 3.455818022747156e-05, | |
| "loss": 0.8534, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 3.5678109172763084, | |
| "grad_norm": 42.020492946977896, | |
| "learning_rate": 3.4339457567804024e-05, | |
| "loss": 0.4952, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 3.570624648283624, | |
| "grad_norm": 30.231287046941738, | |
| "learning_rate": 3.412073490813648e-05, | |
| "loss": 0.469, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 3.57343837929094, | |
| "grad_norm": 20.637898535782263, | |
| "learning_rate": 3.390201224846894e-05, | |
| "loss": 0.5748, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 3.5762521102982556, | |
| "grad_norm": 27.709789371715573, | |
| "learning_rate": 3.36832895888014e-05, | |
| "loss": 0.5487, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 3.579065841305571, | |
| "grad_norm": 1.4646876271019715, | |
| "learning_rate": 3.346456692913386e-05, | |
| "loss": 0.2549, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 3.5818795723128867, | |
| "grad_norm": 47.275662835213524, | |
| "learning_rate": 3.3245844269466313e-05, | |
| "loss": 0.331, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 3.5846933033202024, | |
| "grad_norm": 135.70744957937237, | |
| "learning_rate": 3.302712160979877e-05, | |
| "loss": 0.5608, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 3.587507034327518, | |
| "grad_norm": 78.57081420410536, | |
| "learning_rate": 3.280839895013123e-05, | |
| "loss": 0.2891, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 3.590320765334834, | |
| "grad_norm": 1.7707715411426224, | |
| "learning_rate": 3.258967629046369e-05, | |
| "loss": 0.345, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 3.5931344963421497, | |
| "grad_norm": 25.509662246815907, | |
| "learning_rate": 3.2370953630796146e-05, | |
| "loss": 0.8202, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 3.5959482273494654, | |
| "grad_norm": 36.869039202453266, | |
| "learning_rate": 3.215223097112861e-05, | |
| "loss": 0.4163, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 3.598761958356781, | |
| "grad_norm": 2.193698658946938, | |
| "learning_rate": 3.1933508311461066e-05, | |
| "loss": 0.4813, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 3.601575689364097, | |
| "grad_norm": 86.82251364819027, | |
| "learning_rate": 3.171478565179352e-05, | |
| "loss": 0.2963, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 3.6043894203714126, | |
| "grad_norm": 175.32090889794566, | |
| "learning_rate": 3.149606299212598e-05, | |
| "loss": 0.4121, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 3.6072031513787284, | |
| "grad_norm": 77.92419912338526, | |
| "learning_rate": 3.127734033245844e-05, | |
| "loss": 0.6049, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 3.610016882386044, | |
| "grad_norm": 29.416631197885483, | |
| "learning_rate": 3.10586176727909e-05, | |
| "loss": 0.4979, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 3.61283061339336, | |
| "grad_norm": 11.168327597970197, | |
| "learning_rate": 3.0839895013123355e-05, | |
| "loss": 0.7981, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 3.615644344400675, | |
| "grad_norm": 5.295416735323613, | |
| "learning_rate": 3.062117235345582e-05, | |
| "loss": 0.5611, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 3.618458075407991, | |
| "grad_norm": 20.607563312252314, | |
| "learning_rate": 3.0402449693788275e-05, | |
| "loss": 0.4421, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 3.6212718064153067, | |
| "grad_norm": 79.3047250915384, | |
| "learning_rate": 3.018372703412073e-05, | |
| "loss": 0.6417, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 3.6240855374226224, | |
| "grad_norm": 51.360432585204684, | |
| "learning_rate": 2.996500437445319e-05, | |
| "loss": 0.4794, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 3.626899268429938, | |
| "grad_norm": 18.193700637933883, | |
| "learning_rate": 2.974628171478565e-05, | |
| "loss": 0.6764, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 3.629712999437254, | |
| "grad_norm": 13.2158616023827, | |
| "learning_rate": 2.952755905511811e-05, | |
| "loss": 0.561, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 3.6325267304445696, | |
| "grad_norm": 39.6048512902133, | |
| "learning_rate": 2.9308836395450564e-05, | |
| "loss": 0.5969, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 3.635340461451885, | |
| "grad_norm": 119.25617194048463, | |
| "learning_rate": 2.9090113735783023e-05, | |
| "loss": 0.5537, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 3.6381541924592007, | |
| "grad_norm": 17.312325283917904, | |
| "learning_rate": 2.8871391076115483e-05, | |
| "loss": 0.2931, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 3.6409679234665164, | |
| "grad_norm": 30.668034379631603, | |
| "learning_rate": 2.865266841644794e-05, | |
| "loss": 0.7078, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 3.643781654473832, | |
| "grad_norm": 45.124842339660304, | |
| "learning_rate": 2.84339457567804e-05, | |
| "loss": 0.5132, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 3.646595385481148, | |
| "grad_norm": 15.881149948027138, | |
| "learning_rate": 2.821522309711286e-05, | |
| "loss": 0.6237, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 3.6494091164884637, | |
| "grad_norm": 23.94430535891449, | |
| "learning_rate": 2.799650043744532e-05, | |
| "loss": 0.2053, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 3.6522228474957794, | |
| "grad_norm": 120.3735140731541, | |
| "learning_rate": 2.7777777777777772e-05, | |
| "loss": 0.3646, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 3.655036578503095, | |
| "grad_norm": 64.266595500627, | |
| "learning_rate": 2.7559055118110232e-05, | |
| "loss": 0.5375, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 3.657850309510411, | |
| "grad_norm": 6.0750969402208135, | |
| "learning_rate": 2.7340332458442692e-05, | |
| "loss": 1.0836, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.6606640405177266, | |
| "grad_norm": 74.7234722768726, | |
| "learning_rate": 2.7121609798775152e-05, | |
| "loss": 0.5668, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 3.6634777715250424, | |
| "grad_norm": 39.41229028715555, | |
| "learning_rate": 2.690288713910761e-05, | |
| "loss": 0.2667, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 3.666291502532358, | |
| "grad_norm": 3.053090186442824, | |
| "learning_rate": 2.6684164479440068e-05, | |
| "loss": 0.4439, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 3.669105233539674, | |
| "grad_norm": 71.21754074911294, | |
| "learning_rate": 2.6465441819772528e-05, | |
| "loss": 0.6616, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 3.671918964546989, | |
| "grad_norm": 45.19877536600059, | |
| "learning_rate": 2.6246719160104984e-05, | |
| "loss": 0.4174, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 3.674732695554305, | |
| "grad_norm": 372.5488030536916, | |
| "learning_rate": 2.602799650043744e-05, | |
| "loss": 0.5175, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 3.6775464265616207, | |
| "grad_norm": 57.57663292368472, | |
| "learning_rate": 2.58092738407699e-05, | |
| "loss": 0.8581, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 3.6803601575689364, | |
| "grad_norm": 1.7083829355501452, | |
| "learning_rate": 2.559055118110236e-05, | |
| "loss": 0.681, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 3.683173888576252, | |
| "grad_norm": 6.539997476072728, | |
| "learning_rate": 2.5371828521434817e-05, | |
| "loss": 0.1846, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 3.685987619583568, | |
| "grad_norm": 207.30300737141843, | |
| "learning_rate": 2.5153105861767277e-05, | |
| "loss": 0.3291, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.6888013505908837, | |
| "grad_norm": 83.28340761635474, | |
| "learning_rate": 2.4934383202099737e-05, | |
| "loss": 0.3801, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 3.691615081598199, | |
| "grad_norm": 29.331313165726307, | |
| "learning_rate": 2.4715660542432193e-05, | |
| "loss": 0.8411, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 3.6944288126055147, | |
| "grad_norm": 40.83094333479217, | |
| "learning_rate": 2.4496937882764653e-05, | |
| "loss": 0.2844, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 3.6972425436128304, | |
| "grad_norm": 282.66752780327295, | |
| "learning_rate": 2.427821522309711e-05, | |
| "loss": 0.7839, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 3.700056274620146, | |
| "grad_norm": 79.40867859040115, | |
| "learning_rate": 2.405949256342957e-05, | |
| "loss": 0.4881, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 3.702870005627462, | |
| "grad_norm": 14.039173501520008, | |
| "learning_rate": 2.3840769903762026e-05, | |
| "loss": 0.3336, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 3.7056837366347777, | |
| "grad_norm": 2.66570396278435, | |
| "learning_rate": 2.3622047244094486e-05, | |
| "loss": 0.4861, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 3.7084974676420934, | |
| "grad_norm": 297.6891719203325, | |
| "learning_rate": 2.3403324584426946e-05, | |
| "loss": 0.5156, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 3.711311198649409, | |
| "grad_norm": 42.44776195786567, | |
| "learning_rate": 2.3184601924759405e-05, | |
| "loss": 0.8828, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 3.714124929656725, | |
| "grad_norm": 28.455047894378716, | |
| "learning_rate": 2.2965879265091862e-05, | |
| "loss": 0.8758, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.7169386606640407, | |
| "grad_norm": 33.07750573387209, | |
| "learning_rate": 2.274715660542432e-05, | |
| "loss": 0.5996, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 3.7197523916713564, | |
| "grad_norm": 0.9938889512329678, | |
| "learning_rate": 2.2528433945756778e-05, | |
| "loss": 0.2031, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 3.722566122678672, | |
| "grad_norm": 23.80061424167977, | |
| "learning_rate": 2.2309711286089235e-05, | |
| "loss": 0.4923, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 3.725379853685988, | |
| "grad_norm": 1.131933505943834, | |
| "learning_rate": 2.2090988626421694e-05, | |
| "loss": 0.3908, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 3.728193584693303, | |
| "grad_norm": 446.7276314043673, | |
| "learning_rate": 2.1872265966754154e-05, | |
| "loss": 0.7402, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 3.731007315700619, | |
| "grad_norm": 1206.2342773424812, | |
| "learning_rate": 2.1653543307086614e-05, | |
| "loss": 0.2547, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 3.7338210467079347, | |
| "grad_norm": 1.7651821391803046, | |
| "learning_rate": 2.143482064741907e-05, | |
| "loss": 0.4101, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 3.7366347777152504, | |
| "grad_norm": 94.1699997933736, | |
| "learning_rate": 2.121609798775153e-05, | |
| "loss": 0.614, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 3.739448508722566, | |
| "grad_norm": 17.845824880937165, | |
| "learning_rate": 2.099737532808399e-05, | |
| "loss": 0.4902, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 3.742262239729882, | |
| "grad_norm": 26.896154000409293, | |
| "learning_rate": 2.0778652668416443e-05, | |
| "loss": 0.4603, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 3.7450759707371977, | |
| "grad_norm": 14.565889324285259, | |
| "learning_rate": 2.0559930008748903e-05, | |
| "loss": 0.5054, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 3.747889701744513, | |
| "grad_norm": 5.844233357741922, | |
| "learning_rate": 2.0341207349081363e-05, | |
| "loss": 0.4795, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 3.7507034327518287, | |
| "grad_norm": 206.0939101443417, | |
| "learning_rate": 2.0122484689413823e-05, | |
| "loss": 0.5896, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 3.7535171637591445, | |
| "grad_norm": 65.3190773750316, | |
| "learning_rate": 1.990376202974628e-05, | |
| "loss": 0.292, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 3.75633089476646, | |
| "grad_norm": 12.321249888093957, | |
| "learning_rate": 1.968503937007874e-05, | |
| "loss": 0.5816, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 3.759144625773776, | |
| "grad_norm": 20.741649400875783, | |
| "learning_rate": 1.94663167104112e-05, | |
| "loss": 0.2388, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 3.7619583567810917, | |
| "grad_norm": 2.622559284102868, | |
| "learning_rate": 1.924759405074366e-05, | |
| "loss": 0.6685, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 3.7647720877884074, | |
| "grad_norm": 23.160980070983843, | |
| "learning_rate": 1.9028871391076112e-05, | |
| "loss": 1.0501, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 3.767585818795723, | |
| "grad_norm": 87.1185595021463, | |
| "learning_rate": 1.8810148731408572e-05, | |
| "loss": 0.9589, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 3.770399549803039, | |
| "grad_norm": 30.316621338983605, | |
| "learning_rate": 1.859142607174103e-05, | |
| "loss": 0.2501, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 3.7732132808103547, | |
| "grad_norm": 23.92001820439982, | |
| "learning_rate": 1.8372703412073488e-05, | |
| "loss": 0.7739, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 3.7760270118176704, | |
| "grad_norm": 8.480682588233163, | |
| "learning_rate": 1.8153980752405948e-05, | |
| "loss": 0.4317, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 3.778840742824986, | |
| "grad_norm": 11.901124707133304, | |
| "learning_rate": 1.7935258092738408e-05, | |
| "loss": 0.2882, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 3.7816544738323015, | |
| "grad_norm": 121.66628180316688, | |
| "learning_rate": 1.7716535433070864e-05, | |
| "loss": 0.4091, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 3.784468204839617, | |
| "grad_norm": 23.934650131977175, | |
| "learning_rate": 1.7497812773403324e-05, | |
| "loss": 0.6122, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 3.787281935846933, | |
| "grad_norm": 68.18789104905484, | |
| "learning_rate": 1.727909011373578e-05, | |
| "loss": 0.5914, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 3.7900956668542487, | |
| "grad_norm": 35.60052500777, | |
| "learning_rate": 1.706036745406824e-05, | |
| "loss": 0.4227, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 3.7929093978615644, | |
| "grad_norm": 17.85284058651301, | |
| "learning_rate": 1.68416447944007e-05, | |
| "loss": 0.1204, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 3.79572312886888, | |
| "grad_norm": 2.4161722414635207, | |
| "learning_rate": 1.6622922134733157e-05, | |
| "loss": 0.5022, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 3.798536859876196, | |
| "grad_norm": 5.928187046017908, | |
| "learning_rate": 1.6404199475065617e-05, | |
| "loss": 0.6874, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.8013505908835117, | |
| "grad_norm": 63.792368045524945, | |
| "learning_rate": 1.6185476815398073e-05, | |
| "loss": 0.6038, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 3.804164321890827, | |
| "grad_norm": 13.41769308503903, | |
| "learning_rate": 1.5966754155730533e-05, | |
| "loss": 0.6469, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 3.8069780528981427, | |
| "grad_norm": 57.530424477841166, | |
| "learning_rate": 1.574803149606299e-05, | |
| "loss": 0.9088, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 3.8097917839054585, | |
| "grad_norm": 3.33259141334449, | |
| "learning_rate": 1.552930883639545e-05, | |
| "loss": 0.4786, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 3.812605514912774, | |
| "grad_norm": 119.02146057750649, | |
| "learning_rate": 1.531058617672791e-05, | |
| "loss": 0.6791, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 3.81541924592009, | |
| "grad_norm": 67.38672458073057, | |
| "learning_rate": 1.5091863517060365e-05, | |
| "loss": 0.4585, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 3.8182329769274057, | |
| "grad_norm": 104.08605683069419, | |
| "learning_rate": 1.4873140857392825e-05, | |
| "loss": 0.4812, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 3.8210467079347215, | |
| "grad_norm": 192.5384876170663, | |
| "learning_rate": 1.4654418197725282e-05, | |
| "loss": 0.4261, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 3.823860438942037, | |
| "grad_norm": 10.79902650951375, | |
| "learning_rate": 1.4435695538057742e-05, | |
| "loss": 0.3543, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 3.826674169949353, | |
| "grad_norm": 422.3326786781863, | |
| "learning_rate": 1.42169728783902e-05, | |
| "loss": 0.3316, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 3.8294879009566687, | |
| "grad_norm": 28.877601143071427, | |
| "learning_rate": 1.399825021872266e-05, | |
| "loss": 0.4107, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 3.8323016319639844, | |
| "grad_norm": 72.5714412046621, | |
| "learning_rate": 1.3779527559055116e-05, | |
| "loss": 0.5007, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 3.8351153629713, | |
| "grad_norm": 25.755963857052215, | |
| "learning_rate": 1.3560804899387576e-05, | |
| "loss": 0.6402, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 3.8379290939786155, | |
| "grad_norm": 1.533241984306202, | |
| "learning_rate": 1.3342082239720034e-05, | |
| "loss": 0.6787, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 3.8407428249859312, | |
| "grad_norm": 2.439663157954251, | |
| "learning_rate": 1.3123359580052492e-05, | |
| "loss": 0.3079, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 3.843556555993247, | |
| "grad_norm": 32.38387188806373, | |
| "learning_rate": 1.290463692038495e-05, | |
| "loss": 0.7875, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 3.8463702870005627, | |
| "grad_norm": 7.353540981294603, | |
| "learning_rate": 1.2685914260717409e-05, | |
| "loss": 0.5157, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 3.8491840180078785, | |
| "grad_norm": 11.82603640490557, | |
| "learning_rate": 1.2467191601049868e-05, | |
| "loss": 0.4921, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 3.851997749015194, | |
| "grad_norm": 2.7248783614576997, | |
| "learning_rate": 1.2248468941382327e-05, | |
| "loss": 0.437, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 3.85481148002251, | |
| "grad_norm": 342.82499995014996, | |
| "learning_rate": 1.2029746281714785e-05, | |
| "loss": 0.8291, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 3.8576252110298257, | |
| "grad_norm": 28.762266291398717, | |
| "learning_rate": 1.1811023622047243e-05, | |
| "loss": 0.7494, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 3.860438942037141, | |
| "grad_norm": 19.46284732459688, | |
| "learning_rate": 1.1592300962379703e-05, | |
| "loss": 0.3428, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 3.8632526730444567, | |
| "grad_norm": 11.122833006077931, | |
| "learning_rate": 1.137357830271216e-05, | |
| "loss": 0.8361, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 3.8660664040517725, | |
| "grad_norm": 12.12059180019161, | |
| "learning_rate": 1.1154855643044617e-05, | |
| "loss": 0.3567, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 3.8688801350590882, | |
| "grad_norm": 31.30428497306991, | |
| "learning_rate": 1.0936132983377077e-05, | |
| "loss": 0.5901, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 3.871693866066404, | |
| "grad_norm": 0.702699931126514, | |
| "learning_rate": 1.0717410323709535e-05, | |
| "loss": 0.2538, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 3.8745075970737197, | |
| "grad_norm": 1.0957739187938698, | |
| "learning_rate": 1.0498687664041995e-05, | |
| "loss": 0.3035, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 3.8773213280810355, | |
| "grad_norm": 26.16763388816232, | |
| "learning_rate": 1.0279965004374452e-05, | |
| "loss": 1.0008, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 3.880135059088351, | |
| "grad_norm": 7.266284196954616, | |
| "learning_rate": 1.0061242344706911e-05, | |
| "loss": 0.6148, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 3.882948790095667, | |
| "grad_norm": 1.280410994175035, | |
| "learning_rate": 9.84251968503937e-06, | |
| "loss": 0.31, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.8857625211029827, | |
| "grad_norm": 11.332894345972404, | |
| "learning_rate": 9.62379702537183e-06, | |
| "loss": 0.3175, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 3.8885762521102984, | |
| "grad_norm": 2.349285991354639, | |
| "learning_rate": 9.405074365704286e-06, | |
| "loss": 0.5999, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 3.891389983117614, | |
| "grad_norm": 0.3849237703681145, | |
| "learning_rate": 9.186351706036744e-06, | |
| "loss": 0.5271, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 3.8942037141249295, | |
| "grad_norm": 2.644331997108355, | |
| "learning_rate": 8.967629046369204e-06, | |
| "loss": 0.5702, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 3.8970174451322452, | |
| "grad_norm": 159.62015783602538, | |
| "learning_rate": 8.748906386701662e-06, | |
| "loss": 0.6812, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 3.899831176139561, | |
| "grad_norm": 9.77439075886051, | |
| "learning_rate": 8.53018372703412e-06, | |
| "loss": 0.9331, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 3.9026449071468767, | |
| "grad_norm": 9.517338120917794, | |
| "learning_rate": 8.311461067366578e-06, | |
| "loss": 0.3774, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 3.9054586381541925, | |
| "grad_norm": 1.2035309598986415, | |
| "learning_rate": 8.092738407699037e-06, | |
| "loss": 0.4958, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 3.908272369161508, | |
| "grad_norm": 22.577502570103032, | |
| "learning_rate": 7.874015748031495e-06, | |
| "loss": 0.42, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 3.911086100168824, | |
| "grad_norm": 10.830002884736738, | |
| "learning_rate": 7.655293088363955e-06, | |
| "loss": 0.4453, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 3.9138998311761397, | |
| "grad_norm": 186.4903752602189, | |
| "learning_rate": 7.436570428696413e-06, | |
| "loss": 0.5316, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 3.916713562183455, | |
| "grad_norm": 255.53257946868905, | |
| "learning_rate": 7.217847769028871e-06, | |
| "loss": 0.4243, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 3.9195272931907708, | |
| "grad_norm": 66.1440334798085, | |
| "learning_rate": 6.99912510936133e-06, | |
| "loss": 0.5682, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 3.9223410241980865, | |
| "grad_norm": 22.65274232742973, | |
| "learning_rate": 6.780402449693788e-06, | |
| "loss": 0.4842, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 3.9251547552054022, | |
| "grad_norm": 32.73205022090457, | |
| "learning_rate": 6.561679790026246e-06, | |
| "loss": 0.6708, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 3.927968486212718, | |
| "grad_norm": 88.75097655712725, | |
| "learning_rate": 6.342957130358704e-06, | |
| "loss": 0.4971, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 3.9307822172200337, | |
| "grad_norm": 124.77660806239149, | |
| "learning_rate": 6.124234470691163e-06, | |
| "loss": 0.8132, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 3.9335959482273495, | |
| "grad_norm": 52.88677822081236, | |
| "learning_rate": 5.905511811023621e-06, | |
| "loss": 0.498, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 3.9364096792346652, | |
| "grad_norm": 18.73784553764002, | |
| "learning_rate": 5.68678915135608e-06, | |
| "loss": 0.4054, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 3.939223410241981, | |
| "grad_norm": 22.54489264494174, | |
| "learning_rate": 5.4680664916885386e-06, | |
| "loss": 0.5623, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.9420371412492967, | |
| "grad_norm": 1.1389150302633293, | |
| "learning_rate": 5.2493438320209976e-06, | |
| "loss": 0.5645, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 3.9448508722566125, | |
| "grad_norm": 0.7575054525220537, | |
| "learning_rate": 5.030621172353456e-06, | |
| "loss": 0.5291, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 3.947664603263928, | |
| "grad_norm": 14.757391649488548, | |
| "learning_rate": 4.811898512685915e-06, | |
| "loss": 0.3024, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 3.9504783342712435, | |
| "grad_norm": 2.224238901964847, | |
| "learning_rate": 4.593175853018372e-06, | |
| "loss": 0.5885, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 3.9532920652785593, | |
| "grad_norm": 52.85370797076157, | |
| "learning_rate": 4.374453193350831e-06, | |
| "loss": 0.6216, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 3.956105796285875, | |
| "grad_norm": 4.926366273654347, | |
| "learning_rate": 4.155730533683289e-06, | |
| "loss": 0.8768, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 3.9589195272931907, | |
| "grad_norm": 66.74702595575317, | |
| "learning_rate": 3.937007874015747e-06, | |
| "loss": 0.8456, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 3.9617332583005065, | |
| "grad_norm": 108.65195259765787, | |
| "learning_rate": 3.7182852143482063e-06, | |
| "loss": 0.5933, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 3.9645469893078222, | |
| "grad_norm": 2.067190403694772, | |
| "learning_rate": 3.499562554680665e-06, | |
| "loss": 0.4804, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 3.967360720315138, | |
| "grad_norm": 49.20433018245054, | |
| "learning_rate": 3.280839895013123e-06, | |
| "loss": 0.8341, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 3.9701744513224537, | |
| "grad_norm": 18.11378428616437, | |
| "learning_rate": 3.0621172353455816e-06, | |
| "loss": 0.1358, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 3.972988182329769, | |
| "grad_norm": 20.757156258325487, | |
| "learning_rate": 2.84339457567804e-06, | |
| "loss": 0.4837, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 3.9758019133370848, | |
| "grad_norm": 82.24151004002375, | |
| "learning_rate": 2.6246719160104988e-06, | |
| "loss": 0.4444, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 3.9786156443444005, | |
| "grad_norm": 54.81837261410593, | |
| "learning_rate": 2.4059492563429574e-06, | |
| "loss": 0.4779, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 3.9814293753517163, | |
| "grad_norm": 54.39145269424402, | |
| "learning_rate": 2.1872265966754155e-06, | |
| "loss": 0.5289, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 3.984243106359032, | |
| "grad_norm": 94.0430998911988, | |
| "learning_rate": 1.9685039370078737e-06, | |
| "loss": 0.4829, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 3.9870568373663478, | |
| "grad_norm": 7.233363364036652, | |
| "learning_rate": 1.7497812773403325e-06, | |
| "loss": 0.214, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 3.9898705683736635, | |
| "grad_norm": 9.096609714666887, | |
| "learning_rate": 1.5310586176727908e-06, | |
| "loss": 0.759, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 3.9926842993809792, | |
| "grad_norm": 738.9111636480206, | |
| "learning_rate": 1.3123359580052494e-06, | |
| "loss": 0.4992, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 3.995498030388295, | |
| "grad_norm": 38.0307073875718, | |
| "learning_rate": 1.0936132983377078e-06, | |
| "loss": 0.2457, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 3.9983117613956107, | |
| "grad_norm": 216.92497095466356, | |
| "learning_rate": 8.748906386701662e-07, | |
| "loss": 0.3663, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_0_f1": 0.6524883028498512, | |
| "eval_0_precision": 0.6687009590235397, | |
| "eval_0_recall": 0.6370431893687708, | |
| "eval_1_f1": 0.8796050692602416, | |
| "eval_1_precision": 0.8722782405377758, | |
| "eval_1_recall": 0.8870560261554465, | |
| "eval_accuracy": 0.8211666849075189, | |
| "eval_loss": 0.7568359375, | |
| "eval_runtime": 468.6647, | |
| "eval_samples_per_second": 19.496, | |
| "eval_steps_per_second": 3.25, | |
| "step": 14216 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 14216, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 2.6717228113906893e+17, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |