{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15109409903477533, "eval_steps": 500, "global_step": 102000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.8999468632239884e-05, "grad_norm": 334.0, "learning_rate": 1.9e-05, "loss": 211.552, "step": 20 }, { "epoch": 7.799893726447977e-05, "grad_norm": 56.75, "learning_rate": 3.9e-05, "loss": 152.8776, "step": 40 }, { "epoch": 0.00011699840589671966, "grad_norm": 40.5, "learning_rate": 5.9e-05, "loss": 143.1569, "step": 60 }, { "epoch": 0.00015599787452895953, "grad_norm": 68.5, "learning_rate": 7.9e-05, "loss": 138.0697, "step": 80 }, { "epoch": 0.00019499734316119944, "grad_norm": 140.0, "learning_rate": 9.900000000000001e-05, "loss": 135.1088, "step": 100 }, { "epoch": 0.00023399681179343931, "grad_norm": 125.0, "learning_rate": 0.00011899999999999999, "loss": 131.6139, "step": 120 }, { "epoch": 0.0002729962804256792, "grad_norm": 106.5, "learning_rate": 0.00013900000000000002, "loss": 128.0376, "step": 140 }, { "epoch": 0.00031199574905791907, "grad_norm": 126.0, "learning_rate": 0.00015900000000000002, "loss": 123.8792, "step": 160 }, { "epoch": 0.00035099521769015897, "grad_norm": 168.0, "learning_rate": 0.000179, "loss": 119.9348, "step": 180 }, { "epoch": 0.0003899946863223989, "grad_norm": 148.0, "learning_rate": 0.000199, "loss": 116.157, "step": 200 }, { "epoch": 0.0004289941549546387, "grad_norm": 86.0, "learning_rate": 0.000219, "loss": 111.9886, "step": 220 }, { "epoch": 0.00046799362358687863, "grad_norm": 72.0, "learning_rate": 0.00023899999999999998, "loss": 108.1426, "step": 240 }, { "epoch": 0.0005069930922191185, "grad_norm": 126.5, "learning_rate": 0.000259, "loss": 104.5722, "step": 260 }, { "epoch": 0.0005459925608513584, "grad_norm": 178.0, "learning_rate": 0.000279, "loss": 100.9645, "step": 280 }, { "epoch": 0.0005849920294835983, "grad_norm": 186.0, "learning_rate": 0.000299, "loss": 98.4827, "step": 300 }, { "epoch": 0.0006239914981158381, "grad_norm": 71.5, "learning_rate": 0.000319, "loss": 96.0813, "step": 320 }, { "epoch": 0.0006629909667480781, "grad_norm": 202.0, "learning_rate": 0.00033900000000000005, "loss": 93.5772, "step": 340 }, { "epoch": 0.0007019904353803179, "grad_norm": 136.0, "learning_rate": 0.000359, "loss": 91.1229, "step": 360 }, { "epoch": 0.0007409899040125578, "grad_norm": 77.5, "learning_rate": 0.000379, "loss": 88.9925, "step": 380 }, { "epoch": 0.0007799893726447978, "grad_norm": 51.5, "learning_rate": 0.00039900000000000005, "loss": 86.1655, "step": 400 }, { "epoch": 0.0008189888412770376, "grad_norm": 57.5, "learning_rate": 0.000419, "loss": 83.4643, "step": 420 }, { "epoch": 0.0008579883099092775, "grad_norm": 72.0, "learning_rate": 0.000439, "loss": 81.3219, "step": 440 }, { "epoch": 0.0008969877785415174, "grad_norm": 77.0, "learning_rate": 0.00045900000000000004, "loss": 78.8648, "step": 460 }, { "epoch": 0.0009359872471737573, "grad_norm": 108.0, "learning_rate": 0.000479, "loss": 76.6103, "step": 480 }, { "epoch": 0.0009749867158059971, "grad_norm": 125.0, "learning_rate": 0.000499, "loss": 73.6485, "step": 500 }, { "epoch": 0.001013986184438237, "grad_norm": 88.5, "learning_rate": 0.0004999938230706361, "loss": 71.5323, "step": 520 }, { "epoch": 0.0010529856530704768, "grad_norm": 97.5, "learning_rate": 0.0004999873210397267, "loss": 69.3097, "step": 540 }, { "epoch": 0.0010919851217027169, "grad_norm": 92.5, "learning_rate": 0.0004999808190088174, "loss": 67.243, "step": 560 }, { "epoch": 0.0011309845903349567, "grad_norm": 65.5, "learning_rate": 0.000499974316977908, "loss": 65.3832, "step": 580 }, { "epoch": 0.0011699840589671966, "grad_norm": 113.0, "learning_rate": 0.0004999678149469987, "loss": 63.7967, "step": 600 }, { "epoch": 0.0012089835275994364, "grad_norm": 89.5, "learning_rate": 0.0004999613129160894, "loss": 62.3609, "step": 620 }, { "epoch": 0.0012479829962316763, "grad_norm": 51.0, "learning_rate": 0.00049995481088518, "loss": 60.9657, "step": 640 }, { "epoch": 0.0012869824648639161, "grad_norm": 48.0, "learning_rate": 0.0004999483088542707, "loss": 59.8919, "step": 660 }, { "epoch": 0.0013259819334961562, "grad_norm": 48.25, "learning_rate": 0.0004999418068233612, "loss": 58.5966, "step": 680 }, { "epoch": 0.001364981402128396, "grad_norm": 63.75, "learning_rate": 0.0004999353047924519, "loss": 57.6373, "step": 700 }, { "epoch": 0.0014039808707606359, "grad_norm": 57.5, "learning_rate": 0.0004999288027615425, "loss": 57.1306, "step": 720 }, { "epoch": 0.0014429803393928757, "grad_norm": 53.75, "learning_rate": 0.0004999223007306332, "loss": 55.8643, "step": 740 }, { "epoch": 0.0014819798080251156, "grad_norm": 43.0, "learning_rate": 0.0004999157986997238, "loss": 55.1049, "step": 760 }, { "epoch": 0.0015209792766573554, "grad_norm": 64.0, "learning_rate": 0.0004999092966688145, "loss": 54.4943, "step": 780 }, { "epoch": 0.0015599787452895955, "grad_norm": 52.25, "learning_rate": 0.0004999027946379052, "loss": 53.6, "step": 800 }, { "epoch": 0.0015989782139218354, "grad_norm": 52.75, "learning_rate": 0.0004998962926069958, "loss": 53.0005, "step": 820 }, { "epoch": 0.0016379776825540752, "grad_norm": 79.0, "learning_rate": 0.0004998897905760865, "loss": 52.2766, "step": 840 }, { "epoch": 0.001676977151186315, "grad_norm": 56.5, "learning_rate": 0.0004998832885451771, "loss": 51.8347, "step": 860 }, { "epoch": 0.001715976619818555, "grad_norm": 63.75, "learning_rate": 0.0004998767865142678, "loss": 51.2838, "step": 880 }, { "epoch": 0.0017549760884507948, "grad_norm": 46.75, "learning_rate": 0.0004998702844833583, "loss": 50.7197, "step": 900 }, { "epoch": 0.0017939755570830348, "grad_norm": 54.5, "learning_rate": 0.000499863782452449, "loss": 50.0984, "step": 920 }, { "epoch": 0.0018329750257152747, "grad_norm": 60.0, "learning_rate": 0.0004998572804215396, "loss": 49.603, "step": 940 }, { "epoch": 0.0018719744943475145, "grad_norm": 62.25, "learning_rate": 0.0004998507783906303, "loss": 49.3649, "step": 960 }, { "epoch": 0.0019109739629797544, "grad_norm": 45.5, "learning_rate": 0.000499844276359721, "loss": 48.9424, "step": 980 }, { "epoch": 0.0019499734316119942, "grad_norm": 53.25, "learning_rate": 0.0004998377743288116, "loss": 48.2553, "step": 1000 }, { "epoch": 0.001988972900244234, "grad_norm": 52.5, "learning_rate": 0.0004998312722979023, "loss": 47.8544, "step": 1020 }, { "epoch": 0.002027972368876474, "grad_norm": 34.25, "learning_rate": 0.0004998247702669929, "loss": 47.5541, "step": 1040 }, { "epoch": 0.0020669718375087138, "grad_norm": 39.75, "learning_rate": 0.0004998182682360836, "loss": 47.1335, "step": 1060 }, { "epoch": 0.0021059713061409536, "grad_norm": 56.0, "learning_rate": 0.0004998117662051742, "loss": 46.7965, "step": 1080 }, { "epoch": 0.002144970774773194, "grad_norm": 37.5, "learning_rate": 0.0004998052641742649, "loss": 46.4076, "step": 1100 }, { "epoch": 0.0021839702434054338, "grad_norm": 43.5, "learning_rate": 0.0004997987621433556, "loss": 46.0862, "step": 1120 }, { "epoch": 0.0022229697120376736, "grad_norm": 44.0, "learning_rate": 0.0004997922601124461, "loss": 45.7537, "step": 1140 }, { "epoch": 0.0022619691806699135, "grad_norm": 39.25, "learning_rate": 0.0004997857580815368, "loss": 45.5256, "step": 1160 }, { "epoch": 0.0023009686493021533, "grad_norm": 54.0, "learning_rate": 0.0004997792560506274, "loss": 45.1766, "step": 1180 }, { "epoch": 0.002339968117934393, "grad_norm": 31.5, "learning_rate": 0.0004997727540197181, "loss": 44.8069, "step": 1200 }, { "epoch": 0.002378967586566633, "grad_norm": 39.5, "learning_rate": 0.0004997662519888087, "loss": 44.5541, "step": 1220 }, { "epoch": 0.002417967055198873, "grad_norm": 63.75, "learning_rate": 0.0004997597499578994, "loss": 44.2843, "step": 1240 }, { "epoch": 0.0024569665238311127, "grad_norm": 38.5, "learning_rate": 0.00049975324792699, "loss": 44.0059, "step": 1260 }, { "epoch": 0.0024959659924633525, "grad_norm": 33.75, "learning_rate": 0.0004997467458960807, "loss": 43.8273, "step": 1280 }, { "epoch": 0.0025349654610955924, "grad_norm": 60.5, "learning_rate": 0.0004997402438651713, "loss": 43.4964, "step": 1300 }, { "epoch": 0.0025739649297278322, "grad_norm": 50.0, "learning_rate": 0.0004997337418342619, "loss": 43.152, "step": 1320 }, { "epoch": 0.0026129643983600725, "grad_norm": 43.75, "learning_rate": 0.0004997272398033526, "loss": 42.9883, "step": 1340 }, { "epoch": 0.0026519638669923124, "grad_norm": 35.25, "learning_rate": 0.0004997207377724432, "loss": 42.6924, "step": 1360 }, { "epoch": 0.0026909633356245522, "grad_norm": 40.5, "learning_rate": 0.0004997142357415339, "loss": 42.5154, "step": 1380 }, { "epoch": 0.002729962804256792, "grad_norm": 48.75, "learning_rate": 0.0004997077337106245, "loss": 42.2546, "step": 1400 }, { "epoch": 0.002768962272889032, "grad_norm": 55.0, "learning_rate": 0.0004997012316797152, "loss": 41.996, "step": 1420 }, { "epoch": 0.0028079617415212718, "grad_norm": 42.75, "learning_rate": 0.0004996947296488057, "loss": 41.7207, "step": 1440 }, { "epoch": 0.0028469612101535116, "grad_norm": 35.0, "learning_rate": 0.0004996882276178964, "loss": 41.5667, "step": 1460 }, { "epoch": 0.0028859606787857515, "grad_norm": 54.25, "learning_rate": 0.0004996817255869871, "loss": 41.3941, "step": 1480 }, { "epoch": 0.0029249601474179913, "grad_norm": 54.75, "learning_rate": 0.0004996752235560777, "loss": 41.3456, "step": 1500 }, { "epoch": 0.002963959616050231, "grad_norm": 36.75, "learning_rate": 0.0004996687215251684, "loss": 40.9435, "step": 1520 }, { "epoch": 0.003002959084682471, "grad_norm": 42.75, "learning_rate": 0.000499662219494259, "loss": 40.7629, "step": 1540 }, { "epoch": 0.003041958553314711, "grad_norm": 42.75, "learning_rate": 0.0004996557174633497, "loss": 40.5869, "step": 1560 }, { "epoch": 0.003080958021946951, "grad_norm": 33.75, "learning_rate": 0.0004996492154324403, "loss": 40.0826, "step": 1580 }, { "epoch": 0.003119957490579191, "grad_norm": 28.875, "learning_rate": 0.000499642713401531, "loss": 40.1069, "step": 1600 }, { "epoch": 0.003158956959211431, "grad_norm": 30.25, "learning_rate": 0.0004996362113706217, "loss": 40.0939, "step": 1620 }, { "epoch": 0.0031979564278436707, "grad_norm": 91.0, "learning_rate": 0.0004996297093397122, "loss": 39.8428, "step": 1640 }, { "epoch": 0.0032369558964759106, "grad_norm": 36.25, "learning_rate": 0.0004996232073088029, "loss": 39.7499, "step": 1660 }, { "epoch": 0.0032759553651081504, "grad_norm": 36.0, "learning_rate": 0.0004996167052778935, "loss": 39.4274, "step": 1680 }, { "epoch": 0.0033149548337403903, "grad_norm": 37.25, "learning_rate": 0.0004996102032469842, "loss": 39.3546, "step": 1700 }, { "epoch": 0.00335395430237263, "grad_norm": 36.0, "learning_rate": 0.0004996037012160748, "loss": 39.1877, "step": 1720 }, { "epoch": 0.00339295377100487, "grad_norm": 30.125, "learning_rate": 0.0004995971991851655, "loss": 39.171, "step": 1740 }, { "epoch": 0.00343195323963711, "grad_norm": 28.75, "learning_rate": 0.0004995906971542562, "loss": 38.6964, "step": 1760 }, { "epoch": 0.0034709527082693497, "grad_norm": 34.0, "learning_rate": 0.0004995841951233468, "loss": 38.6164, "step": 1780 }, { "epoch": 0.0035099521769015895, "grad_norm": 36.5, "learning_rate": 0.0004995776930924375, "loss": 38.4899, "step": 1800 }, { "epoch": 0.00354895164553383, "grad_norm": 55.0, "learning_rate": 0.0004995711910615281, "loss": 38.3576, "step": 1820 }, { "epoch": 0.0035879511141660696, "grad_norm": 31.5, "learning_rate": 0.0004995646890306188, "loss": 38.1502, "step": 1840 }, { "epoch": 0.0036269505827983095, "grad_norm": 25.5, "learning_rate": 0.0004995581869997094, "loss": 38.0605, "step": 1860 }, { "epoch": 0.0036659500514305493, "grad_norm": 39.0, "learning_rate": 0.0004995516849688, "loss": 37.7939, "step": 1880 }, { "epoch": 0.003704949520062789, "grad_norm": 34.5, "learning_rate": 0.0004995451829378906, "loss": 37.7678, "step": 1900 }, { "epoch": 0.003743948988695029, "grad_norm": 30.875, "learning_rate": 0.0004995386809069813, "loss": 37.5428, "step": 1920 }, { "epoch": 0.003782948457327269, "grad_norm": 30.75, "learning_rate": 0.000499532178876072, "loss": 37.4311, "step": 1940 }, { "epoch": 0.0038219479259595087, "grad_norm": 42.75, "learning_rate": 0.0004995256768451626, "loss": 37.2048, "step": 1960 }, { "epoch": 0.0038609473945917486, "grad_norm": 46.25, "learning_rate": 0.0004995191748142533, "loss": 37.1986, "step": 1980 }, { "epoch": 0.0038999468632239884, "grad_norm": 36.25, "learning_rate": 0.0004995126727833439, "loss": 36.9762, "step": 2000 }, { "epoch": 0.003938946331856229, "grad_norm": 47.5, "learning_rate": 0.0004995061707524346, "loss": 36.9171, "step": 2020 }, { "epoch": 0.003977945800488468, "grad_norm": 78.0, "learning_rate": 0.0004994996687215252, "loss": 36.8232, "step": 2040 }, { "epoch": 0.004016945269120708, "grad_norm": 28.125, "learning_rate": 0.0004994931666906158, "loss": 36.6007, "step": 2060 }, { "epoch": 0.004055944737752948, "grad_norm": 42.0, "learning_rate": 0.0004994866646597064, "loss": 36.4612, "step": 2080 }, { "epoch": 0.004094944206385188, "grad_norm": 54.25, "learning_rate": 0.0004994801626287971, "loss": 36.3412, "step": 2100 }, { "epoch": 0.0041339436750174275, "grad_norm": 47.0, "learning_rate": 0.0004994736605978878, "loss": 36.4033, "step": 2120 }, { "epoch": 0.004172943143649668, "grad_norm": 24.25, "learning_rate": 0.0004994671585669784, "loss": 36.0898, "step": 2140 }, { "epoch": 0.004211942612281907, "grad_norm": 32.25, "learning_rate": 0.0004994606565360691, "loss": 36.1283, "step": 2160 }, { "epoch": 0.0042509420809141475, "grad_norm": 65.5, "learning_rate": 0.0004994541545051597, "loss": 35.9975, "step": 2180 }, { "epoch": 0.004289941549546388, "grad_norm": 56.0, "learning_rate": 0.0004994476524742504, "loss": 35.7692, "step": 2200 }, { "epoch": 0.004328941018178627, "grad_norm": 35.25, "learning_rate": 0.0004994411504433409, "loss": 35.496, "step": 2220 }, { "epoch": 0.0043679404868108675, "grad_norm": 25.375, "learning_rate": 0.0004994346484124316, "loss": 35.5805, "step": 2240 }, { "epoch": 0.004406939955443107, "grad_norm": 34.0, "learning_rate": 0.0004994281463815222, "loss": 35.4159, "step": 2260 }, { "epoch": 0.004445939424075347, "grad_norm": 39.75, "learning_rate": 0.0004994216443506129, "loss": 35.5129, "step": 2280 }, { "epoch": 0.004484938892707587, "grad_norm": 45.0, "learning_rate": 0.0004994151423197036, "loss": 35.4243, "step": 2300 }, { "epoch": 0.004523938361339827, "grad_norm": 53.0, "learning_rate": 0.0004994086402887942, "loss": 35.3369, "step": 2320 }, { "epoch": 0.004562937829972066, "grad_norm": 30.0, "learning_rate": 0.0004994021382578849, "loss": 35.0457, "step": 2340 }, { "epoch": 0.004601937298604307, "grad_norm": 37.75, "learning_rate": 0.0004993956362269755, "loss": 34.9411, "step": 2360 }, { "epoch": 0.004640936767236546, "grad_norm": 31.625, "learning_rate": 0.0004993891341960661, "loss": 34.8776, "step": 2380 }, { "epoch": 0.004679936235868786, "grad_norm": 46.25, "learning_rate": 0.0004993826321651567, "loss": 34.8756, "step": 2400 }, { "epoch": 0.004718935704501027, "grad_norm": 24.25, "learning_rate": 0.0004993761301342474, "loss": 34.6115, "step": 2420 }, { "epoch": 0.004757935173133266, "grad_norm": 63.75, "learning_rate": 0.000499369628103338, "loss": 34.5717, "step": 2440 }, { "epoch": 0.004796934641765506, "grad_norm": 42.5, "learning_rate": 0.0004993631260724287, "loss": 34.5654, "step": 2460 }, { "epoch": 0.004835934110397746, "grad_norm": 24.125, "learning_rate": 0.0004993566240415194, "loss": 34.2722, "step": 2480 }, { "epoch": 0.004874933579029986, "grad_norm": 46.25, "learning_rate": 0.00049935012201061, "loss": 34.151, "step": 2500 }, { "epoch": 0.004913933047662225, "grad_norm": 22.25, "learning_rate": 0.0004993436199797007, "loss": 34.2639, "step": 2520 }, { "epoch": 0.004952932516294466, "grad_norm": 39.0, "learning_rate": 0.0004993371179487913, "loss": 34.0397, "step": 2540 }, { "epoch": 0.004991931984926705, "grad_norm": 39.25, "learning_rate": 0.000499330615917882, "loss": 33.9934, "step": 2560 }, { "epoch": 0.005030931453558945, "grad_norm": 33.75, "learning_rate": 0.0004993241138869727, "loss": 33.8442, "step": 2580 }, { "epoch": 0.005069930922191185, "grad_norm": 37.25, "learning_rate": 0.0004993176118560632, "loss": 33.8682, "step": 2600 }, { "epoch": 0.005108930390823425, "grad_norm": 28.375, "learning_rate": 0.0004993111098251539, "loss": 33.7223, "step": 2620 }, { "epoch": 0.0051479298594556645, "grad_norm": 34.25, "learning_rate": 0.0004993046077942445, "loss": 33.7083, "step": 2640 }, { "epoch": 0.005186929328087905, "grad_norm": 27.75, "learning_rate": 0.0004992981057633352, "loss": 33.4855, "step": 2660 }, { "epoch": 0.005225928796720145, "grad_norm": 36.5, "learning_rate": 0.0004992916037324258, "loss": 33.4651, "step": 2680 }, { "epoch": 0.0052649282653523845, "grad_norm": 32.0, "learning_rate": 0.0004992851017015165, "loss": 33.3867, "step": 2700 }, { "epoch": 0.005303927733984625, "grad_norm": 23.25, "learning_rate": 0.0004992785996706071, "loss": 33.2059, "step": 2720 }, { "epoch": 0.005342927202616864, "grad_norm": 28.125, "learning_rate": 0.0004992720976396978, "loss": 33.1986, "step": 2740 }, { "epoch": 0.0053819266712491045, "grad_norm": 32.75, "learning_rate": 0.0004992655956087885, "loss": 33.1129, "step": 2760 }, { "epoch": 0.005420926139881344, "grad_norm": 34.25, "learning_rate": 0.0004992590935778791, "loss": 33.0283, "step": 2780 }, { "epoch": 0.005459925608513584, "grad_norm": 46.75, "learning_rate": 0.0004992525915469698, "loss": 32.8969, "step": 2800 }, { "epoch": 0.005498925077145824, "grad_norm": 35.25, "learning_rate": 0.0004992460895160604, "loss": 32.8364, "step": 2820 }, { "epoch": 0.005537924545778064, "grad_norm": 42.75, "learning_rate": 0.000499239587485151, "loss": 32.9059, "step": 2840 }, { "epoch": 0.005576924014410303, "grad_norm": 35.5, "learning_rate": 0.0004992330854542416, "loss": 32.6822, "step": 2860 }, { "epoch": 0.0056159234830425436, "grad_norm": 34.0, "learning_rate": 0.0004992265834233323, "loss": 32.6822, "step": 2880 }, { "epoch": 0.005654922951674784, "grad_norm": 32.5, "learning_rate": 0.000499220081392423, "loss": 32.5353, "step": 2900 }, { "epoch": 0.005693922420307023, "grad_norm": 29.625, "learning_rate": 0.0004992135793615136, "loss": 32.56, "step": 2920 }, { "epoch": 0.0057329218889392635, "grad_norm": 23.0, "learning_rate": 0.0004992070773306043, "loss": 32.3732, "step": 2940 }, { "epoch": 0.005771921357571503, "grad_norm": 24.75, "learning_rate": 0.0004992005752996949, "loss": 32.4136, "step": 2960 }, { "epoch": 0.005810920826203743, "grad_norm": 23.875, "learning_rate": 0.0004991940732687855, "loss": 32.2992, "step": 2980 }, { "epoch": 0.005849920294835983, "grad_norm": 32.5, "learning_rate": 0.0004991875712378761, "loss": 32.2445, "step": 3000 }, { "epoch": 0.005888919763468223, "grad_norm": 24.125, "learning_rate": 0.0004991810692069668, "loss": 32.109, "step": 3020 }, { "epoch": 0.005927919232100462, "grad_norm": 24.125, "learning_rate": 0.0004991745671760574, "loss": 32.1211, "step": 3040 }, { "epoch": 0.005966918700732703, "grad_norm": 26.625, "learning_rate": 0.0004991680651451481, "loss": 31.9837, "step": 3060 }, { "epoch": 0.006005918169364942, "grad_norm": 31.125, "learning_rate": 0.0004991615631142388, "loss": 32.0123, "step": 3080 }, { "epoch": 0.006044917637997182, "grad_norm": 37.5, "learning_rate": 0.0004991550610833294, "loss": 31.9129, "step": 3100 }, { "epoch": 0.006083917106629422, "grad_norm": 47.5, "learning_rate": 0.0004991485590524201, "loss": 31.7838, "step": 3120 }, { "epoch": 0.006122916575261662, "grad_norm": 23.5, "learning_rate": 0.0004991420570215106, "loss": 31.7312, "step": 3140 }, { "epoch": 0.006161916043893902, "grad_norm": 22.875, "learning_rate": 0.0004991355549906013, "loss": 31.6424, "step": 3160 }, { "epoch": 0.006200915512526142, "grad_norm": 35.5, "learning_rate": 0.0004991290529596919, "loss": 31.662, "step": 3180 }, { "epoch": 0.006239914981158382, "grad_norm": 31.875, "learning_rate": 0.0004991225509287826, "loss": 31.416, "step": 3200 }, { "epoch": 0.0062789144497906214, "grad_norm": 26.125, "learning_rate": 0.0004991160488978732, "loss": 31.3726, "step": 3220 }, { "epoch": 0.006317913918422862, "grad_norm": 28.75, "learning_rate": 0.0004991095468669639, "loss": 31.3757, "step": 3240 }, { "epoch": 0.006356913387055101, "grad_norm": 30.375, "learning_rate": 0.0004991030448360546, "loss": 31.2872, "step": 3260 }, { "epoch": 0.006395912855687341, "grad_norm": 34.25, "learning_rate": 0.0004990965428051452, "loss": 31.1914, "step": 3280 }, { "epoch": 0.006434912324319581, "grad_norm": 35.0, "learning_rate": 0.0004990900407742359, "loss": 31.2682, "step": 3300 }, { "epoch": 0.006473911792951821, "grad_norm": 32.25, "learning_rate": 0.0004990835387433265, "loss": 31.162, "step": 3320 }, { "epoch": 0.0065129112615840605, "grad_norm": 45.0, "learning_rate": 0.0004990770367124171, "loss": 31.1904, "step": 3340 }, { "epoch": 0.006551910730216301, "grad_norm": 34.25, "learning_rate": 0.0004990705346815077, "loss": 31.0263, "step": 3360 }, { "epoch": 0.006590910198848541, "grad_norm": 26.125, "learning_rate": 0.0004990640326505984, "loss": 30.8982, "step": 3380 }, { "epoch": 0.0066299096674807805, "grad_norm": 24.875, "learning_rate": 0.000499057530619689, "loss": 30.8195, "step": 3400 }, { "epoch": 0.006668909136113021, "grad_norm": 27.25, "learning_rate": 0.0004990510285887797, "loss": 30.7594, "step": 3420 }, { "epoch": 0.00670790860474526, "grad_norm": 30.75, "learning_rate": 0.0004990445265578704, "loss": 30.8188, "step": 3440 }, { "epoch": 0.0067469080733775005, "grad_norm": 23.75, "learning_rate": 0.000499038024526961, "loss": 30.6754, "step": 3460 }, { "epoch": 0.00678590754200974, "grad_norm": 32.25, "learning_rate": 0.0004990315224960517, "loss": 30.736, "step": 3480 }, { "epoch": 0.00682490701064198, "grad_norm": 32.0, "learning_rate": 0.0004990250204651423, "loss": 30.6301, "step": 3500 }, { "epoch": 0.00686390647927422, "grad_norm": 23.375, "learning_rate": 0.000499018518434233, "loss": 30.5446, "step": 3520 }, { "epoch": 0.00690290594790646, "grad_norm": 24.125, "learning_rate": 0.0004990120164033236, "loss": 30.4778, "step": 3540 }, { "epoch": 0.006941905416538699, "grad_norm": 28.625, "learning_rate": 0.0004990055143724143, "loss": 30.4672, "step": 3560 }, { "epoch": 0.00698090488517094, "grad_norm": 18.75, "learning_rate": 0.0004989990123415049, "loss": 30.4023, "step": 3580 }, { "epoch": 0.007019904353803179, "grad_norm": 23.625, "learning_rate": 0.0004989925103105955, "loss": 30.29, "step": 3600 }, { "epoch": 0.007058903822435419, "grad_norm": 24.75, "learning_rate": 0.0004989860082796862, "loss": 30.1487, "step": 3620 }, { "epoch": 0.00709790329106766, "grad_norm": 21.625, "learning_rate": 0.0004989795062487768, "loss": 30.0894, "step": 3640 }, { "epoch": 0.007136902759699899, "grad_norm": 43.5, "learning_rate": 0.0004989730042178675, "loss": 30.2849, "step": 3660 }, { "epoch": 0.007175902228332139, "grad_norm": 48.0, "learning_rate": 0.0004989665021869581, "loss": 30.0532, "step": 3680 }, { "epoch": 0.007214901696964379, "grad_norm": 44.0, "learning_rate": 0.0004989600001560488, "loss": 29.884, "step": 3700 }, { "epoch": 0.007253901165596619, "grad_norm": 27.5, "learning_rate": 0.0004989534981251394, "loss": 29.9057, "step": 3720 }, { "epoch": 0.007292900634228858, "grad_norm": 29.875, "learning_rate": 0.0004989469960942301, "loss": 29.8925, "step": 3740 }, { "epoch": 0.007331900102861099, "grad_norm": 26.25, "learning_rate": 0.0004989404940633207, "loss": 30.0618, "step": 3760 }, { "epoch": 0.007370899571493338, "grad_norm": 21.5, "learning_rate": 0.0004989339920324113, "loss": 29.9752, "step": 3780 }, { "epoch": 0.007409899040125578, "grad_norm": 27.375, "learning_rate": 0.000498927490001502, "loss": 29.8659, "step": 3800 }, { "epoch": 0.007448898508757818, "grad_norm": 34.25, "learning_rate": 0.0004989209879705926, "loss": 29.8292, "step": 3820 }, { "epoch": 0.007487897977390058, "grad_norm": 44.0, "learning_rate": 0.0004989144859396833, "loss": 29.7528, "step": 3840 }, { "epoch": 0.007526897446022298, "grad_norm": 27.0, "learning_rate": 0.0004989079839087739, "loss": 29.5359, "step": 3860 }, { "epoch": 0.007565896914654538, "grad_norm": 19.875, "learning_rate": 0.0004989014818778646, "loss": 29.6089, "step": 3880 }, { "epoch": 0.007604896383286778, "grad_norm": 28.125, "learning_rate": 0.0004988949798469553, "loss": 29.6856, "step": 3900 }, { "epoch": 0.0076438958519190175, "grad_norm": 22.5, "learning_rate": 0.0004988884778160458, "loss": 29.6431, "step": 3920 }, { "epoch": 0.007682895320551258, "grad_norm": 23.625, "learning_rate": 0.0004988819757851365, "loss": 29.4747, "step": 3940 }, { "epoch": 0.007721894789183497, "grad_norm": 20.75, "learning_rate": 0.0004988754737542271, "loss": 29.4418, "step": 3960 }, { "epoch": 0.0077608942578157375, "grad_norm": 27.125, "learning_rate": 0.0004988689717233178, "loss": 29.3583, "step": 3980 }, { "epoch": 0.007799893726447977, "grad_norm": 32.25, "learning_rate": 0.0004988624696924084, "loss": 29.4409, "step": 4000 }, { "epoch": 0.007838893195080217, "grad_norm": 29.125, "learning_rate": 0.0004988559676614991, "loss": 29.2089, "step": 4020 }, { "epoch": 0.007877892663712457, "grad_norm": 26.0, "learning_rate": 0.0004988494656305897, "loss": 29.2038, "step": 4040 }, { "epoch": 0.007916892132344696, "grad_norm": 23.25, "learning_rate": 0.0004988429635996803, "loss": 29.1397, "step": 4060 }, { "epoch": 0.007955891600976936, "grad_norm": 20.0, "learning_rate": 0.000498836461568771, "loss": 29.2255, "step": 4080 }, { "epoch": 0.007994891069609177, "grad_norm": 23.125, "learning_rate": 0.0004988299595378616, "loss": 29.1196, "step": 4100 }, { "epoch": 0.008033890538241417, "grad_norm": 23.875, "learning_rate": 0.0004988234575069523, "loss": 28.9453, "step": 4120 }, { "epoch": 0.008072890006873657, "grad_norm": 23.875, "learning_rate": 0.0004988169554760429, "loss": 28.9975, "step": 4140 }, { "epoch": 0.008111889475505896, "grad_norm": 21.375, "learning_rate": 0.0004988104534451336, "loss": 29.0436, "step": 4160 }, { "epoch": 0.008150888944138136, "grad_norm": 22.625, "learning_rate": 0.0004988039514142242, "loss": 28.9583, "step": 4180 }, { "epoch": 0.008189888412770376, "grad_norm": 25.875, "learning_rate": 0.0004987974493833149, "loss": 29.0157, "step": 4200 }, { "epoch": 0.008228887881402617, "grad_norm": 26.0, "learning_rate": 0.0004987909473524055, "loss": 28.9062, "step": 4220 }, { "epoch": 0.008267887350034855, "grad_norm": 27.125, "learning_rate": 0.0004987844453214962, "loss": 28.854, "step": 4240 }, { "epoch": 0.008306886818667095, "grad_norm": 23.5, "learning_rate": 0.0004987779432905869, "loss": 28.8301, "step": 4260 }, { "epoch": 0.008345886287299336, "grad_norm": 25.125, "learning_rate": 0.0004987714412596775, "loss": 28.7554, "step": 4280 }, { "epoch": 0.008384885755931576, "grad_norm": 28.5, "learning_rate": 0.0004987649392287681, "loss": 28.7601, "step": 4300 }, { "epoch": 0.008423885224563814, "grad_norm": 17.0, "learning_rate": 0.0004987584371978587, "loss": 28.6043, "step": 4320 }, { "epoch": 0.008462884693196055, "grad_norm": 29.0, "learning_rate": 0.0004987519351669494, "loss": 28.7187, "step": 4340 }, { "epoch": 0.008501884161828295, "grad_norm": 27.5, "learning_rate": 0.00049874543313604, "loss": 28.6292, "step": 4360 }, { "epoch": 0.008540883630460535, "grad_norm": 22.75, "learning_rate": 0.0004987389311051307, "loss": 28.5058, "step": 4380 }, { "epoch": 0.008579883099092776, "grad_norm": 26.0, "learning_rate": 0.0004987324290742214, "loss": 28.5075, "step": 4400 }, { "epoch": 0.008618882567725014, "grad_norm": 24.125, "learning_rate": 0.000498725927043312, "loss": 28.4114, "step": 4420 }, { "epoch": 0.008657882036357254, "grad_norm": 25.25, "learning_rate": 0.0004987194250124027, "loss": 28.338, "step": 4440 }, { "epoch": 0.008696881504989495, "grad_norm": 25.125, "learning_rate": 0.0004987129229814933, "loss": 28.3999, "step": 4460 }, { "epoch": 0.008735880973621735, "grad_norm": 22.0, "learning_rate": 0.000498706420950584, "loss": 28.266, "step": 4480 }, { "epoch": 0.008774880442253974, "grad_norm": 22.625, "learning_rate": 0.0004986999189196746, "loss": 28.297, "step": 4500 }, { "epoch": 0.008813879910886214, "grad_norm": 24.25, "learning_rate": 0.0004986934168887652, "loss": 28.3294, "step": 4520 }, { "epoch": 0.008852879379518454, "grad_norm": 20.375, "learning_rate": 0.0004986869148578558, "loss": 28.2083, "step": 4540 }, { "epoch": 0.008891878848150694, "grad_norm": 28.25, "learning_rate": 0.0004986804128269465, "loss": 28.1562, "step": 4560 }, { "epoch": 0.008930878316782935, "grad_norm": 23.875, "learning_rate": 0.0004986739107960372, "loss": 28.0813, "step": 4580 }, { "epoch": 0.008969877785415173, "grad_norm": 18.875, "learning_rate": 0.0004986674087651278, "loss": 28.0924, "step": 4600 }, { "epoch": 0.009008877254047414, "grad_norm": 18.25, "learning_rate": 0.0004986609067342185, "loss": 28.0529, "step": 4620 }, { "epoch": 0.009047876722679654, "grad_norm": 22.0, "learning_rate": 0.0004986544047033091, "loss": 28.0213, "step": 4640 }, { "epoch": 0.009086876191311894, "grad_norm": 21.125, "learning_rate": 0.0004986479026723998, "loss": 28.0796, "step": 4660 }, { "epoch": 0.009125875659944133, "grad_norm": 22.75, "learning_rate": 0.0004986414006414903, "loss": 27.9454, "step": 4680 }, { "epoch": 0.009164875128576373, "grad_norm": 22.5, "learning_rate": 0.000498634898610581, "loss": 27.947, "step": 4700 }, { "epoch": 0.009203874597208613, "grad_norm": 21.5, "learning_rate": 0.0004986283965796716, "loss": 27.9934, "step": 4720 }, { "epoch": 0.009242874065840853, "grad_norm": 20.75, "learning_rate": 0.0004986218945487623, "loss": 27.8246, "step": 4740 }, { "epoch": 0.009281873534473092, "grad_norm": 21.625, "learning_rate": 0.000498615392517853, "loss": 27.8436, "step": 4760 }, { "epoch": 0.009320873003105332, "grad_norm": 29.0, "learning_rate": 0.0004986088904869436, "loss": 27.8608, "step": 4780 }, { "epoch": 0.009359872471737573, "grad_norm": 21.875, "learning_rate": 0.0004986023884560343, "loss": 27.7621, "step": 4800 }, { "epoch": 0.009398871940369813, "grad_norm": 21.75, "learning_rate": 0.0004985958864251249, "loss": 27.6439, "step": 4820 }, { "epoch": 0.009437871409002053, "grad_norm": 19.5, "learning_rate": 0.0004985893843942155, "loss": 27.6333, "step": 4840 }, { "epoch": 0.009476870877634292, "grad_norm": 25.125, "learning_rate": 0.0004985828823633061, "loss": 27.7432, "step": 4860 }, { "epoch": 0.009515870346266532, "grad_norm": 25.0, "learning_rate": 0.0004985763803323968, "loss": 27.5938, "step": 4880 }, { "epoch": 0.009554869814898772, "grad_norm": 18.5, "learning_rate": 0.0004985698783014875, "loss": 27.5637, "step": 4900 }, { "epoch": 0.009593869283531013, "grad_norm": 42.25, "learning_rate": 0.0004985633762705781, "loss": 27.5991, "step": 4920 }, { "epoch": 0.009632868752163251, "grad_norm": 35.5, "learning_rate": 0.0004985568742396688, "loss": 27.5453, "step": 4940 }, { "epoch": 0.009671868220795491, "grad_norm": 23.125, "learning_rate": 0.0004985503722087594, "loss": 27.3537, "step": 4960 }, { "epoch": 0.009710867689427732, "grad_norm": 18.125, "learning_rate": 0.0004985438701778501, "loss": 27.3934, "step": 4980 }, { "epoch": 0.009749867158059972, "grad_norm": 23.125, "learning_rate": 0.0004985373681469407, "loss": 27.3773, "step": 5000 }, { "epoch": 0.00978886662669221, "grad_norm": 18.75, "learning_rate": 0.0004985308661160314, "loss": 27.4244, "step": 5020 }, { "epoch": 0.00982786609532445, "grad_norm": 32.5, "learning_rate": 0.0004985243640851219, "loss": 27.3794, "step": 5040 }, { "epoch": 0.009866865563956691, "grad_norm": 29.125, "learning_rate": 0.0004985178620542126, "loss": 27.2708, "step": 5060 }, { "epoch": 0.009905865032588931, "grad_norm": 22.75, "learning_rate": 0.0004985113600233033, "loss": 27.3295, "step": 5080 }, { "epoch": 0.009944864501221172, "grad_norm": 21.0, "learning_rate": 0.0004985048579923939, "loss": 27.2439, "step": 5100 }, { "epoch": 0.00998386396985341, "grad_norm": 19.375, "learning_rate": 0.0004984983559614846, "loss": 27.2066, "step": 5120 }, { "epoch": 0.01002286343848565, "grad_norm": 20.0, "learning_rate": 0.0004984918539305752, "loss": 27.1348, "step": 5140 }, { "epoch": 0.01006186290711789, "grad_norm": 28.5, "learning_rate": 0.0004984853518996659, "loss": 27.1004, "step": 5160 }, { "epoch": 0.010100862375750131, "grad_norm": 21.625, "learning_rate": 0.0004984788498687565, "loss": 27.0346, "step": 5180 }, { "epoch": 0.01013986184438237, "grad_norm": 25.875, "learning_rate": 0.0004984723478378472, "loss": 27.0413, "step": 5200 }, { "epoch": 0.01017886131301461, "grad_norm": 18.125, "learning_rate": 0.0004984658458069379, "loss": 27.0338, "step": 5220 }, { "epoch": 0.01021786078164685, "grad_norm": 19.375, "learning_rate": 0.0004984593437760285, "loss": 27.0449, "step": 5240 }, { "epoch": 0.01025686025027909, "grad_norm": 36.5, "learning_rate": 0.0004984528417451192, "loss": 27.0432, "step": 5260 }, { "epoch": 0.010295859718911329, "grad_norm": 34.25, "learning_rate": 0.0004984463397142097, "loss": 26.9851, "step": 5280 }, { "epoch": 0.01033485918754357, "grad_norm": 31.375, "learning_rate": 0.0004984398376833004, "loss": 26.7249, "step": 5300 }, { "epoch": 0.01037385865617581, "grad_norm": 32.25, "learning_rate": 0.000498433335652391, "loss": 26.7909, "step": 5320 }, { "epoch": 0.01041285812480805, "grad_norm": 18.625, "learning_rate": 0.0004984268336214817, "loss": 26.8868, "step": 5340 }, { "epoch": 0.01045185759344029, "grad_norm": 21.25, "learning_rate": 0.0004984203315905723, "loss": 26.7785, "step": 5360 }, { "epoch": 0.010490857062072529, "grad_norm": 21.375, "learning_rate": 0.000498413829559663, "loss": 26.7886, "step": 5380 }, { "epoch": 0.010529856530704769, "grad_norm": 19.25, "learning_rate": 0.0004984073275287537, "loss": 26.8436, "step": 5400 }, { "epoch": 0.01056885599933701, "grad_norm": 20.0, "learning_rate": 0.0004984008254978443, "loss": 26.7706, "step": 5420 }, { "epoch": 0.01060785546796925, "grad_norm": 14.5625, "learning_rate": 0.000498394323466935, "loss": 26.7413, "step": 5440 }, { "epoch": 0.010646854936601488, "grad_norm": 18.0, "learning_rate": 0.0004983878214360255, "loss": 26.6448, "step": 5460 }, { "epoch": 0.010685854405233728, "grad_norm": 23.25, "learning_rate": 0.0004983813194051162, "loss": 26.6625, "step": 5480 }, { "epoch": 0.010724853873865969, "grad_norm": 20.625, "learning_rate": 0.0004983748173742068, "loss": 26.6769, "step": 5500 }, { "epoch": 0.010763853342498209, "grad_norm": 17.375, "learning_rate": 0.0004983683153432975, "loss": 26.5887, "step": 5520 }, { "epoch": 0.01080285281113045, "grad_norm": 27.0, "learning_rate": 0.0004983618133123881, "loss": 26.682, "step": 5540 }, { "epoch": 0.010841852279762688, "grad_norm": 22.5, "learning_rate": 0.0004983553112814788, "loss": 26.54, "step": 5560 }, { "epoch": 0.010880851748394928, "grad_norm": 20.0, "learning_rate": 0.0004983488092505695, "loss": 26.5296, "step": 5580 }, { "epoch": 0.010919851217027168, "grad_norm": 26.5, "learning_rate": 0.00049834230721966, "loss": 26.5472, "step": 5600 }, { "epoch": 0.010958850685659409, "grad_norm": 20.625, "learning_rate": 0.0004983358051887507, "loss": 26.6012, "step": 5620 }, { "epoch": 0.010997850154291647, "grad_norm": 27.5, "learning_rate": 0.0004983293031578413, "loss": 26.5274, "step": 5640 }, { "epoch": 0.011036849622923887, "grad_norm": 20.25, "learning_rate": 0.000498322801126932, "loss": 26.3862, "step": 5660 }, { "epoch": 0.011075849091556128, "grad_norm": 20.25, "learning_rate": 0.0004983162990960226, "loss": 26.4283, "step": 5680 }, { "epoch": 0.011114848560188368, "grad_norm": 22.0, "learning_rate": 0.0004983097970651133, "loss": 26.3131, "step": 5700 }, { "epoch": 0.011153848028820607, "grad_norm": 25.375, "learning_rate": 0.000498303295034204, "loss": 26.3354, "step": 5720 }, { "epoch": 0.011192847497452847, "grad_norm": 18.125, "learning_rate": 0.0004982967930032946, "loss": 26.2492, "step": 5740 }, { "epoch": 0.011231846966085087, "grad_norm": 19.875, "learning_rate": 0.0004982902909723852, "loss": 26.2118, "step": 5760 }, { "epoch": 0.011270846434717327, "grad_norm": 27.25, "learning_rate": 0.0004982837889414758, "loss": 26.2985, "step": 5780 }, { "epoch": 0.011309845903349568, "grad_norm": 19.875, "learning_rate": 0.0004982772869105665, "loss": 26.2099, "step": 5800 }, { "epoch": 0.011348845371981806, "grad_norm": 24.625, "learning_rate": 0.0004982707848796571, "loss": 26.2026, "step": 5820 }, { "epoch": 0.011387844840614047, "grad_norm": 18.875, "learning_rate": 0.0004982642828487478, "loss": 26.1828, "step": 5840 }, { "epoch": 0.011426844309246287, "grad_norm": 26.25, "learning_rate": 0.0004982577808178384, "loss": 26.0029, "step": 5860 }, { "epoch": 0.011465843777878527, "grad_norm": 18.375, "learning_rate": 0.0004982512787869291, "loss": 26.0813, "step": 5880 }, { "epoch": 0.011504843246510766, "grad_norm": 19.75, "learning_rate": 0.0004982447767560198, "loss": 26.1285, "step": 5900 }, { "epoch": 0.011543842715143006, "grad_norm": 27.5, "learning_rate": 0.0004982382747251104, "loss": 26.0351, "step": 5920 }, { "epoch": 0.011582842183775246, "grad_norm": 26.125, "learning_rate": 0.0004982317726942011, "loss": 25.9751, "step": 5940 }, { "epoch": 0.011621841652407486, "grad_norm": 23.375, "learning_rate": 0.0004982252706632917, "loss": 26.0156, "step": 5960 }, { "epoch": 0.011660841121039725, "grad_norm": 16.375, "learning_rate": 0.0004982187686323824, "loss": 25.9039, "step": 5980 }, { "epoch": 0.011699840589671965, "grad_norm": 21.25, "learning_rate": 0.000498212266601473, "loss": 25.8193, "step": 6000 }, { "epoch": 0.011738840058304206, "grad_norm": 21.75, "learning_rate": 0.0004982057645705636, "loss": 26.0094, "step": 6020 }, { "epoch": 0.011777839526936446, "grad_norm": 21.875, "learning_rate": 0.0004981992625396542, "loss": 25.8137, "step": 6040 }, { "epoch": 0.011816838995568686, "grad_norm": 16.25, "learning_rate": 0.0004981927605087449, "loss": 25.8299, "step": 6060 }, { "epoch": 0.011855838464200925, "grad_norm": 18.625, "learning_rate": 0.0004981862584778356, "loss": 25.8973, "step": 6080 }, { "epoch": 0.011894837932833165, "grad_norm": 16.125, "learning_rate": 0.0004981797564469262, "loss": 25.749, "step": 6100 }, { "epoch": 0.011933837401465405, "grad_norm": 20.75, "learning_rate": 0.0004981732544160169, "loss": 25.9066, "step": 6120 }, { "epoch": 0.011972836870097646, "grad_norm": 20.875, "learning_rate": 0.0004981667523851075, "loss": 25.8258, "step": 6140 }, { "epoch": 0.012011836338729884, "grad_norm": 17.625, "learning_rate": 0.0004981602503541982, "loss": 25.7555, "step": 6160 }, { "epoch": 0.012050835807362124, "grad_norm": 23.125, "learning_rate": 0.0004981537483232888, "loss": 25.7249, "step": 6180 }, { "epoch": 0.012089835275994365, "grad_norm": 18.375, "learning_rate": 0.0004981472462923795, "loss": 25.7314, "step": 6200 }, { "epoch": 0.012128834744626605, "grad_norm": 20.125, "learning_rate": 0.00049814074426147, "loss": 25.66, "step": 6220 }, { "epoch": 0.012167834213258844, "grad_norm": 16.125, "learning_rate": 0.0004981342422305607, "loss": 25.6404, "step": 6240 }, { "epoch": 0.012206833681891084, "grad_norm": 16.375, "learning_rate": 0.0004981277401996514, "loss": 25.589, "step": 6260 }, { "epoch": 0.012245833150523324, "grad_norm": 21.625, "learning_rate": 0.000498121238168742, "loss": 25.6582, "step": 6280 }, { "epoch": 0.012284832619155564, "grad_norm": 27.0, "learning_rate": 0.0004981147361378327, "loss": 25.5586, "step": 6300 }, { "epoch": 0.012323832087787805, "grad_norm": 16.625, "learning_rate": 0.0004981082341069233, "loss": 25.5918, "step": 6320 }, { "epoch": 0.012362831556420043, "grad_norm": 18.75, "learning_rate": 0.000498101732076014, "loss": 25.5063, "step": 6340 }, { "epoch": 0.012401831025052283, "grad_norm": 18.375, "learning_rate": 0.0004980952300451046, "loss": 25.3815, "step": 6360 }, { "epoch": 0.012440830493684524, "grad_norm": 20.375, "learning_rate": 0.0004980887280141952, "loss": 25.5039, "step": 6380 }, { "epoch": 0.012479829962316764, "grad_norm": 19.625, "learning_rate": 0.0004980822259832859, "loss": 25.5223, "step": 6400 }, { "epoch": 0.012518829430949003, "grad_norm": 19.25, "learning_rate": 0.0004980757239523765, "loss": 25.4755, "step": 6420 }, { "epoch": 0.012557828899581243, "grad_norm": 34.0, "learning_rate": 0.0004980692219214672, "loss": 25.4718, "step": 6440 }, { "epoch": 0.012596828368213483, "grad_norm": 25.375, "learning_rate": 0.0004980627198905578, "loss": 25.3276, "step": 6460 }, { "epoch": 0.012635827836845723, "grad_norm": 17.625, "learning_rate": 0.0004980562178596485, "loss": 25.2483, "step": 6480 }, { "epoch": 0.012674827305477964, "grad_norm": 18.875, "learning_rate": 0.0004980497158287391, "loss": 25.3567, "step": 6500 }, { "epoch": 0.012713826774110202, "grad_norm": 25.375, "learning_rate": 0.0004980432137978297, "loss": 25.2982, "step": 6520 }, { "epoch": 0.012752826242742443, "grad_norm": 18.25, "learning_rate": 0.0004980367117669203, "loss": 25.3238, "step": 6540 }, { "epoch": 0.012791825711374683, "grad_norm": 21.25, "learning_rate": 0.000498030209736011, "loss": 25.3001, "step": 6560 }, { "epoch": 0.012830825180006923, "grad_norm": 26.75, "learning_rate": 0.0004980237077051017, "loss": 25.2905, "step": 6580 }, { "epoch": 0.012869824648639162, "grad_norm": 19.75, "learning_rate": 0.0004980172056741923, "loss": 25.2419, "step": 6600 }, { "epoch": 0.012908824117271402, "grad_norm": 19.5, "learning_rate": 0.000498010703643283, "loss": 25.2094, "step": 6620 }, { "epoch": 0.012947823585903642, "grad_norm": 19.875, "learning_rate": 0.0004980042016123736, "loss": 25.2563, "step": 6640 }, { "epoch": 0.012986823054535883, "grad_norm": 21.125, "learning_rate": 0.0004979976995814643, "loss": 25.0934, "step": 6660 }, { "epoch": 0.013025822523168121, "grad_norm": 16.25, "learning_rate": 0.0004979911975505549, "loss": 25.1629, "step": 6680 }, { "epoch": 0.013064821991800361, "grad_norm": 18.875, "learning_rate": 0.0004979846955196456, "loss": 25.1417, "step": 6700 }, { "epoch": 0.013103821460432602, "grad_norm": 16.125, "learning_rate": 0.0004979781934887363, "loss": 25.0869, "step": 6720 }, { "epoch": 0.013142820929064842, "grad_norm": 24.25, "learning_rate": 0.0004979716914578268, "loss": 25.0643, "step": 6740 }, { "epoch": 0.013181820397697082, "grad_norm": 33.0, "learning_rate": 0.0004979651894269175, "loss": 25.1113, "step": 6760 }, { "epoch": 0.01322081986632932, "grad_norm": 28.0, "learning_rate": 0.0004979586873960081, "loss": 25.0073, "step": 6780 }, { "epoch": 0.013259819334961561, "grad_norm": 19.875, "learning_rate": 0.0004979521853650988, "loss": 25.0049, "step": 6800 }, { "epoch": 0.013298818803593801, "grad_norm": 22.375, "learning_rate": 0.0004979456833341894, "loss": 25.0438, "step": 6820 }, { "epoch": 0.013337818272226042, "grad_norm": 16.0, "learning_rate": 0.0004979391813032801, "loss": 25.021, "step": 6840 }, { "epoch": 0.01337681774085828, "grad_norm": 19.25, "learning_rate": 0.0004979326792723707, "loss": 24.9931, "step": 6860 }, { "epoch": 0.01341581720949052, "grad_norm": 23.5, "learning_rate": 0.0004979261772414614, "loss": 24.8964, "step": 6880 }, { "epoch": 0.01345481667812276, "grad_norm": 18.625, "learning_rate": 0.0004979196752105521, "loss": 24.9723, "step": 6900 }, { "epoch": 0.013493816146755001, "grad_norm": 16.375, "learning_rate": 0.0004979131731796427, "loss": 24.8963, "step": 6920 }, { "epoch": 0.01353281561538724, "grad_norm": 16.5, "learning_rate": 0.0004979066711487334, "loss": 24.919, "step": 6940 }, { "epoch": 0.01357181508401948, "grad_norm": 19.25, "learning_rate": 0.000497900169117824, "loss": 24.914, "step": 6960 }, { "epoch": 0.01361081455265172, "grad_norm": 19.625, "learning_rate": 0.0004978936670869146, "loss": 24.9669, "step": 6980 }, { "epoch": 0.01364981402128396, "grad_norm": 16.875, "learning_rate": 0.0004978871650560052, "loss": 24.7311, "step": 7000 }, { "epoch": 0.0136888134899162, "grad_norm": 16.375, "learning_rate": 0.0004978806630250959, "loss": 24.8256, "step": 7020 }, { "epoch": 0.01372781295854844, "grad_norm": 17.25, "learning_rate": 0.0004978741609941866, "loss": 24.8171, "step": 7040 }, { "epoch": 0.01376681242718068, "grad_norm": 19.0, "learning_rate": 0.0004978676589632772, "loss": 24.7304, "step": 7060 }, { "epoch": 0.01380581189581292, "grad_norm": 20.5, "learning_rate": 0.0004978611569323679, "loss": 24.7342, "step": 7080 }, { "epoch": 0.01384481136444516, "grad_norm": 17.75, "learning_rate": 0.0004978546549014585, "loss": 24.7016, "step": 7100 }, { "epoch": 0.013883810833077399, "grad_norm": 15.4375, "learning_rate": 0.0004978481528705492, "loss": 24.6967, "step": 7120 }, { "epoch": 0.013922810301709639, "grad_norm": 19.25, "learning_rate": 0.0004978416508396397, "loss": 24.7322, "step": 7140 }, { "epoch": 0.01396180977034188, "grad_norm": 17.125, "learning_rate": 0.0004978351488087304, "loss": 24.7373, "step": 7160 }, { "epoch": 0.01400080923897412, "grad_norm": 19.375, "learning_rate": 0.000497828646777821, "loss": 24.7211, "step": 7180 }, { "epoch": 0.014039808707606358, "grad_norm": 16.875, "learning_rate": 0.0004978221447469117, "loss": 24.6276, "step": 7200 }, { "epoch": 0.014078808176238598, "grad_norm": 16.125, "learning_rate": 0.0004978156427160024, "loss": 24.5834, "step": 7220 }, { "epoch": 0.014117807644870839, "grad_norm": 22.375, "learning_rate": 0.000497809140685093, "loss": 24.616, "step": 7240 }, { "epoch": 0.014156807113503079, "grad_norm": 19.375, "learning_rate": 0.0004978026386541837, "loss": 24.5678, "step": 7260 }, { "epoch": 0.01419580658213532, "grad_norm": 15.375, "learning_rate": 0.0004977961366232743, "loss": 24.5189, "step": 7280 }, { "epoch": 0.014234806050767558, "grad_norm": 24.0, "learning_rate": 0.0004977896345923649, "loss": 24.5328, "step": 7300 }, { "epoch": 0.014273805519399798, "grad_norm": 20.375, "learning_rate": 0.0004977831325614555, "loss": 24.489, "step": 7320 }, { "epoch": 0.014312804988032038, "grad_norm": 17.75, "learning_rate": 0.0004977766305305462, "loss": 24.5116, "step": 7340 }, { "epoch": 0.014351804456664279, "grad_norm": 16.375, "learning_rate": 0.0004977701284996368, "loss": 24.4816, "step": 7360 }, { "epoch": 0.014390803925296517, "grad_norm": 16.0, "learning_rate": 0.0004977636264687275, "loss": 24.583, "step": 7380 }, { "epoch": 0.014429803393928757, "grad_norm": 16.875, "learning_rate": 0.0004977571244378182, "loss": 24.4943, "step": 7400 }, { "epoch": 0.014468802862560998, "grad_norm": 18.875, "learning_rate": 0.0004977506224069088, "loss": 24.4481, "step": 7420 }, { "epoch": 0.014507802331193238, "grad_norm": 23.375, "learning_rate": 0.0004977441203759995, "loss": 24.4096, "step": 7440 }, { "epoch": 0.014546801799825477, "grad_norm": 16.5, "learning_rate": 0.0004977376183450901, "loss": 24.4307, "step": 7460 }, { "epoch": 0.014585801268457717, "grad_norm": 16.25, "learning_rate": 0.0004977311163141807, "loss": 24.4186, "step": 7480 }, { "epoch": 0.014624800737089957, "grad_norm": 13.5, "learning_rate": 0.0004977246142832713, "loss": 24.3886, "step": 7500 }, { "epoch": 0.014663800205722197, "grad_norm": 18.75, "learning_rate": 0.000497718112252362, "loss": 24.3556, "step": 7520 }, { "epoch": 0.014702799674354438, "grad_norm": 15.875, "learning_rate": 0.0004977116102214527, "loss": 24.2817, "step": 7540 }, { "epoch": 0.014741799142986676, "grad_norm": 17.25, "learning_rate": 0.0004977051081905433, "loss": 24.2631, "step": 7560 }, { "epoch": 0.014780798611618916, "grad_norm": 21.25, "learning_rate": 0.000497698606159634, "loss": 24.2981, "step": 7580 }, { "epoch": 0.014819798080251157, "grad_norm": 17.875, "learning_rate": 0.0004976921041287246, "loss": 24.37, "step": 7600 }, { "epoch": 0.014858797548883397, "grad_norm": 16.375, "learning_rate": 0.0004976856020978153, "loss": 24.2352, "step": 7620 }, { "epoch": 0.014897797017515636, "grad_norm": 17.125, "learning_rate": 0.0004976791000669059, "loss": 24.272, "step": 7640 }, { "epoch": 0.014936796486147876, "grad_norm": 15.6875, "learning_rate": 0.0004976725980359966, "loss": 24.23, "step": 7660 }, { "epoch": 0.014975795954780116, "grad_norm": 19.75, "learning_rate": 0.0004976660960050873, "loss": 24.2317, "step": 7680 }, { "epoch": 0.015014795423412356, "grad_norm": 20.75, "learning_rate": 0.0004976595939741779, "loss": 24.2702, "step": 7700 }, { "epoch": 0.015053794892044597, "grad_norm": 15.4375, "learning_rate": 0.0004976530919432685, "loss": 24.1744, "step": 7720 }, { "epoch": 0.015092794360676835, "grad_norm": 23.375, "learning_rate": 0.0004976465899123591, "loss": 24.2229, "step": 7740 }, { "epoch": 0.015131793829309076, "grad_norm": 18.375, "learning_rate": 0.0004976400878814498, "loss": 24.219, "step": 7760 }, { "epoch": 0.015170793297941316, "grad_norm": 18.5, "learning_rate": 0.0004976335858505404, "loss": 24.1914, "step": 7780 }, { "epoch": 0.015209792766573556, "grad_norm": 25.75, "learning_rate": 0.0004976270838196311, "loss": 24.1835, "step": 7800 }, { "epoch": 0.015248792235205795, "grad_norm": 26.25, "learning_rate": 0.0004976205817887217, "loss": 24.0501, "step": 7820 }, { "epoch": 0.015287791703838035, "grad_norm": 20.75, "learning_rate": 0.0004976140797578124, "loss": 24.0442, "step": 7840 }, { "epoch": 0.015326791172470275, "grad_norm": 18.125, "learning_rate": 0.0004976075777269031, "loss": 24.1007, "step": 7860 }, { "epoch": 0.015365790641102516, "grad_norm": 19.375, "learning_rate": 0.0004976010756959937, "loss": 24.1539, "step": 7880 }, { "epoch": 0.015404790109734754, "grad_norm": 17.375, "learning_rate": 0.0004975945736650844, "loss": 24.0851, "step": 7900 }, { "epoch": 0.015443789578366994, "grad_norm": 17.0, "learning_rate": 0.0004975880716341749, "loss": 24.0792, "step": 7920 }, { "epoch": 0.015482789046999235, "grad_norm": 14.9375, "learning_rate": 0.0004975815696032656, "loss": 23.9018, "step": 7940 }, { "epoch": 0.015521788515631475, "grad_norm": 18.25, "learning_rate": 0.0004975750675723562, "loss": 23.9829, "step": 7960 }, { "epoch": 0.015560787984263715, "grad_norm": 16.625, "learning_rate": 0.0004975685655414469, "loss": 23.9823, "step": 7980 }, { "epoch": 0.015599787452895954, "grad_norm": 17.5, "learning_rate": 0.0004975620635105375, "loss": 24.065, "step": 8000 }, { "epoch": 0.015638786921528196, "grad_norm": 13.4375, "learning_rate": 0.0004975555614796282, "loss": 24.0005, "step": 8020 }, { "epoch": 0.015677786390160434, "grad_norm": 15.6875, "learning_rate": 0.0004975490594487189, "loss": 23.996, "step": 8040 }, { "epoch": 0.015716785858792673, "grad_norm": 19.375, "learning_rate": 0.0004975425574178094, "loss": 23.9214, "step": 8060 }, { "epoch": 0.015755785327424915, "grad_norm": 16.5, "learning_rate": 0.0004975360553869001, "loss": 23.8357, "step": 8080 }, { "epoch": 0.015794784796057153, "grad_norm": 16.75, "learning_rate": 0.0004975295533559907, "loss": 23.8132, "step": 8100 }, { "epoch": 0.015833784264689392, "grad_norm": 15.9375, "learning_rate": 0.0004975230513250814, "loss": 23.9483, "step": 8120 }, { "epoch": 0.015872783733321634, "grad_norm": 16.75, "learning_rate": 0.000497516549294172, "loss": 23.8352, "step": 8140 }, { "epoch": 0.015911783201953873, "grad_norm": 23.25, "learning_rate": 0.0004975100472632627, "loss": 23.8012, "step": 8160 }, { "epoch": 0.015950782670586115, "grad_norm": 15.0, "learning_rate": 0.0004975035452323533, "loss": 23.8594, "step": 8180 }, { "epoch": 0.015989782139218353, "grad_norm": 16.75, "learning_rate": 0.000497497043201444, "loss": 23.7244, "step": 8200 }, { "epoch": 0.01602878160785059, "grad_norm": 16.125, "learning_rate": 0.0004974905411705346, "loss": 23.7633, "step": 8220 }, { "epoch": 0.016067781076482834, "grad_norm": 14.0, "learning_rate": 0.0004974840391396252, "loss": 23.8175, "step": 8240 }, { "epoch": 0.016106780545115072, "grad_norm": 17.875, "learning_rate": 0.0004974775371087159, "loss": 23.8383, "step": 8260 }, { "epoch": 0.016145780013747314, "grad_norm": 18.875, "learning_rate": 0.0004974710350778065, "loss": 23.7206, "step": 8280 }, { "epoch": 0.016184779482379553, "grad_norm": 13.5, "learning_rate": 0.0004974645330468972, "loss": 23.7844, "step": 8300 }, { "epoch": 0.01622377895101179, "grad_norm": 15.75, "learning_rate": 0.0004974580310159878, "loss": 23.762, "step": 8320 }, { "epoch": 0.016262778419644033, "grad_norm": 16.625, "learning_rate": 0.0004974515289850785, "loss": 23.6945, "step": 8340 }, { "epoch": 0.016301777888276272, "grad_norm": 16.625, "learning_rate": 0.0004974450269541692, "loss": 23.7307, "step": 8360 }, { "epoch": 0.01634077735690851, "grad_norm": 16.875, "learning_rate": 0.0004974385249232598, "loss": 23.6975, "step": 8380 }, { "epoch": 0.016379776825540752, "grad_norm": 15.25, "learning_rate": 0.0004974320228923505, "loss": 23.6695, "step": 8400 }, { "epoch": 0.01641877629417299, "grad_norm": 18.0, "learning_rate": 0.0004974255208614411, "loss": 23.6389, "step": 8420 }, { "epoch": 0.016457775762805233, "grad_norm": 17.0, "learning_rate": 0.0004974190188305317, "loss": 23.6681, "step": 8440 }, { "epoch": 0.01649677523143747, "grad_norm": 15.5, "learning_rate": 0.0004974125167996223, "loss": 23.636, "step": 8460 }, { "epoch": 0.01653577470006971, "grad_norm": 16.5, "learning_rate": 0.000497406014768713, "loss": 23.649, "step": 8480 }, { "epoch": 0.016574774168701952, "grad_norm": 15.4375, "learning_rate": 0.0004973995127378036, "loss": 23.6334, "step": 8500 }, { "epoch": 0.01661377363733419, "grad_norm": 16.5, "learning_rate": 0.0004973930107068943, "loss": 23.5497, "step": 8520 }, { "epoch": 0.016652773105966433, "grad_norm": 16.5, "learning_rate": 0.000497386508675985, "loss": 23.593, "step": 8540 }, { "epoch": 0.01669177257459867, "grad_norm": 17.125, "learning_rate": 0.0004973800066450756, "loss": 23.4901, "step": 8560 }, { "epoch": 0.01673077204323091, "grad_norm": 20.25, "learning_rate": 0.0004973735046141663, "loss": 23.5908, "step": 8580 }, { "epoch": 0.016769771511863152, "grad_norm": 18.5, "learning_rate": 0.0004973670025832569, "loss": 23.4932, "step": 8600 }, { "epoch": 0.01680877098049539, "grad_norm": 17.75, "learning_rate": 0.0004973605005523476, "loss": 23.5075, "step": 8620 }, { "epoch": 0.01684777044912763, "grad_norm": 14.3125, "learning_rate": 0.0004973539985214382, "loss": 23.5798, "step": 8640 }, { "epoch": 0.01688676991775987, "grad_norm": 15.0625, "learning_rate": 0.0004973474964905289, "loss": 23.469, "step": 8660 }, { "epoch": 0.01692576938639211, "grad_norm": 18.125, "learning_rate": 0.0004973409944596194, "loss": 23.4152, "step": 8680 }, { "epoch": 0.01696476885502435, "grad_norm": 14.625, "learning_rate": 0.0004973344924287101, "loss": 23.4037, "step": 8700 }, { "epoch": 0.01700376832365659, "grad_norm": 16.75, "learning_rate": 0.0004973279903978008, "loss": 23.4624, "step": 8720 }, { "epoch": 0.01704276779228883, "grad_norm": 14.875, "learning_rate": 0.0004973214883668914, "loss": 23.4046, "step": 8740 }, { "epoch": 0.01708176726092107, "grad_norm": 15.0625, "learning_rate": 0.0004973149863359821, "loss": 23.3663, "step": 8760 }, { "epoch": 0.01712076672955331, "grad_norm": 19.625, "learning_rate": 0.0004973084843050727, "loss": 23.4256, "step": 8780 }, { "epoch": 0.01715976619818555, "grad_norm": 20.25, "learning_rate": 0.0004973019822741634, "loss": 23.3051, "step": 8800 }, { "epoch": 0.01719876566681779, "grad_norm": 13.5, "learning_rate": 0.000497295480243254, "loss": 23.2982, "step": 8820 }, { "epoch": 0.01723776513545003, "grad_norm": 16.25, "learning_rate": 0.0004972889782123446, "loss": 23.3641, "step": 8840 }, { "epoch": 0.01727676460408227, "grad_norm": 17.5, "learning_rate": 0.0004972824761814353, "loss": 23.3412, "step": 8860 }, { "epoch": 0.01731576407271451, "grad_norm": 18.75, "learning_rate": 0.0004972759741505259, "loss": 23.1724, "step": 8880 }, { "epoch": 0.017354763541346747, "grad_norm": 14.0, "learning_rate": 0.0004972694721196166, "loss": 23.3955, "step": 8900 }, { "epoch": 0.01739376300997899, "grad_norm": 17.75, "learning_rate": 0.0004972629700887072, "loss": 23.2876, "step": 8920 }, { "epoch": 0.017432762478611228, "grad_norm": 19.25, "learning_rate": 0.0004972564680577979, "loss": 23.2602, "step": 8940 }, { "epoch": 0.01747176194724347, "grad_norm": 14.0, "learning_rate": 0.0004972499660268885, "loss": 23.2543, "step": 8960 }, { "epoch": 0.01751076141587571, "grad_norm": 16.875, "learning_rate": 0.0004972434639959791, "loss": 23.2048, "step": 8980 }, { "epoch": 0.017549760884507947, "grad_norm": 13.1875, "learning_rate": 0.0004972369619650697, "loss": 23.2341, "step": 9000 }, { "epoch": 0.01758876035314019, "grad_norm": 15.625, "learning_rate": 0.0004972304599341604, "loss": 23.0799, "step": 9020 }, { "epoch": 0.017627759821772428, "grad_norm": 18.0, "learning_rate": 0.0004972239579032511, "loss": 23.2729, "step": 9040 }, { "epoch": 0.01766675929040467, "grad_norm": 15.125, "learning_rate": 0.0004972174558723417, "loss": 23.2567, "step": 9060 }, { "epoch": 0.017705758759036908, "grad_norm": 14.8125, "learning_rate": 0.0004972109538414324, "loss": 23.1843, "step": 9080 }, { "epoch": 0.017744758227669147, "grad_norm": 16.625, "learning_rate": 0.000497204451810523, "loss": 23.1561, "step": 9100 }, { "epoch": 0.01778375769630139, "grad_norm": 16.125, "learning_rate": 0.0004971979497796137, "loss": 23.1139, "step": 9120 }, { "epoch": 0.017822757164933627, "grad_norm": 15.3125, "learning_rate": 0.0004971914477487043, "loss": 23.0666, "step": 9140 }, { "epoch": 0.01786175663356587, "grad_norm": 16.5, "learning_rate": 0.000497184945717795, "loss": 23.2283, "step": 9160 }, { "epoch": 0.017900756102198108, "grad_norm": 17.625, "learning_rate": 0.0004971784436868855, "loss": 23.1668, "step": 9180 }, { "epoch": 0.017939755570830346, "grad_norm": 13.0625, "learning_rate": 0.0004971719416559762, "loss": 23.0682, "step": 9200 }, { "epoch": 0.01797875503946259, "grad_norm": 14.3125, "learning_rate": 0.0004971654396250669, "loss": 23.0662, "step": 9220 }, { "epoch": 0.018017754508094827, "grad_norm": 18.0, "learning_rate": 0.0004971589375941575, "loss": 23.0918, "step": 9240 }, { "epoch": 0.018056753976727066, "grad_norm": 14.9375, "learning_rate": 0.0004971524355632482, "loss": 23.1391, "step": 9260 }, { "epoch": 0.018095753445359308, "grad_norm": 15.3125, "learning_rate": 0.0004971459335323388, "loss": 23.0307, "step": 9280 }, { "epoch": 0.018134752913991546, "grad_norm": 15.5625, "learning_rate": 0.0004971394315014295, "loss": 23.1396, "step": 9300 }, { "epoch": 0.018173752382623788, "grad_norm": 24.5, "learning_rate": 0.0004971329294705201, "loss": 23.1475, "step": 9320 }, { "epoch": 0.018212751851256027, "grad_norm": 16.375, "learning_rate": 0.0004971264274396108, "loss": 23.0066, "step": 9340 }, { "epoch": 0.018251751319888265, "grad_norm": 16.25, "learning_rate": 0.0004971199254087015, "loss": 22.9784, "step": 9360 }, { "epoch": 0.018290750788520507, "grad_norm": 17.625, "learning_rate": 0.0004971134233777921, "loss": 23.0222, "step": 9380 }, { "epoch": 0.018329750257152746, "grad_norm": 18.75, "learning_rate": 0.0004971069213468828, "loss": 22.9303, "step": 9400 }, { "epoch": 0.018368749725784988, "grad_norm": 19.625, "learning_rate": 0.0004971004193159733, "loss": 23.0506, "step": 9420 }, { "epoch": 0.018407749194417226, "grad_norm": 12.875, "learning_rate": 0.000497093917285064, "loss": 22.9712, "step": 9440 }, { "epoch": 0.018446748663049465, "grad_norm": 13.75, "learning_rate": 0.0004970874152541546, "loss": 23.0226, "step": 9460 }, { "epoch": 0.018485748131681707, "grad_norm": 13.5, "learning_rate": 0.0004970809132232453, "loss": 22.9014, "step": 9480 }, { "epoch": 0.018524747600313946, "grad_norm": 15.4375, "learning_rate": 0.000497074411192336, "loss": 22.8532, "step": 9500 }, { "epoch": 0.018563747068946184, "grad_norm": 13.0625, "learning_rate": 0.0004970679091614266, "loss": 22.8703, "step": 9520 }, { "epoch": 0.018602746537578426, "grad_norm": 13.375, "learning_rate": 0.0004970614071305173, "loss": 22.8849, "step": 9540 }, { "epoch": 0.018641746006210665, "grad_norm": 16.25, "learning_rate": 0.0004970549050996079, "loss": 22.7775, "step": 9560 }, { "epoch": 0.018680745474842907, "grad_norm": 16.125, "learning_rate": 0.0004970484030686986, "loss": 22.8374, "step": 9580 }, { "epoch": 0.018719744943475145, "grad_norm": 15.1875, "learning_rate": 0.0004970419010377891, "loss": 22.7304, "step": 9600 }, { "epoch": 0.018758744412107384, "grad_norm": 14.5625, "learning_rate": 0.0004970353990068798, "loss": 22.8324, "step": 9620 }, { "epoch": 0.018797743880739626, "grad_norm": 22.875, "learning_rate": 0.0004970288969759704, "loss": 22.8605, "step": 9640 }, { "epoch": 0.018836743349371864, "grad_norm": 20.875, "learning_rate": 0.0004970223949450611, "loss": 22.7303, "step": 9660 }, { "epoch": 0.018875742818004106, "grad_norm": 19.5, "learning_rate": 0.0004970158929141518, "loss": 22.7659, "step": 9680 }, { "epoch": 0.018914742286636345, "grad_norm": 14.8125, "learning_rate": 0.0004970093908832424, "loss": 22.7596, "step": 9700 }, { "epoch": 0.018953741755268583, "grad_norm": 16.375, "learning_rate": 0.0004970028888523331, "loss": 22.814, "step": 9720 }, { "epoch": 0.018992741223900825, "grad_norm": 14.875, "learning_rate": 0.0004969963868214237, "loss": 22.8122, "step": 9740 }, { "epoch": 0.019031740692533064, "grad_norm": 15.875, "learning_rate": 0.0004969898847905143, "loss": 22.8844, "step": 9760 }, { "epoch": 0.019070740161165303, "grad_norm": 17.0, "learning_rate": 0.0004969833827596049, "loss": 22.767, "step": 9780 }, { "epoch": 0.019109739629797545, "grad_norm": 14.4375, "learning_rate": 0.0004969768807286956, "loss": 22.7825, "step": 9800 }, { "epoch": 0.019148739098429783, "grad_norm": 13.875, "learning_rate": 0.0004969703786977862, "loss": 22.6987, "step": 9820 }, { "epoch": 0.019187738567062025, "grad_norm": 15.5, "learning_rate": 0.0004969638766668769, "loss": 22.7735, "step": 9840 }, { "epoch": 0.019226738035694264, "grad_norm": 15.0, "learning_rate": 0.0004969573746359676, "loss": 22.7771, "step": 9860 }, { "epoch": 0.019265737504326502, "grad_norm": 14.875, "learning_rate": 0.0004969508726050582, "loss": 22.7458, "step": 9880 }, { "epoch": 0.019304736972958744, "grad_norm": 15.0625, "learning_rate": 0.0004969443705741489, "loss": 22.6444, "step": 9900 }, { "epoch": 0.019343736441590983, "grad_norm": 15.125, "learning_rate": 0.0004969378685432394, "loss": 22.6917, "step": 9920 }, { "epoch": 0.019382735910223225, "grad_norm": 14.375, "learning_rate": 0.0004969313665123301, "loss": 22.6688, "step": 9940 }, { "epoch": 0.019421735378855463, "grad_norm": 13.25, "learning_rate": 0.0004969248644814207, "loss": 22.6698, "step": 9960 }, { "epoch": 0.019460734847487702, "grad_norm": 16.375, "learning_rate": 0.0004969183624505114, "loss": 22.4882, "step": 9980 }, { "epoch": 0.019499734316119944, "grad_norm": 18.25, "learning_rate": 0.000496911860419602, "loss": 22.7369, "step": 10000 }, { "epoch": 0.019538733784752182, "grad_norm": 14.0, "learning_rate": 0.0004969053583886927, "loss": 22.6118, "step": 10020 }, { "epoch": 0.01957773325338442, "grad_norm": 15.9375, "learning_rate": 0.0004968988563577834, "loss": 22.6524, "step": 10040 }, { "epoch": 0.019616732722016663, "grad_norm": 15.4375, "learning_rate": 0.000496892354326874, "loss": 22.6315, "step": 10060 }, { "epoch": 0.0196557321906489, "grad_norm": 15.9375, "learning_rate": 0.0004968858522959647, "loss": 22.5476, "step": 10080 }, { "epoch": 0.019694731659281144, "grad_norm": 15.625, "learning_rate": 0.0004968793502650553, "loss": 22.5912, "step": 10100 }, { "epoch": 0.019733731127913382, "grad_norm": 15.625, "learning_rate": 0.000496872848234146, "loss": 22.4939, "step": 10120 }, { "epoch": 0.01977273059654562, "grad_norm": 15.875, "learning_rate": 0.0004968663462032366, "loss": 22.553, "step": 10140 }, { "epoch": 0.019811730065177863, "grad_norm": 15.5625, "learning_rate": 0.0004968598441723272, "loss": 22.5758, "step": 10160 }, { "epoch": 0.0198507295338101, "grad_norm": 14.5, "learning_rate": 0.0004968533421414179, "loss": 22.4262, "step": 10180 }, { "epoch": 0.019889729002442343, "grad_norm": 14.6875, "learning_rate": 0.0004968468401105085, "loss": 22.5434, "step": 10200 }, { "epoch": 0.019928728471074582, "grad_norm": 16.25, "learning_rate": 0.0004968403380795992, "loss": 22.5854, "step": 10220 }, { "epoch": 0.01996772793970682, "grad_norm": 12.375, "learning_rate": 0.0004968338360486898, "loss": 22.5922, "step": 10240 }, { "epoch": 0.020006727408339062, "grad_norm": 14.9375, "learning_rate": 0.0004968273340177805, "loss": 22.5855, "step": 10260 }, { "epoch": 0.0200457268769713, "grad_norm": 13.8125, "learning_rate": 0.0004968208319868711, "loss": 22.5251, "step": 10280 }, { "epoch": 0.02008472634560354, "grad_norm": 13.0625, "learning_rate": 0.0004968143299559618, "loss": 22.5096, "step": 10300 }, { "epoch": 0.02012372581423578, "grad_norm": 14.125, "learning_rate": 0.0004968078279250525, "loss": 22.5954, "step": 10320 }, { "epoch": 0.02016272528286802, "grad_norm": 15.4375, "learning_rate": 0.0004968013258941431, "loss": 22.4293, "step": 10340 }, { "epoch": 0.020201724751500262, "grad_norm": 19.25, "learning_rate": 0.0004967948238632338, "loss": 22.4421, "step": 10360 }, { "epoch": 0.0202407242201325, "grad_norm": 14.6875, "learning_rate": 0.0004967883218323243, "loss": 22.3723, "step": 10380 }, { "epoch": 0.02027972368876474, "grad_norm": 14.0625, "learning_rate": 0.000496781819801415, "loss": 22.3531, "step": 10400 }, { "epoch": 0.02031872315739698, "grad_norm": 14.1875, "learning_rate": 0.0004967753177705056, "loss": 22.3866, "step": 10420 }, { "epoch": 0.02035772262602922, "grad_norm": 11.9375, "learning_rate": 0.0004967688157395963, "loss": 22.4081, "step": 10440 }, { "epoch": 0.020396722094661462, "grad_norm": 17.0, "learning_rate": 0.0004967623137086869, "loss": 22.4827, "step": 10460 }, { "epoch": 0.0204357215632937, "grad_norm": 14.9375, "learning_rate": 0.0004967558116777776, "loss": 22.3606, "step": 10480 }, { "epoch": 0.02047472103192594, "grad_norm": 14.25, "learning_rate": 0.0004967493096468683, "loss": 22.3265, "step": 10500 }, { "epoch": 0.02051372050055818, "grad_norm": 15.0, "learning_rate": 0.0004967428076159588, "loss": 22.3838, "step": 10520 }, { "epoch": 0.02055271996919042, "grad_norm": 13.25, "learning_rate": 0.0004967363055850495, "loss": 22.3391, "step": 10540 }, { "epoch": 0.020591719437822658, "grad_norm": 14.1875, "learning_rate": 0.0004967298035541401, "loss": 22.341, "step": 10560 }, { "epoch": 0.0206307189064549, "grad_norm": 16.625, "learning_rate": 0.0004967233015232308, "loss": 22.3056, "step": 10580 }, { "epoch": 0.02066971837508714, "grad_norm": 13.3125, "learning_rate": 0.0004967167994923214, "loss": 22.3764, "step": 10600 }, { "epoch": 0.02070871784371938, "grad_norm": 13.6875, "learning_rate": 0.0004967102974614121, "loss": 22.299, "step": 10620 }, { "epoch": 0.02074771731235162, "grad_norm": 17.875, "learning_rate": 0.0004967037954305027, "loss": 22.3097, "step": 10640 }, { "epoch": 0.020786716780983858, "grad_norm": 15.0, "learning_rate": 0.0004966972933995934, "loss": 22.2525, "step": 10660 }, { "epoch": 0.0208257162496161, "grad_norm": 12.8125, "learning_rate": 0.000496690791368684, "loss": 22.3429, "step": 10680 }, { "epoch": 0.020864715718248338, "grad_norm": 14.6875, "learning_rate": 0.0004966842893377746, "loss": 22.2322, "step": 10700 }, { "epoch": 0.02090371518688058, "grad_norm": 18.25, "learning_rate": 0.0004966777873068653, "loss": 22.3028, "step": 10720 }, { "epoch": 0.02094271465551282, "grad_norm": 15.5, "learning_rate": 0.0004966712852759559, "loss": 22.2583, "step": 10740 }, { "epoch": 0.020981714124145057, "grad_norm": 15.0625, "learning_rate": 0.0004966647832450466, "loss": 22.2295, "step": 10760 }, { "epoch": 0.0210207135927773, "grad_norm": 14.4375, "learning_rate": 0.0004966582812141372, "loss": 22.1151, "step": 10780 }, { "epoch": 0.021059713061409538, "grad_norm": 13.875, "learning_rate": 0.0004966517791832279, "loss": 22.2718, "step": 10800 }, { "epoch": 0.021098712530041776, "grad_norm": 19.625, "learning_rate": 0.0004966452771523186, "loss": 22.2769, "step": 10820 }, { "epoch": 0.02113771199867402, "grad_norm": 13.25, "learning_rate": 0.0004966387751214092, "loss": 22.1288, "step": 10840 }, { "epoch": 0.021176711467306257, "grad_norm": 13.5, "learning_rate": 0.0004966322730904999, "loss": 22.1344, "step": 10860 }, { "epoch": 0.0212157109359385, "grad_norm": 14.8125, "learning_rate": 0.0004966257710595904, "loss": 22.0977, "step": 10880 }, { "epoch": 0.021254710404570738, "grad_norm": 14.125, "learning_rate": 0.0004966192690286811, "loss": 22.1913, "step": 10900 }, { "epoch": 0.021293709873202976, "grad_norm": 14.5, "learning_rate": 0.0004966127669977717, "loss": 22.1427, "step": 10920 }, { "epoch": 0.021332709341835218, "grad_norm": 15.25, "learning_rate": 0.0004966062649668624, "loss": 22.2011, "step": 10940 }, { "epoch": 0.021371708810467457, "grad_norm": 13.375, "learning_rate": 0.000496599762935953, "loss": 22.0977, "step": 10960 }, { "epoch": 0.0214107082790997, "grad_norm": 13.25, "learning_rate": 0.0004965932609050437, "loss": 22.1501, "step": 10980 }, { "epoch": 0.021449707747731937, "grad_norm": 18.0, "learning_rate": 0.0004965867588741344, "loss": 22.1463, "step": 11000 }, { "epoch": 0.021488707216364176, "grad_norm": 15.5625, "learning_rate": 0.000496580256843225, "loss": 22.1454, "step": 11020 }, { "epoch": 0.021527706684996418, "grad_norm": 16.625, "learning_rate": 0.0004965737548123157, "loss": 22.1316, "step": 11040 }, { "epoch": 0.021566706153628656, "grad_norm": 14.0625, "learning_rate": 0.0004965672527814063, "loss": 22.1292, "step": 11060 }, { "epoch": 0.0216057056222609, "grad_norm": 13.75, "learning_rate": 0.000496560750750497, "loss": 21.9403, "step": 11080 }, { "epoch": 0.021644705090893137, "grad_norm": 13.75, "learning_rate": 0.0004965542487195876, "loss": 22.0882, "step": 11100 }, { "epoch": 0.021683704559525376, "grad_norm": 13.5, "learning_rate": 0.0004965477466886782, "loss": 22.168, "step": 11120 }, { "epoch": 0.021722704028157618, "grad_norm": 14.8125, "learning_rate": 0.0004965412446577688, "loss": 22.0935, "step": 11140 }, { "epoch": 0.021761703496789856, "grad_norm": 12.9375, "learning_rate": 0.0004965347426268595, "loss": 22.0371, "step": 11160 }, { "epoch": 0.021800702965422095, "grad_norm": 12.0625, "learning_rate": 0.0004965282405959502, "loss": 22.0473, "step": 11180 }, { "epoch": 0.021839702434054337, "grad_norm": 13.875, "learning_rate": 0.0004965217385650408, "loss": 21.9825, "step": 11200 }, { "epoch": 0.021878701902686575, "grad_norm": 12.8125, "learning_rate": 0.0004965152365341315, "loss": 22.0741, "step": 11220 }, { "epoch": 0.021917701371318817, "grad_norm": 15.875, "learning_rate": 0.0004965087345032221, "loss": 22.1032, "step": 11240 }, { "epoch": 0.021956700839951056, "grad_norm": 12.3125, "learning_rate": 0.0004965022324723128, "loss": 22.0735, "step": 11260 }, { "epoch": 0.021995700308583294, "grad_norm": 14.4375, "learning_rate": 0.0004964957304414034, "loss": 22.082, "step": 11280 }, { "epoch": 0.022034699777215536, "grad_norm": 12.9375, "learning_rate": 0.000496489228410494, "loss": 22.004, "step": 11300 }, { "epoch": 0.022073699245847775, "grad_norm": 12.3125, "learning_rate": 0.0004964827263795846, "loss": 21.9941, "step": 11320 }, { "epoch": 0.022112698714480017, "grad_norm": 13.25, "learning_rate": 0.0004964762243486753, "loss": 22.0307, "step": 11340 }, { "epoch": 0.022151698183112255, "grad_norm": 12.625, "learning_rate": 0.000496469722317766, "loss": 21.9673, "step": 11360 }, { "epoch": 0.022190697651744494, "grad_norm": 13.625, "learning_rate": 0.0004964632202868566, "loss": 21.8667, "step": 11380 }, { "epoch": 0.022229697120376736, "grad_norm": 13.1875, "learning_rate": 0.0004964567182559473, "loss": 21.9068, "step": 11400 }, { "epoch": 0.022268696589008975, "grad_norm": 15.5625, "learning_rate": 0.0004964502162250379, "loss": 21.9525, "step": 11420 }, { "epoch": 0.022307696057641213, "grad_norm": 16.25, "learning_rate": 0.0004964437141941286, "loss": 21.8878, "step": 11440 }, { "epoch": 0.022346695526273455, "grad_norm": 14.6875, "learning_rate": 0.0004964372121632191, "loss": 21.9101, "step": 11460 }, { "epoch": 0.022385694994905694, "grad_norm": 13.0, "learning_rate": 0.0004964307101323098, "loss": 21.9164, "step": 11480 }, { "epoch": 0.022424694463537936, "grad_norm": 14.5625, "learning_rate": 0.0004964242081014005, "loss": 21.8257, "step": 11500 }, { "epoch": 0.022463693932170174, "grad_norm": 12.75, "learning_rate": 0.0004964177060704911, "loss": 21.8326, "step": 11520 }, { "epoch": 0.022502693400802413, "grad_norm": 12.3125, "learning_rate": 0.0004964112040395818, "loss": 21.9554, "step": 11540 }, { "epoch": 0.022541692869434655, "grad_norm": 14.875, "learning_rate": 0.0004964047020086724, "loss": 21.8956, "step": 11560 }, { "epoch": 0.022580692338066893, "grad_norm": 13.75, "learning_rate": 0.0004963981999777631, "loss": 21.8812, "step": 11580 }, { "epoch": 0.022619691806699135, "grad_norm": 13.25, "learning_rate": 0.0004963916979468537, "loss": 21.7865, "step": 11600 }, { "epoch": 0.022658691275331374, "grad_norm": 13.6875, "learning_rate": 0.0004963851959159443, "loss": 21.7968, "step": 11620 }, { "epoch": 0.022697690743963612, "grad_norm": 15.75, "learning_rate": 0.0004963786938850349, "loss": 21.884, "step": 11640 }, { "epoch": 0.022736690212595854, "grad_norm": 12.8125, "learning_rate": 0.0004963721918541256, "loss": 21.8214, "step": 11660 }, { "epoch": 0.022775689681228093, "grad_norm": 11.6875, "learning_rate": 0.0004963656898232163, "loss": 21.8057, "step": 11680 }, { "epoch": 0.02281468914986033, "grad_norm": 15.25, "learning_rate": 0.0004963591877923069, "loss": 21.7443, "step": 11700 }, { "epoch": 0.022853688618492574, "grad_norm": 13.875, "learning_rate": 0.0004963526857613976, "loss": 21.8851, "step": 11720 }, { "epoch": 0.022892688087124812, "grad_norm": 11.75, "learning_rate": 0.0004963461837304882, "loss": 21.7462, "step": 11740 }, { "epoch": 0.022931687555757054, "grad_norm": 13.75, "learning_rate": 0.0004963396816995789, "loss": 21.7735, "step": 11760 }, { "epoch": 0.022970687024389293, "grad_norm": 13.4375, "learning_rate": 0.0004963331796686695, "loss": 21.8391, "step": 11780 }, { "epoch": 0.02300968649302153, "grad_norm": 11.375, "learning_rate": 0.0004963266776377602, "loss": 21.7618, "step": 11800 }, { "epoch": 0.023048685961653773, "grad_norm": 13.25, "learning_rate": 0.0004963201756068509, "loss": 21.793, "step": 11820 }, { "epoch": 0.023087685430286012, "grad_norm": 11.375, "learning_rate": 0.0004963136735759415, "loss": 21.7453, "step": 11840 }, { "epoch": 0.023126684898918254, "grad_norm": 15.0625, "learning_rate": 0.0004963071715450321, "loss": 21.6847, "step": 11860 }, { "epoch": 0.023165684367550492, "grad_norm": 13.25, "learning_rate": 0.0004963006695141227, "loss": 21.7001, "step": 11880 }, { "epoch": 0.02320468383618273, "grad_norm": 14.5, "learning_rate": 0.0004962941674832134, "loss": 21.6767, "step": 11900 }, { "epoch": 0.023243683304814973, "grad_norm": 13.3125, "learning_rate": 0.000496287665452304, "loss": 21.7875, "step": 11920 }, { "epoch": 0.02328268277344721, "grad_norm": 14.1875, "learning_rate": 0.0004962811634213947, "loss": 21.7244, "step": 11940 }, { "epoch": 0.02332168224207945, "grad_norm": 12.5625, "learning_rate": 0.0004962746613904853, "loss": 21.7276, "step": 11960 }, { "epoch": 0.023360681710711692, "grad_norm": 12.375, "learning_rate": 0.000496268159359576, "loss": 21.6254, "step": 11980 }, { "epoch": 0.02339968117934393, "grad_norm": 12.0625, "learning_rate": 0.0004962616573286667, "loss": 21.6983, "step": 12000 }, { "epoch": 0.023438680647976173, "grad_norm": 16.75, "learning_rate": 0.0004962551552977573, "loss": 21.684, "step": 12020 }, { "epoch": 0.02347768011660841, "grad_norm": 14.4375, "learning_rate": 0.000496248653266848, "loss": 21.5841, "step": 12040 }, { "epoch": 0.02351667958524065, "grad_norm": 16.625, "learning_rate": 0.0004962421512359385, "loss": 21.6848, "step": 12060 }, { "epoch": 0.023555679053872892, "grad_norm": 12.3125, "learning_rate": 0.0004962356492050292, "loss": 21.585, "step": 12080 }, { "epoch": 0.02359467852250513, "grad_norm": 16.0, "learning_rate": 0.0004962291471741198, "loss": 21.5377, "step": 12100 }, { "epoch": 0.023633677991137372, "grad_norm": 12.6875, "learning_rate": 0.0004962226451432105, "loss": 21.5931, "step": 12120 }, { "epoch": 0.02367267745976961, "grad_norm": 13.1875, "learning_rate": 0.0004962161431123012, "loss": 21.6174, "step": 12140 }, { "epoch": 0.02371167692840185, "grad_norm": 13.5, "learning_rate": 0.0004962096410813918, "loss": 21.5403, "step": 12160 }, { "epoch": 0.02375067639703409, "grad_norm": 16.375, "learning_rate": 0.0004962031390504825, "loss": 21.6368, "step": 12180 }, { "epoch": 0.02378967586566633, "grad_norm": 17.25, "learning_rate": 0.0004961966370195731, "loss": 21.6614, "step": 12200 }, { "epoch": 0.02382867533429857, "grad_norm": 13.75, "learning_rate": 0.0004961901349886637, "loss": 21.5281, "step": 12220 }, { "epoch": 0.02386767480293081, "grad_norm": 14.0625, "learning_rate": 0.0004961836329577543, "loss": 21.5222, "step": 12240 }, { "epoch": 0.02390667427156305, "grad_norm": 12.5625, "learning_rate": 0.000496177130926845, "loss": 21.5975, "step": 12260 }, { "epoch": 0.02394567374019529, "grad_norm": 13.5, "learning_rate": 0.0004961706288959356, "loss": 21.5852, "step": 12280 }, { "epoch": 0.02398467320882753, "grad_norm": 13.9375, "learning_rate": 0.0004961641268650263, "loss": 21.5273, "step": 12300 }, { "epoch": 0.024023672677459768, "grad_norm": 14.4375, "learning_rate": 0.000496157624834117, "loss": 21.5755, "step": 12320 }, { "epoch": 0.02406267214609201, "grad_norm": 14.875, "learning_rate": 0.0004961511228032076, "loss": 21.5002, "step": 12340 }, { "epoch": 0.02410167161472425, "grad_norm": 11.625, "learning_rate": 0.0004961446207722983, "loss": 21.5308, "step": 12360 }, { "epoch": 0.02414067108335649, "grad_norm": 13.875, "learning_rate": 0.0004961381187413888, "loss": 21.4955, "step": 12380 }, { "epoch": 0.02417967055198873, "grad_norm": 14.25, "learning_rate": 0.0004961316167104795, "loss": 21.4756, "step": 12400 }, { "epoch": 0.024218670020620968, "grad_norm": 11.5, "learning_rate": 0.0004961251146795701, "loss": 21.6021, "step": 12420 }, { "epoch": 0.02425766948925321, "grad_norm": 13.125, "learning_rate": 0.0004961186126486608, "loss": 21.4975, "step": 12440 }, { "epoch": 0.02429666895788545, "grad_norm": 13.375, "learning_rate": 0.0004961121106177514, "loss": 21.5139, "step": 12460 }, { "epoch": 0.024335668426517687, "grad_norm": 14.25, "learning_rate": 0.0004961056085868421, "loss": 21.4851, "step": 12480 }, { "epoch": 0.02437466789514993, "grad_norm": 13.375, "learning_rate": 0.0004960991065559328, "loss": 21.4945, "step": 12500 }, { "epoch": 0.024413667363782168, "grad_norm": 14.0625, "learning_rate": 0.0004960926045250234, "loss": 21.5072, "step": 12520 }, { "epoch": 0.02445266683241441, "grad_norm": 13.25, "learning_rate": 0.0004960861024941141, "loss": 21.512, "step": 12540 }, { "epoch": 0.024491666301046648, "grad_norm": 14.4375, "learning_rate": 0.0004960796004632047, "loss": 21.4804, "step": 12560 }, { "epoch": 0.024530665769678887, "grad_norm": 11.125, "learning_rate": 0.0004960730984322953, "loss": 21.449, "step": 12580 }, { "epoch": 0.02456966523831113, "grad_norm": 11.4375, "learning_rate": 0.0004960665964013859, "loss": 21.4375, "step": 12600 }, { "epoch": 0.024608664706943367, "grad_norm": 14.75, "learning_rate": 0.0004960600943704766, "loss": 21.4692, "step": 12620 }, { "epoch": 0.02464766417557561, "grad_norm": 12.0, "learning_rate": 0.0004960535923395672, "loss": 21.4474, "step": 12640 }, { "epoch": 0.024686663644207848, "grad_norm": 10.875, "learning_rate": 0.0004960470903086579, "loss": 21.4147, "step": 12660 }, { "epoch": 0.024725663112840086, "grad_norm": 12.5, "learning_rate": 0.0004960405882777486, "loss": 21.441, "step": 12680 }, { "epoch": 0.02476466258147233, "grad_norm": 13.9375, "learning_rate": 0.0004960340862468392, "loss": 21.4102, "step": 12700 }, { "epoch": 0.024803662050104567, "grad_norm": 12.0, "learning_rate": 0.0004960275842159299, "loss": 21.4091, "step": 12720 }, { "epoch": 0.024842661518736806, "grad_norm": 14.25, "learning_rate": 0.0004960210821850205, "loss": 21.4476, "step": 12740 }, { "epoch": 0.024881660987369048, "grad_norm": 13.625, "learning_rate": 0.0004960145801541112, "loss": 21.4262, "step": 12760 }, { "epoch": 0.024920660456001286, "grad_norm": 12.6875, "learning_rate": 0.0004960080781232018, "loss": 21.3011, "step": 12780 }, { "epoch": 0.024959659924633528, "grad_norm": 12.9375, "learning_rate": 0.0004960015760922925, "loss": 21.2773, "step": 12800 }, { "epoch": 0.024998659393265767, "grad_norm": 12.0, "learning_rate": 0.0004959950740613831, "loss": 21.3236, "step": 12820 }, { "epoch": 0.025037658861898005, "grad_norm": 12.375, "learning_rate": 0.0004959885720304737, "loss": 21.3943, "step": 12840 }, { "epoch": 0.025076658330530247, "grad_norm": 11.9375, "learning_rate": 0.0004959820699995644, "loss": 21.3049, "step": 12860 }, { "epoch": 0.025115657799162486, "grad_norm": 13.3125, "learning_rate": 0.000495975567968655, "loss": 21.3379, "step": 12880 }, { "epoch": 0.025154657267794728, "grad_norm": 15.5, "learning_rate": 0.0004959690659377457, "loss": 21.3669, "step": 12900 }, { "epoch": 0.025193656736426966, "grad_norm": 12.5, "learning_rate": 0.0004959625639068363, "loss": 21.3183, "step": 12920 }, { "epoch": 0.025232656205059205, "grad_norm": 13.75, "learning_rate": 0.000495956061875927, "loss": 21.3579, "step": 12940 }, { "epoch": 0.025271655673691447, "grad_norm": 14.3125, "learning_rate": 0.0004959495598450177, "loss": 21.2901, "step": 12960 }, { "epoch": 0.025310655142323685, "grad_norm": 15.375, "learning_rate": 0.0004959430578141083, "loss": 21.2053, "step": 12980 }, { "epoch": 0.025349654610955927, "grad_norm": 12.9375, "learning_rate": 0.0004959365557831989, "loss": 21.2128, "step": 13000 }, { "epoch": 0.025388654079588166, "grad_norm": 12.625, "learning_rate": 0.0004959300537522895, "loss": 21.3803, "step": 13020 }, { "epoch": 0.025427653548220405, "grad_norm": 13.375, "learning_rate": 0.0004959235517213802, "loss": 21.2563, "step": 13040 }, { "epoch": 0.025466653016852647, "grad_norm": 13.375, "learning_rate": 0.0004959170496904708, "loss": 21.3006, "step": 13060 }, { "epoch": 0.025505652485484885, "grad_norm": 14.125, "learning_rate": 0.0004959105476595615, "loss": 21.2853, "step": 13080 }, { "epoch": 0.025544651954117124, "grad_norm": 13.125, "learning_rate": 0.0004959040456286521, "loss": 21.2232, "step": 13100 }, { "epoch": 0.025583651422749366, "grad_norm": 13.3125, "learning_rate": 0.0004958975435977428, "loss": 21.2367, "step": 13120 }, { "epoch": 0.025622650891381604, "grad_norm": 13.5625, "learning_rate": 0.0004958910415668333, "loss": 21.2089, "step": 13140 }, { "epoch": 0.025661650360013846, "grad_norm": 12.5625, "learning_rate": 0.000495884539535924, "loss": 21.2281, "step": 13160 }, { "epoch": 0.025700649828646085, "grad_norm": 16.75, "learning_rate": 0.0004958780375050147, "loss": 21.1918, "step": 13180 }, { "epoch": 0.025739649297278323, "grad_norm": 12.5, "learning_rate": 0.0004958715354741053, "loss": 21.2277, "step": 13200 }, { "epoch": 0.025778648765910565, "grad_norm": 11.9375, "learning_rate": 0.000495865033443196, "loss": 21.2046, "step": 13220 }, { "epoch": 0.025817648234542804, "grad_norm": 12.5, "learning_rate": 0.0004958585314122866, "loss": 21.1419, "step": 13240 }, { "epoch": 0.025856647703175046, "grad_norm": 12.5625, "learning_rate": 0.0004958520293813773, "loss": 21.1426, "step": 13260 }, { "epoch": 0.025895647171807284, "grad_norm": 14.0, "learning_rate": 0.000495845527350468, "loss": 21.1818, "step": 13280 }, { "epoch": 0.025934646640439523, "grad_norm": 16.0, "learning_rate": 0.0004958390253195586, "loss": 21.2342, "step": 13300 }, { "epoch": 0.025973646109071765, "grad_norm": 13.75, "learning_rate": 0.0004958325232886492, "loss": 21.1994, "step": 13320 }, { "epoch": 0.026012645577704004, "grad_norm": 10.875, "learning_rate": 0.0004958260212577398, "loss": 21.1593, "step": 13340 }, { "epoch": 0.026051645046336242, "grad_norm": 14.5, "learning_rate": 0.0004958195192268305, "loss": 21.1435, "step": 13360 }, { "epoch": 0.026090644514968484, "grad_norm": 13.1875, "learning_rate": 0.0004958130171959211, "loss": 21.1894, "step": 13380 }, { "epoch": 0.026129643983600723, "grad_norm": 12.75, "learning_rate": 0.0004958065151650118, "loss": 21.2187, "step": 13400 }, { "epoch": 0.026168643452232965, "grad_norm": 12.75, "learning_rate": 0.0004958000131341024, "loss": 21.2178, "step": 13420 }, { "epoch": 0.026207642920865203, "grad_norm": 13.6875, "learning_rate": 0.0004957935111031931, "loss": 21.0969, "step": 13440 }, { "epoch": 0.026246642389497442, "grad_norm": 11.0, "learning_rate": 0.0004957870090722838, "loss": 21.106, "step": 13460 }, { "epoch": 0.026285641858129684, "grad_norm": 11.625, "learning_rate": 0.0004957805070413744, "loss": 21.0498, "step": 13480 }, { "epoch": 0.026324641326761922, "grad_norm": 13.3125, "learning_rate": 0.0004957740050104651, "loss": 21.1176, "step": 13500 }, { "epoch": 0.026363640795394164, "grad_norm": 14.0625, "learning_rate": 0.0004957675029795557, "loss": 21.0922, "step": 13520 }, { "epoch": 0.026402640264026403, "grad_norm": 12.4375, "learning_rate": 0.0004957610009486464, "loss": 21.0689, "step": 13540 }, { "epoch": 0.02644163973265864, "grad_norm": 12.0625, "learning_rate": 0.0004957544989177369, "loss": 21.0994, "step": 13560 }, { "epoch": 0.026480639201290884, "grad_norm": 12.75, "learning_rate": 0.0004957479968868276, "loss": 21.1864, "step": 13580 }, { "epoch": 0.026519638669923122, "grad_norm": 12.0, "learning_rate": 0.0004957414948559182, "loss": 21.1495, "step": 13600 }, { "epoch": 0.02655863813855536, "grad_norm": 13.6875, "learning_rate": 0.0004957349928250089, "loss": 21.0898, "step": 13620 }, { "epoch": 0.026597637607187603, "grad_norm": 11.9375, "learning_rate": 0.0004957284907940996, "loss": 21.0443, "step": 13640 }, { "epoch": 0.02663663707581984, "grad_norm": 14.125, "learning_rate": 0.0004957219887631902, "loss": 21.1125, "step": 13660 }, { "epoch": 0.026675636544452083, "grad_norm": 11.5625, "learning_rate": 0.0004957154867322809, "loss": 21.0037, "step": 13680 }, { "epoch": 0.026714636013084322, "grad_norm": 13.75, "learning_rate": 0.0004957089847013715, "loss": 21.0579, "step": 13700 }, { "epoch": 0.02675363548171656, "grad_norm": 12.8125, "learning_rate": 0.0004957024826704622, "loss": 21.0379, "step": 13720 }, { "epoch": 0.026792634950348802, "grad_norm": 13.5, "learning_rate": 0.0004956959806395528, "loss": 21.0031, "step": 13740 }, { "epoch": 0.02683163441898104, "grad_norm": 12.75, "learning_rate": 0.0004956894786086434, "loss": 21.0308, "step": 13760 }, { "epoch": 0.026870633887613283, "grad_norm": 12.25, "learning_rate": 0.000495682976577734, "loss": 20.9972, "step": 13780 }, { "epoch": 0.02690963335624552, "grad_norm": 13.0625, "learning_rate": 0.0004956764745468247, "loss": 20.9336, "step": 13800 }, { "epoch": 0.02694863282487776, "grad_norm": 13.0625, "learning_rate": 0.0004956699725159154, "loss": 21.0188, "step": 13820 }, { "epoch": 0.026987632293510002, "grad_norm": 11.8125, "learning_rate": 0.000495663470485006, "loss": 21.047, "step": 13840 }, { "epoch": 0.02702663176214224, "grad_norm": 11.4375, "learning_rate": 0.0004956569684540967, "loss": 20.9846, "step": 13860 }, { "epoch": 0.02706563123077448, "grad_norm": 11.875, "learning_rate": 0.0004956504664231873, "loss": 20.9735, "step": 13880 }, { "epoch": 0.02710463069940672, "grad_norm": 11.625, "learning_rate": 0.000495643964392278, "loss": 20.9583, "step": 13900 }, { "epoch": 0.02714363016803896, "grad_norm": 12.6875, "learning_rate": 0.0004956374623613685, "loss": 20.9797, "step": 13920 }, { "epoch": 0.0271826296366712, "grad_norm": 11.1875, "learning_rate": 0.0004956309603304592, "loss": 20.912, "step": 13940 }, { "epoch": 0.02722162910530344, "grad_norm": 18.375, "learning_rate": 0.0004956244582995499, "loss": 20.9992, "step": 13960 }, { "epoch": 0.02726062857393568, "grad_norm": 11.8125, "learning_rate": 0.0004956179562686405, "loss": 20.9348, "step": 13980 }, { "epoch": 0.02729962804256792, "grad_norm": 13.5625, "learning_rate": 0.0004956114542377312, "loss": 20.9266, "step": 14000 }, { "epoch": 0.02733862751120016, "grad_norm": 12.875, "learning_rate": 0.0004956049522068218, "loss": 20.9607, "step": 14020 }, { "epoch": 0.0273776269798324, "grad_norm": 12.0625, "learning_rate": 0.0004955984501759125, "loss": 21.0236, "step": 14040 }, { "epoch": 0.02741662644846464, "grad_norm": 11.75, "learning_rate": 0.000495591948145003, "loss": 20.9549, "step": 14060 }, { "epoch": 0.02745562591709688, "grad_norm": 11.625, "learning_rate": 0.0004955854461140937, "loss": 20.9407, "step": 14080 }, { "epoch": 0.02749462538572912, "grad_norm": 11.6875, "learning_rate": 0.0004955789440831843, "loss": 20.9356, "step": 14100 }, { "epoch": 0.02753362485436136, "grad_norm": 12.1875, "learning_rate": 0.000495572442052275, "loss": 20.8993, "step": 14120 }, { "epoch": 0.027572624322993598, "grad_norm": 12.0, "learning_rate": 0.0004955659400213657, "loss": 20.789, "step": 14140 }, { "epoch": 0.02761162379162584, "grad_norm": 12.125, "learning_rate": 0.0004955594379904563, "loss": 20.8079, "step": 14160 }, { "epoch": 0.027650623260258078, "grad_norm": 12.0, "learning_rate": 0.000495552935959547, "loss": 20.9396, "step": 14180 }, { "epoch": 0.02768962272889032, "grad_norm": 11.8125, "learning_rate": 0.0004955464339286376, "loss": 20.8879, "step": 14200 }, { "epoch": 0.02772862219752256, "grad_norm": 11.8125, "learning_rate": 0.0004955399318977283, "loss": 20.8179, "step": 14220 }, { "epoch": 0.027767621666154797, "grad_norm": 13.1875, "learning_rate": 0.0004955334298668189, "loss": 20.9109, "step": 14240 }, { "epoch": 0.02780662113478704, "grad_norm": 11.75, "learning_rate": 0.0004955269278359096, "loss": 20.7993, "step": 14260 }, { "epoch": 0.027845620603419278, "grad_norm": 12.375, "learning_rate": 0.0004955204258050001, "loss": 20.9209, "step": 14280 }, { "epoch": 0.02788462007205152, "grad_norm": 11.25, "learning_rate": 0.0004955139237740908, "loss": 20.8294, "step": 14300 }, { "epoch": 0.02792361954068376, "grad_norm": 15.6875, "learning_rate": 0.0004955074217431815, "loss": 20.7905, "step": 14320 }, { "epoch": 0.027962619009315997, "grad_norm": 12.25, "learning_rate": 0.0004955009197122721, "loss": 20.7146, "step": 14340 }, { "epoch": 0.02800161847794824, "grad_norm": 12.0625, "learning_rate": 0.0004954944176813628, "loss": 20.7778, "step": 14360 }, { "epoch": 0.028040617946580478, "grad_norm": 12.6875, "learning_rate": 0.0004954879156504534, "loss": 20.8435, "step": 14380 }, { "epoch": 0.028079617415212716, "grad_norm": 10.8125, "learning_rate": 0.0004954814136195441, "loss": 20.8421, "step": 14400 }, { "epoch": 0.028118616883844958, "grad_norm": 11.4375, "learning_rate": 0.0004954749115886347, "loss": 20.8006, "step": 14420 }, { "epoch": 0.028157616352477197, "grad_norm": 11.3125, "learning_rate": 0.0004954684095577254, "loss": 20.8072, "step": 14440 }, { "epoch": 0.02819661582110944, "grad_norm": 11.5, "learning_rate": 0.0004954619075268161, "loss": 20.769, "step": 14460 }, { "epoch": 0.028235615289741677, "grad_norm": 11.4375, "learning_rate": 0.0004954554054959067, "loss": 20.8338, "step": 14480 }, { "epoch": 0.028274614758373916, "grad_norm": 10.375, "learning_rate": 0.0004954489034649974, "loss": 20.8088, "step": 14500 }, { "epoch": 0.028313614227006158, "grad_norm": 11.25, "learning_rate": 0.000495442401434088, "loss": 20.7043, "step": 14520 }, { "epoch": 0.028352613695638396, "grad_norm": 12.0625, "learning_rate": 0.0004954358994031786, "loss": 20.722, "step": 14540 }, { "epoch": 0.02839161316427064, "grad_norm": 12.125, "learning_rate": 0.0004954293973722692, "loss": 20.8949, "step": 14560 }, { "epoch": 0.028430612632902877, "grad_norm": 13.1875, "learning_rate": 0.0004954228953413599, "loss": 20.771, "step": 14580 }, { "epoch": 0.028469612101535115, "grad_norm": 13.625, "learning_rate": 0.0004954163933104505, "loss": 20.7833, "step": 14600 }, { "epoch": 0.028508611570167357, "grad_norm": 11.125, "learning_rate": 0.0004954098912795412, "loss": 20.7086, "step": 14620 }, { "epoch": 0.028547611038799596, "grad_norm": 11.6875, "learning_rate": 0.0004954033892486319, "loss": 20.7229, "step": 14640 }, { "epoch": 0.028586610507431835, "grad_norm": 12.3125, "learning_rate": 0.0004953968872177225, "loss": 20.735, "step": 14660 }, { "epoch": 0.028625609976064077, "grad_norm": 12.4375, "learning_rate": 0.0004953903851868131, "loss": 20.6955, "step": 14680 }, { "epoch": 0.028664609444696315, "grad_norm": 10.375, "learning_rate": 0.0004953838831559037, "loss": 20.8495, "step": 14700 }, { "epoch": 0.028703608913328557, "grad_norm": 11.625, "learning_rate": 0.0004953773811249944, "loss": 20.7476, "step": 14720 }, { "epoch": 0.028742608381960796, "grad_norm": 12.125, "learning_rate": 0.000495370879094085, "loss": 20.7175, "step": 14740 }, { "epoch": 0.028781607850593034, "grad_norm": 11.125, "learning_rate": 0.0004953643770631757, "loss": 20.696, "step": 14760 }, { "epoch": 0.028820607319225276, "grad_norm": 10.875, "learning_rate": 0.0004953578750322664, "loss": 20.8024, "step": 14780 }, { "epoch": 0.028859606787857515, "grad_norm": 10.0625, "learning_rate": 0.000495351373001357, "loss": 20.6606, "step": 14800 }, { "epoch": 0.028898606256489757, "grad_norm": 11.0625, "learning_rate": 0.0004953448709704477, "loss": 20.681, "step": 14820 }, { "epoch": 0.028937605725121995, "grad_norm": 11.5, "learning_rate": 0.0004953383689395382, "loss": 20.7311, "step": 14840 }, { "epoch": 0.028976605193754234, "grad_norm": 11.6875, "learning_rate": 0.0004953318669086289, "loss": 20.6885, "step": 14860 }, { "epoch": 0.029015604662386476, "grad_norm": 12.0, "learning_rate": 0.0004953253648777195, "loss": 20.6885, "step": 14880 }, { "epoch": 0.029054604131018714, "grad_norm": 12.125, "learning_rate": 0.0004953188628468102, "loss": 20.7221, "step": 14900 }, { "epoch": 0.029093603599650953, "grad_norm": 12.6875, "learning_rate": 0.0004953123608159008, "loss": 20.7323, "step": 14920 }, { "epoch": 0.029132603068283195, "grad_norm": 10.625, "learning_rate": 0.0004953058587849915, "loss": 20.6526, "step": 14940 }, { "epoch": 0.029171602536915434, "grad_norm": 11.0625, "learning_rate": 0.0004952993567540822, "loss": 20.754, "step": 14960 }, { "epoch": 0.029210602005547676, "grad_norm": 16.875, "learning_rate": 0.0004952928547231728, "loss": 20.5829, "step": 14980 }, { "epoch": 0.029249601474179914, "grad_norm": 13.1875, "learning_rate": 0.0004952863526922635, "loss": 20.6348, "step": 15000 }, { "epoch": 0.029288600942812153, "grad_norm": 11.0, "learning_rate": 0.000495279850661354, "loss": 20.6052, "step": 15020 }, { "epoch": 0.029327600411444395, "grad_norm": 11.125, "learning_rate": 0.0004952733486304447, "loss": 20.6334, "step": 15040 }, { "epoch": 0.029366599880076633, "grad_norm": 11.3125, "learning_rate": 0.0004952668465995353, "loss": 20.6751, "step": 15060 }, { "epoch": 0.029405599348708875, "grad_norm": 14.75, "learning_rate": 0.000495260344568626, "loss": 20.5701, "step": 15080 }, { "epoch": 0.029444598817341114, "grad_norm": 12.0, "learning_rate": 0.0004952538425377166, "loss": 20.5664, "step": 15100 }, { "epoch": 0.029483598285973352, "grad_norm": 12.625, "learning_rate": 0.0004952473405068073, "loss": 20.5255, "step": 15120 }, { "epoch": 0.029522597754605594, "grad_norm": 11.0, "learning_rate": 0.000495240838475898, "loss": 20.5584, "step": 15140 }, { "epoch": 0.029561597223237833, "grad_norm": 13.0625, "learning_rate": 0.0004952343364449886, "loss": 20.6137, "step": 15160 }, { "epoch": 0.029600596691870075, "grad_norm": 11.125, "learning_rate": 0.0004952278344140793, "loss": 20.5632, "step": 15180 }, { "epoch": 0.029639596160502314, "grad_norm": 10.8125, "learning_rate": 0.0004952213323831699, "loss": 20.5356, "step": 15200 }, { "epoch": 0.029678595629134552, "grad_norm": 12.25, "learning_rate": 0.0004952148303522606, "loss": 20.5935, "step": 15220 }, { "epoch": 0.029717595097766794, "grad_norm": 9.875, "learning_rate": 0.0004952083283213512, "loss": 20.6133, "step": 15240 }, { "epoch": 0.029756594566399033, "grad_norm": 12.75, "learning_rate": 0.0004952018262904418, "loss": 20.6443, "step": 15260 }, { "epoch": 0.02979559403503127, "grad_norm": 13.4375, "learning_rate": 0.0004951953242595325, "loss": 20.6207, "step": 15280 }, { "epoch": 0.029834593503663513, "grad_norm": 10.0625, "learning_rate": 0.0004951888222286231, "loss": 20.5027, "step": 15300 }, { "epoch": 0.029873592972295752, "grad_norm": 11.8125, "learning_rate": 0.0004951823201977138, "loss": 20.534, "step": 15320 }, { "epoch": 0.029912592440927994, "grad_norm": 11.0625, "learning_rate": 0.0004951758181668044, "loss": 20.5885, "step": 15340 }, { "epoch": 0.029951591909560232, "grad_norm": 11.1875, "learning_rate": 0.0004951693161358951, "loss": 20.6046, "step": 15360 }, { "epoch": 0.02999059137819247, "grad_norm": 11.6875, "learning_rate": 0.0004951628141049857, "loss": 20.4537, "step": 15380 }, { "epoch": 0.030029590846824713, "grad_norm": 11.1875, "learning_rate": 0.0004951563120740764, "loss": 20.4366, "step": 15400 }, { "epoch": 0.03006859031545695, "grad_norm": 12.3125, "learning_rate": 0.000495149810043167, "loss": 20.5313, "step": 15420 }, { "epoch": 0.030107589784089193, "grad_norm": 12.5, "learning_rate": 0.0004951433080122577, "loss": 20.5595, "step": 15440 }, { "epoch": 0.030146589252721432, "grad_norm": 10.75, "learning_rate": 0.0004951368059813483, "loss": 20.4318, "step": 15460 }, { "epoch": 0.03018558872135367, "grad_norm": 12.8125, "learning_rate": 0.0004951303039504389, "loss": 20.5356, "step": 15480 }, { "epoch": 0.030224588189985913, "grad_norm": 11.75, "learning_rate": 0.0004951238019195296, "loss": 20.594, "step": 15500 }, { "epoch": 0.03026358765861815, "grad_norm": 11.125, "learning_rate": 0.0004951172998886202, "loss": 20.5289, "step": 15520 }, { "epoch": 0.03030258712725039, "grad_norm": 12.4375, "learning_rate": 0.0004951107978577109, "loss": 20.4482, "step": 15540 }, { "epoch": 0.03034158659588263, "grad_norm": 11.1875, "learning_rate": 0.0004951042958268015, "loss": 20.4001, "step": 15560 }, { "epoch": 0.03038058606451487, "grad_norm": 13.0625, "learning_rate": 0.0004950977937958922, "loss": 20.3405, "step": 15580 }, { "epoch": 0.030419585533147112, "grad_norm": 13.5625, "learning_rate": 0.0004950912917649827, "loss": 20.435, "step": 15600 }, { "epoch": 0.03045858500177935, "grad_norm": 11.25, "learning_rate": 0.0004950847897340734, "loss": 20.4817, "step": 15620 }, { "epoch": 0.03049758447041159, "grad_norm": 10.75, "learning_rate": 0.0004950782877031641, "loss": 20.4889, "step": 15640 }, { "epoch": 0.03053658393904383, "grad_norm": 12.25, "learning_rate": 0.0004950717856722547, "loss": 20.4209, "step": 15660 }, { "epoch": 0.03057558340767607, "grad_norm": 11.125, "learning_rate": 0.0004950652836413454, "loss": 20.401, "step": 15680 }, { "epoch": 0.030614582876308312, "grad_norm": 12.0, "learning_rate": 0.000495058781610436, "loss": 20.4579, "step": 15700 }, { "epoch": 0.03065358234494055, "grad_norm": 10.9375, "learning_rate": 0.0004950522795795267, "loss": 20.4935, "step": 15720 }, { "epoch": 0.03069258181357279, "grad_norm": 11.75, "learning_rate": 0.0004950457775486173, "loss": 20.4301, "step": 15740 }, { "epoch": 0.03073158128220503, "grad_norm": 11.6875, "learning_rate": 0.0004950392755177079, "loss": 20.3754, "step": 15760 }, { "epoch": 0.03077058075083727, "grad_norm": 10.625, "learning_rate": 0.0004950327734867985, "loss": 20.4608, "step": 15780 }, { "epoch": 0.030809580219469508, "grad_norm": 11.875, "learning_rate": 0.0004950262714558892, "loss": 20.5408, "step": 15800 }, { "epoch": 0.03084857968810175, "grad_norm": 11.125, "learning_rate": 0.0004950197694249799, "loss": 20.3624, "step": 15820 }, { "epoch": 0.03088757915673399, "grad_norm": 10.75, "learning_rate": 0.0004950132673940705, "loss": 20.3549, "step": 15840 }, { "epoch": 0.03092657862536623, "grad_norm": 13.6875, "learning_rate": 0.0004950067653631612, "loss": 20.3933, "step": 15860 }, { "epoch": 0.03096557809399847, "grad_norm": 12.6875, "learning_rate": 0.0004950002633322518, "loss": 20.3452, "step": 15880 }, { "epoch": 0.031004577562630708, "grad_norm": 11.0625, "learning_rate": 0.0004949937613013425, "loss": 20.4437, "step": 15900 }, { "epoch": 0.03104357703126295, "grad_norm": 9.6875, "learning_rate": 0.0004949872592704331, "loss": 20.3318, "step": 15920 }, { "epoch": 0.03108257649989519, "grad_norm": 10.375, "learning_rate": 0.0004949807572395238, "loss": 20.3704, "step": 15940 }, { "epoch": 0.03112157596852743, "grad_norm": 11.625, "learning_rate": 0.0004949742552086145, "loss": 20.3497, "step": 15960 }, { "epoch": 0.03116057543715967, "grad_norm": 11.9375, "learning_rate": 0.0004949677531777051, "loss": 20.4226, "step": 15980 }, { "epoch": 0.031199574905791908, "grad_norm": 14.125, "learning_rate": 0.0004949612511467957, "loss": 20.3333, "step": 16000 }, { "epoch": 0.03123857437442415, "grad_norm": 11.4375, "learning_rate": 0.0004949547491158863, "loss": 20.2811, "step": 16020 }, { "epoch": 0.03127757384305639, "grad_norm": 11.1875, "learning_rate": 0.000494948247084977, "loss": 20.367, "step": 16040 }, { "epoch": 0.03131657331168863, "grad_norm": 11.75, "learning_rate": 0.0004949417450540676, "loss": 20.3134, "step": 16060 }, { "epoch": 0.03135557278032087, "grad_norm": 10.25, "learning_rate": 0.0004949352430231583, "loss": 20.3922, "step": 16080 }, { "epoch": 0.03139457224895311, "grad_norm": 10.375, "learning_rate": 0.000494928740992249, "loss": 20.3097, "step": 16100 }, { "epoch": 0.031433571717585346, "grad_norm": 11.375, "learning_rate": 0.0004949222389613396, "loss": 20.3737, "step": 16120 }, { "epoch": 0.03147257118621759, "grad_norm": 10.1875, "learning_rate": 0.0004949157369304303, "loss": 20.3886, "step": 16140 }, { "epoch": 0.03151157065484983, "grad_norm": 11.1875, "learning_rate": 0.0004949092348995209, "loss": 20.2403, "step": 16160 }, { "epoch": 0.031550570123482065, "grad_norm": 11.625, "learning_rate": 0.0004949027328686116, "loss": 20.3402, "step": 16180 }, { "epoch": 0.03158956959211431, "grad_norm": 14.6875, "learning_rate": 0.0004948962308377022, "loss": 20.3529, "step": 16200 }, { "epoch": 0.03162856906074655, "grad_norm": 11.5, "learning_rate": 0.0004948897288067928, "loss": 20.2767, "step": 16220 }, { "epoch": 0.031667568529378784, "grad_norm": 9.6875, "learning_rate": 0.0004948832267758834, "loss": 20.2271, "step": 16240 }, { "epoch": 0.031706567998011026, "grad_norm": 11.25, "learning_rate": 0.0004948767247449741, "loss": 20.3672, "step": 16260 }, { "epoch": 0.03174556746664327, "grad_norm": 11.8125, "learning_rate": 0.0004948702227140648, "loss": 20.3693, "step": 16280 }, { "epoch": 0.03178456693527551, "grad_norm": 13.6875, "learning_rate": 0.0004948637206831554, "loss": 20.2767, "step": 16300 }, { "epoch": 0.031823566403907745, "grad_norm": 11.25, "learning_rate": 0.0004948572186522461, "loss": 20.2559, "step": 16320 }, { "epoch": 0.03186256587253999, "grad_norm": 12.1875, "learning_rate": 0.0004948507166213367, "loss": 20.2962, "step": 16340 }, { "epoch": 0.03190156534117223, "grad_norm": 11.4375, "learning_rate": 0.0004948442145904274, "loss": 20.2648, "step": 16360 }, { "epoch": 0.031940564809804464, "grad_norm": 10.9375, "learning_rate": 0.0004948377125595179, "loss": 20.2703, "step": 16380 }, { "epoch": 0.031979564278436706, "grad_norm": 12.1875, "learning_rate": 0.0004948312105286086, "loss": 20.3281, "step": 16400 }, { "epoch": 0.03201856374706895, "grad_norm": 12.1875, "learning_rate": 0.0004948247084976992, "loss": 20.2317, "step": 16420 }, { "epoch": 0.03205756321570118, "grad_norm": 11.375, "learning_rate": 0.0004948182064667899, "loss": 20.2883, "step": 16440 }, { "epoch": 0.032096562684333425, "grad_norm": 12.8125, "learning_rate": 0.0004948117044358806, "loss": 20.2294, "step": 16460 }, { "epoch": 0.03213556215296567, "grad_norm": 12.5625, "learning_rate": 0.0004948052024049712, "loss": 20.1226, "step": 16480 }, { "epoch": 0.0321745616215979, "grad_norm": 11.375, "learning_rate": 0.0004947987003740619, "loss": 20.2422, "step": 16500 }, { "epoch": 0.032213561090230144, "grad_norm": 11.375, "learning_rate": 0.0004947921983431524, "loss": 20.2142, "step": 16520 }, { "epoch": 0.032252560558862386, "grad_norm": 12.5, "learning_rate": 0.0004947856963122431, "loss": 20.2658, "step": 16540 }, { "epoch": 0.03229156002749463, "grad_norm": 9.8125, "learning_rate": 0.0004947791942813337, "loss": 20.1552, "step": 16560 }, { "epoch": 0.032330559496126864, "grad_norm": 10.0625, "learning_rate": 0.0004947726922504244, "loss": 20.1369, "step": 16580 }, { "epoch": 0.032369558964759106, "grad_norm": 11.375, "learning_rate": 0.000494766190219515, "loss": 20.1965, "step": 16600 }, { "epoch": 0.03240855843339135, "grad_norm": 10.875, "learning_rate": 0.0004947596881886057, "loss": 20.2377, "step": 16620 }, { "epoch": 0.03244755790202358, "grad_norm": 10.375, "learning_rate": 0.0004947531861576964, "loss": 20.2204, "step": 16640 }, { "epoch": 0.032486557370655825, "grad_norm": 11.375, "learning_rate": 0.000494746684126787, "loss": 20.1081, "step": 16660 }, { "epoch": 0.03252555683928807, "grad_norm": 11.1875, "learning_rate": 0.0004947401820958777, "loss": 20.3024, "step": 16680 }, { "epoch": 0.0325645563079203, "grad_norm": 11.3125, "learning_rate": 0.0004947336800649683, "loss": 20.1351, "step": 16700 }, { "epoch": 0.032603555776552544, "grad_norm": 11.1875, "learning_rate": 0.0004947271780340589, "loss": 20.1989, "step": 16720 }, { "epoch": 0.032642555245184786, "grad_norm": 9.6875, "learning_rate": 0.0004947206760031495, "loss": 20.1502, "step": 16740 }, { "epoch": 0.03268155471381702, "grad_norm": 11.125, "learning_rate": 0.0004947141739722402, "loss": 20.0948, "step": 16760 }, { "epoch": 0.03272055418244926, "grad_norm": 11.4375, "learning_rate": 0.0004947076719413309, "loss": 20.1084, "step": 16780 }, { "epoch": 0.032759553651081505, "grad_norm": 10.3125, "learning_rate": 0.0004947011699104215, "loss": 20.1207, "step": 16800 }, { "epoch": 0.03279855311971375, "grad_norm": 11.1875, "learning_rate": 0.0004946946678795122, "loss": 20.0984, "step": 16820 }, { "epoch": 0.03283755258834598, "grad_norm": 10.8125, "learning_rate": 0.0004946881658486028, "loss": 20.1778, "step": 16840 }, { "epoch": 0.032876552056978224, "grad_norm": 10.8125, "learning_rate": 0.0004946816638176935, "loss": 20.2415, "step": 16860 }, { "epoch": 0.032915551525610466, "grad_norm": 10.25, "learning_rate": 0.0004946751617867841, "loss": 20.1135, "step": 16880 }, { "epoch": 0.0329545509942427, "grad_norm": 10.875, "learning_rate": 0.0004946686597558748, "loss": 20.1361, "step": 16900 }, { "epoch": 0.03299355046287494, "grad_norm": 11.4375, "learning_rate": 0.0004946621577249655, "loss": 20.0907, "step": 16920 }, { "epoch": 0.033032549931507185, "grad_norm": 14.5, "learning_rate": 0.0004946556556940561, "loss": 20.1267, "step": 16940 }, { "epoch": 0.03307154940013942, "grad_norm": 12.875, "learning_rate": 0.0004946491536631467, "loss": 20.0818, "step": 16960 }, { "epoch": 0.03311054886877166, "grad_norm": 10.875, "learning_rate": 0.0004946426516322373, "loss": 20.1085, "step": 16980 }, { "epoch": 0.033149548337403904, "grad_norm": 10.5625, "learning_rate": 0.000494636149601328, "loss": 20.0712, "step": 17000 }, { "epoch": 0.03318854780603614, "grad_norm": 11.0, "learning_rate": 0.0004946296475704186, "loss": 20.1068, "step": 17020 }, { "epoch": 0.03322754727466838, "grad_norm": 10.1875, "learning_rate": 0.0004946231455395093, "loss": 20.0552, "step": 17040 }, { "epoch": 0.03326654674330062, "grad_norm": 12.5, "learning_rate": 0.0004946166435085999, "loss": 20.0382, "step": 17060 }, { "epoch": 0.033305546211932865, "grad_norm": 11.125, "learning_rate": 0.0004946101414776906, "loss": 20.1285, "step": 17080 }, { "epoch": 0.0333445456805651, "grad_norm": 10.375, "learning_rate": 0.0004946036394467813, "loss": 20.0373, "step": 17100 }, { "epoch": 0.03338354514919734, "grad_norm": 9.9375, "learning_rate": 0.0004945971374158719, "loss": 20.1946, "step": 17120 }, { "epoch": 0.033422544617829585, "grad_norm": 10.6875, "learning_rate": 0.0004945906353849625, "loss": 20.1412, "step": 17140 }, { "epoch": 0.03346154408646182, "grad_norm": 9.5, "learning_rate": 0.0004945841333540531, "loss": 20.078, "step": 17160 }, { "epoch": 0.03350054355509406, "grad_norm": 10.5, "learning_rate": 0.0004945776313231438, "loss": 20.0913, "step": 17180 }, { "epoch": 0.033539543023726304, "grad_norm": 11.75, "learning_rate": 0.0004945711292922344, "loss": 20.1428, "step": 17200 }, { "epoch": 0.03357854249235854, "grad_norm": 10.8125, "learning_rate": 0.0004945646272613251, "loss": 20.0407, "step": 17220 }, { "epoch": 0.03361754196099078, "grad_norm": 10.5625, "learning_rate": 0.0004945581252304157, "loss": 20.1396, "step": 17240 }, { "epoch": 0.03365654142962302, "grad_norm": 10.0, "learning_rate": 0.0004945516231995064, "loss": 20.0334, "step": 17260 }, { "epoch": 0.03369554089825526, "grad_norm": 9.875, "learning_rate": 0.0004945451211685971, "loss": 19.9909, "step": 17280 }, { "epoch": 0.0337345403668875, "grad_norm": 12.4375, "learning_rate": 0.0004945386191376876, "loss": 20.0374, "step": 17300 }, { "epoch": 0.03377353983551974, "grad_norm": 9.75, "learning_rate": 0.0004945321171067783, "loss": 20.0703, "step": 17320 }, { "epoch": 0.033812539304151984, "grad_norm": 11.375, "learning_rate": 0.0004945256150758689, "loss": 19.9489, "step": 17340 }, { "epoch": 0.03385153877278422, "grad_norm": 11.3125, "learning_rate": 0.0004945191130449596, "loss": 19.9904, "step": 17360 }, { "epoch": 0.03389053824141646, "grad_norm": 10.3125, "learning_rate": 0.0004945126110140502, "loss": 19.9895, "step": 17380 }, { "epoch": 0.0339295377100487, "grad_norm": 11.0, "learning_rate": 0.0004945061089831409, "loss": 20.0525, "step": 17400 }, { "epoch": 0.03396853717868094, "grad_norm": 10.0, "learning_rate": 0.0004944996069522316, "loss": 20.0451, "step": 17420 }, { "epoch": 0.03400753664731318, "grad_norm": 10.625, "learning_rate": 0.0004944931049213222, "loss": 20.0506, "step": 17440 }, { "epoch": 0.03404653611594542, "grad_norm": 10.4375, "learning_rate": 0.0004944866028904128, "loss": 19.9625, "step": 17460 }, { "epoch": 0.03408553558457766, "grad_norm": 11.625, "learning_rate": 0.0004944801008595034, "loss": 19.995, "step": 17480 }, { "epoch": 0.0341245350532099, "grad_norm": 11.0625, "learning_rate": 0.0004944735988285941, "loss": 20.1062, "step": 17500 }, { "epoch": 0.03416353452184214, "grad_norm": 10.4375, "learning_rate": 0.0004944670967976847, "loss": 19.9454, "step": 17520 }, { "epoch": 0.034202533990474376, "grad_norm": 9.3125, "learning_rate": 0.0004944605947667754, "loss": 19.8752, "step": 17540 }, { "epoch": 0.03424153345910662, "grad_norm": 10.375, "learning_rate": 0.000494454092735866, "loss": 19.9649, "step": 17560 }, { "epoch": 0.03428053292773886, "grad_norm": 10.125, "learning_rate": 0.0004944475907049567, "loss": 19.9261, "step": 17580 }, { "epoch": 0.0343195323963711, "grad_norm": 9.875, "learning_rate": 0.0004944410886740474, "loss": 19.909, "step": 17600 }, { "epoch": 0.03435853186500334, "grad_norm": 10.75, "learning_rate": 0.000494434586643138, "loss": 19.9778, "step": 17620 }, { "epoch": 0.03439753133363558, "grad_norm": 11.5, "learning_rate": 0.0004944280846122287, "loss": 19.9709, "step": 17640 }, { "epoch": 0.03443653080226782, "grad_norm": 11.125, "learning_rate": 0.0004944215825813193, "loss": 19.9898, "step": 17660 }, { "epoch": 0.03447553027090006, "grad_norm": 10.5625, "learning_rate": 0.00049441508055041, "loss": 19.9979, "step": 17680 }, { "epoch": 0.0345145297395323, "grad_norm": 9.5625, "learning_rate": 0.0004944085785195005, "loss": 19.9206, "step": 17700 }, { "epoch": 0.03455352920816454, "grad_norm": 11.0, "learning_rate": 0.0004944020764885912, "loss": 19.9701, "step": 17720 }, { "epoch": 0.034592528676796776, "grad_norm": 10.75, "learning_rate": 0.0004943955744576818, "loss": 19.9937, "step": 17740 }, { "epoch": 0.03463152814542902, "grad_norm": 12.5625, "learning_rate": 0.0004943890724267725, "loss": 20.0349, "step": 17760 }, { "epoch": 0.03467052761406126, "grad_norm": 11.3125, "learning_rate": 0.0004943825703958632, "loss": 19.8582, "step": 17780 }, { "epoch": 0.034709527082693495, "grad_norm": 12.125, "learning_rate": 0.0004943760683649538, "loss": 19.9185, "step": 17800 }, { "epoch": 0.03474852655132574, "grad_norm": 10.625, "learning_rate": 0.0004943695663340445, "loss": 19.9073, "step": 17820 }, { "epoch": 0.03478752601995798, "grad_norm": 9.8125, "learning_rate": 0.0004943630643031351, "loss": 19.8189, "step": 17840 }, { "epoch": 0.03482652548859022, "grad_norm": 12.375, "learning_rate": 0.0004943565622722258, "loss": 20.0152, "step": 17860 }, { "epoch": 0.034865524957222456, "grad_norm": 10.0, "learning_rate": 0.0004943500602413164, "loss": 19.9768, "step": 17880 }, { "epoch": 0.0349045244258547, "grad_norm": 10.5, "learning_rate": 0.0004943435582104071, "loss": 19.9124, "step": 17900 }, { "epoch": 0.03494352389448694, "grad_norm": 9.5, "learning_rate": 0.0004943370561794977, "loss": 19.8925, "step": 17920 }, { "epoch": 0.034982523363119175, "grad_norm": 8.75, "learning_rate": 0.0004943305541485883, "loss": 19.9456, "step": 17940 }, { "epoch": 0.03502152283175142, "grad_norm": 10.625, "learning_rate": 0.000494324052117679, "loss": 19.8603, "step": 17960 }, { "epoch": 0.03506052230038366, "grad_norm": 10.25, "learning_rate": 0.0004943175500867696, "loss": 19.909, "step": 17980 }, { "epoch": 0.035099521769015894, "grad_norm": 9.5625, "learning_rate": 0.0004943110480558603, "loss": 19.8528, "step": 18000 }, { "epoch": 0.035138521237648136, "grad_norm": 12.25, "learning_rate": 0.0004943045460249509, "loss": 19.9118, "step": 18020 }, { "epoch": 0.03517752070628038, "grad_norm": 12.0625, "learning_rate": 0.0004942980439940416, "loss": 19.7998, "step": 18040 }, { "epoch": 0.03521652017491262, "grad_norm": 10.5625, "learning_rate": 0.0004942915419631321, "loss": 19.8966, "step": 18060 }, { "epoch": 0.035255519643544855, "grad_norm": 10.375, "learning_rate": 0.0004942850399322228, "loss": 19.8631, "step": 18080 }, { "epoch": 0.0352945191121771, "grad_norm": 10.3125, "learning_rate": 0.0004942785379013135, "loss": 19.8226, "step": 18100 }, { "epoch": 0.03533351858080934, "grad_norm": 9.9375, "learning_rate": 0.0004942720358704041, "loss": 19.8466, "step": 18120 }, { "epoch": 0.035372518049441574, "grad_norm": 10.875, "learning_rate": 0.0004942655338394948, "loss": 19.8142, "step": 18140 }, { "epoch": 0.035411517518073816, "grad_norm": 10.0, "learning_rate": 0.0004942590318085854, "loss": 19.8257, "step": 18160 }, { "epoch": 0.03545051698670606, "grad_norm": 10.125, "learning_rate": 0.0004942525297776761, "loss": 19.7939, "step": 18180 }, { "epoch": 0.035489516455338294, "grad_norm": 9.5, "learning_rate": 0.0004942460277467667, "loss": 19.8764, "step": 18200 }, { "epoch": 0.035528515923970536, "grad_norm": 10.375, "learning_rate": 0.0004942395257158573, "loss": 19.8394, "step": 18220 }, { "epoch": 0.03556751539260278, "grad_norm": 10.375, "learning_rate": 0.000494233023684948, "loss": 19.7666, "step": 18240 }, { "epoch": 0.03560651486123501, "grad_norm": 10.625, "learning_rate": 0.0004942265216540386, "loss": 19.8165, "step": 18260 }, { "epoch": 0.035645514329867255, "grad_norm": 9.8125, "learning_rate": 0.0004942200196231293, "loss": 19.9201, "step": 18280 }, { "epoch": 0.0356845137984995, "grad_norm": 11.0625, "learning_rate": 0.0004942135175922199, "loss": 19.8705, "step": 18300 }, { "epoch": 0.03572351326713174, "grad_norm": 10.9375, "learning_rate": 0.0004942070155613106, "loss": 19.7906, "step": 18320 }, { "epoch": 0.035762512735763974, "grad_norm": 10.375, "learning_rate": 0.0004942005135304012, "loss": 19.7983, "step": 18340 }, { "epoch": 0.035801512204396216, "grad_norm": 10.5, "learning_rate": 0.0004941940114994919, "loss": 19.7921, "step": 18360 }, { "epoch": 0.03584051167302846, "grad_norm": 11.1875, "learning_rate": 0.0004941875094685825, "loss": 19.6906, "step": 18380 }, { "epoch": 0.03587951114166069, "grad_norm": 10.375, "learning_rate": 0.0004941810074376732, "loss": 19.8046, "step": 18400 }, { "epoch": 0.035918510610292935, "grad_norm": 10.5625, "learning_rate": 0.0004941745054067638, "loss": 19.8274, "step": 18420 }, { "epoch": 0.03595751007892518, "grad_norm": 11.25, "learning_rate": 0.0004941680033758544, "loss": 19.7977, "step": 18440 }, { "epoch": 0.03599650954755741, "grad_norm": 11.1875, "learning_rate": 0.0004941615013449451, "loss": 19.7892, "step": 18460 }, { "epoch": 0.036035509016189654, "grad_norm": 10.625, "learning_rate": 0.0004941549993140357, "loss": 19.6819, "step": 18480 }, { "epoch": 0.036074508484821896, "grad_norm": 12.3125, "learning_rate": 0.0004941484972831264, "loss": 19.7501, "step": 18500 }, { "epoch": 0.03611350795345413, "grad_norm": 10.0625, "learning_rate": 0.000494141995252217, "loss": 19.7792, "step": 18520 }, { "epoch": 0.03615250742208637, "grad_norm": 9.3125, "learning_rate": 0.0004941354932213077, "loss": 19.7753, "step": 18540 }, { "epoch": 0.036191506890718615, "grad_norm": 10.4375, "learning_rate": 0.0004941289911903983, "loss": 19.8244, "step": 18560 }, { "epoch": 0.03623050635935086, "grad_norm": 10.75, "learning_rate": 0.000494122489159489, "loss": 19.7036, "step": 18580 }, { "epoch": 0.03626950582798309, "grad_norm": 8.5625, "learning_rate": 0.0004941159871285797, "loss": 19.7776, "step": 18600 }, { "epoch": 0.036308505296615334, "grad_norm": 10.8125, "learning_rate": 0.0004941094850976703, "loss": 19.8757, "step": 18620 }, { "epoch": 0.036347504765247576, "grad_norm": 9.625, "learning_rate": 0.000494102983066761, "loss": 19.8473, "step": 18640 }, { "epoch": 0.03638650423387981, "grad_norm": 14.3125, "learning_rate": 0.0004940964810358516, "loss": 19.6959, "step": 18660 }, { "epoch": 0.03642550370251205, "grad_norm": 12.25, "learning_rate": 0.0004940899790049422, "loss": 19.7052, "step": 18680 }, { "epoch": 0.036464503171144295, "grad_norm": 10.625, "learning_rate": 0.0004940834769740328, "loss": 19.7221, "step": 18700 }, { "epoch": 0.03650350263977653, "grad_norm": 9.125, "learning_rate": 0.0004940769749431235, "loss": 19.636, "step": 18720 }, { "epoch": 0.03654250210840877, "grad_norm": 9.8125, "learning_rate": 0.0004940704729122142, "loss": 19.7428, "step": 18740 }, { "epoch": 0.036581501577041015, "grad_norm": 10.5, "learning_rate": 0.0004940639708813048, "loss": 19.7076, "step": 18760 }, { "epoch": 0.03662050104567325, "grad_norm": 10.125, "learning_rate": 0.0004940574688503955, "loss": 19.6721, "step": 18780 }, { "epoch": 0.03665950051430549, "grad_norm": 12.4375, "learning_rate": 0.0004940509668194861, "loss": 19.7135, "step": 18800 }, { "epoch": 0.036698499982937734, "grad_norm": 9.5, "learning_rate": 0.0004940444647885768, "loss": 19.591, "step": 18820 }, { "epoch": 0.036737499451569976, "grad_norm": 9.3125, "learning_rate": 0.0004940379627576673, "loss": 19.595, "step": 18840 }, { "epoch": 0.03677649892020221, "grad_norm": 10.375, "learning_rate": 0.000494031460726758, "loss": 19.7223, "step": 18860 }, { "epoch": 0.03681549838883445, "grad_norm": 12.4375, "learning_rate": 0.0004940249586958486, "loss": 19.6684, "step": 18880 }, { "epoch": 0.036854497857466695, "grad_norm": 10.0625, "learning_rate": 0.0004940184566649393, "loss": 19.6797, "step": 18900 }, { "epoch": 0.03689349732609893, "grad_norm": 10.0, "learning_rate": 0.00049401195463403, "loss": 19.712, "step": 18920 }, { "epoch": 0.03693249679473117, "grad_norm": 10.25, "learning_rate": 0.0004940054526031206, "loss": 19.5788, "step": 18940 }, { "epoch": 0.036971496263363414, "grad_norm": 11.8125, "learning_rate": 0.0004939989505722113, "loss": 19.6803, "step": 18960 }, { "epoch": 0.03701049573199565, "grad_norm": 10.5, "learning_rate": 0.0004939924485413019, "loss": 19.705, "step": 18980 }, { "epoch": 0.03704949520062789, "grad_norm": 9.25, "learning_rate": 0.0004939859465103925, "loss": 19.6594, "step": 19000 }, { "epoch": 0.03708849466926013, "grad_norm": 11.0625, "learning_rate": 0.0004939794444794831, "loss": 19.7557, "step": 19020 }, { "epoch": 0.03712749413789237, "grad_norm": 8.5, "learning_rate": 0.0004939729424485738, "loss": 19.6508, "step": 19040 }, { "epoch": 0.03716649360652461, "grad_norm": 12.375, "learning_rate": 0.0004939664404176644, "loss": 19.6447, "step": 19060 }, { "epoch": 0.03720549307515685, "grad_norm": 9.5625, "learning_rate": 0.0004939599383867551, "loss": 19.5852, "step": 19080 }, { "epoch": 0.037244492543789094, "grad_norm": 9.5625, "learning_rate": 0.0004939534363558458, "loss": 19.6278, "step": 19100 }, { "epoch": 0.03728349201242133, "grad_norm": 11.25, "learning_rate": 0.0004939469343249364, "loss": 19.7012, "step": 19120 }, { "epoch": 0.03732249148105357, "grad_norm": 10.9375, "learning_rate": 0.0004939404322940271, "loss": 19.6584, "step": 19140 }, { "epoch": 0.03736149094968581, "grad_norm": 10.375, "learning_rate": 0.0004939339302631176, "loss": 19.6233, "step": 19160 }, { "epoch": 0.03740049041831805, "grad_norm": 11.625, "learning_rate": 0.0004939274282322083, "loss": 19.5714, "step": 19180 }, { "epoch": 0.03743948988695029, "grad_norm": 10.0, "learning_rate": 0.0004939209262012989, "loss": 19.5819, "step": 19200 }, { "epoch": 0.03747848935558253, "grad_norm": 10.375, "learning_rate": 0.0004939144241703896, "loss": 19.605, "step": 19220 }, { "epoch": 0.03751748882421477, "grad_norm": 10.6875, "learning_rate": 0.0004939079221394803, "loss": 19.6348, "step": 19240 }, { "epoch": 0.03755648829284701, "grad_norm": 9.3125, "learning_rate": 0.0004939014201085709, "loss": 19.6152, "step": 19260 }, { "epoch": 0.03759548776147925, "grad_norm": 10.1875, "learning_rate": 0.0004938949180776616, "loss": 19.6556, "step": 19280 }, { "epoch": 0.03763448723011149, "grad_norm": 9.125, "learning_rate": 0.0004938884160467522, "loss": 19.603, "step": 19300 }, { "epoch": 0.03767348669874373, "grad_norm": 10.0625, "learning_rate": 0.0004938819140158429, "loss": 19.6083, "step": 19320 }, { "epoch": 0.03771248616737597, "grad_norm": 11.8125, "learning_rate": 0.0004938754119849335, "loss": 19.6945, "step": 19340 }, { "epoch": 0.03775148563600821, "grad_norm": 10.125, "learning_rate": 0.0004938689099540242, "loss": 19.5438, "step": 19360 }, { "epoch": 0.03779048510464045, "grad_norm": 9.5625, "learning_rate": 0.0004938624079231149, "loss": 19.6158, "step": 19380 }, { "epoch": 0.03782948457327269, "grad_norm": 10.1875, "learning_rate": 0.0004938559058922054, "loss": 19.6023, "step": 19400 }, { "epoch": 0.03786848404190493, "grad_norm": 9.75, "learning_rate": 0.0004938494038612961, "loss": 19.6143, "step": 19420 }, { "epoch": 0.03790748351053717, "grad_norm": 9.875, "learning_rate": 0.0004938429018303867, "loss": 19.5367, "step": 19440 }, { "epoch": 0.03794648297916941, "grad_norm": 9.375, "learning_rate": 0.0004938363997994774, "loss": 19.5761, "step": 19460 }, { "epoch": 0.03798548244780165, "grad_norm": 10.1875, "learning_rate": 0.000493829897768568, "loss": 19.5939, "step": 19480 }, { "epoch": 0.038024481916433886, "grad_norm": 10.0, "learning_rate": 0.0004938233957376587, "loss": 19.5595, "step": 19500 }, { "epoch": 0.03806348138506613, "grad_norm": 12.75, "learning_rate": 0.0004938168937067493, "loss": 19.5722, "step": 19520 }, { "epoch": 0.03810248085369837, "grad_norm": 10.375, "learning_rate": 0.00049381039167584, "loss": 19.5889, "step": 19540 }, { "epoch": 0.038141480322330605, "grad_norm": 10.5, "learning_rate": 0.0004938038896449307, "loss": 19.5379, "step": 19560 }, { "epoch": 0.03818047979096285, "grad_norm": 9.875, "learning_rate": 0.0004937973876140213, "loss": 19.5243, "step": 19580 }, { "epoch": 0.03821947925959509, "grad_norm": 9.1875, "learning_rate": 0.0004937908855831119, "loss": 19.6067, "step": 19600 }, { "epoch": 0.03825847872822733, "grad_norm": 10.25, "learning_rate": 0.0004937843835522025, "loss": 19.6051, "step": 19620 }, { "epoch": 0.038297478196859566, "grad_norm": 10.5, "learning_rate": 0.0004937778815212932, "loss": 19.5555, "step": 19640 }, { "epoch": 0.03833647766549181, "grad_norm": 10.75, "learning_rate": 0.0004937713794903838, "loss": 19.601, "step": 19660 }, { "epoch": 0.03837547713412405, "grad_norm": 9.5, "learning_rate": 0.0004937648774594745, "loss": 19.5818, "step": 19680 }, { "epoch": 0.038414476602756285, "grad_norm": 10.625, "learning_rate": 0.0004937583754285651, "loss": 19.5565, "step": 19700 }, { "epoch": 0.03845347607138853, "grad_norm": 9.9375, "learning_rate": 0.0004937518733976558, "loss": 19.5719, "step": 19720 }, { "epoch": 0.03849247554002077, "grad_norm": 10.3125, "learning_rate": 0.0004937453713667465, "loss": 19.5583, "step": 19740 }, { "epoch": 0.038531475008653004, "grad_norm": 10.4375, "learning_rate": 0.000493738869335837, "loss": 19.5279, "step": 19760 }, { "epoch": 0.038570474477285246, "grad_norm": 9.4375, "learning_rate": 0.0004937323673049277, "loss": 19.4711, "step": 19780 }, { "epoch": 0.03860947394591749, "grad_norm": 11.9375, "learning_rate": 0.0004937258652740183, "loss": 19.5244, "step": 19800 }, { "epoch": 0.038648473414549724, "grad_norm": 9.8125, "learning_rate": 0.000493719363243109, "loss": 19.5652, "step": 19820 }, { "epoch": 0.038687472883181966, "grad_norm": 11.4375, "learning_rate": 0.0004937128612121996, "loss": 19.5002, "step": 19840 }, { "epoch": 0.03872647235181421, "grad_norm": 10.1875, "learning_rate": 0.0004937063591812903, "loss": 19.4699, "step": 19860 }, { "epoch": 0.03876547182044645, "grad_norm": 10.8125, "learning_rate": 0.000493699857150381, "loss": 19.486, "step": 19880 }, { "epoch": 0.038804471289078685, "grad_norm": 9.0625, "learning_rate": 0.0004936933551194716, "loss": 19.5072, "step": 19900 }, { "epoch": 0.03884347075771093, "grad_norm": 9.625, "learning_rate": 0.0004936868530885622, "loss": 19.434, "step": 19920 }, { "epoch": 0.03888247022634317, "grad_norm": 9.5, "learning_rate": 0.0004936803510576528, "loss": 19.4787, "step": 19940 }, { "epoch": 0.038921469694975404, "grad_norm": 10.0625, "learning_rate": 0.0004936738490267435, "loss": 19.4656, "step": 19960 }, { "epoch": 0.038960469163607646, "grad_norm": 9.125, "learning_rate": 0.0004936673469958341, "loss": 19.5858, "step": 19980 }, { "epoch": 0.03899946863223989, "grad_norm": 8.875, "learning_rate": 0.0004936608449649248, "loss": 19.5272, "step": 20000 }, { "epoch": 0.03903846810087212, "grad_norm": 11.0, "learning_rate": 0.0004936543429340154, "loss": 19.4447, "step": 20020 }, { "epoch": 0.039077467569504365, "grad_norm": 10.0, "learning_rate": 0.0004936478409031061, "loss": 19.5607, "step": 20040 }, { "epoch": 0.03911646703813661, "grad_norm": 9.875, "learning_rate": 0.0004936413388721968, "loss": 19.4836, "step": 20060 }, { "epoch": 0.03915546650676884, "grad_norm": 10.75, "learning_rate": 0.0004936348368412874, "loss": 19.5538, "step": 20080 }, { "epoch": 0.039194465975401084, "grad_norm": 11.25, "learning_rate": 0.0004936283348103781, "loss": 19.3946, "step": 20100 }, { "epoch": 0.039233465444033326, "grad_norm": 9.8125, "learning_rate": 0.0004936218327794687, "loss": 19.4751, "step": 20120 }, { "epoch": 0.03927246491266557, "grad_norm": 11.0, "learning_rate": 0.0004936153307485593, "loss": 19.4678, "step": 20140 }, { "epoch": 0.0393114643812978, "grad_norm": 11.1875, "learning_rate": 0.0004936088287176499, "loss": 19.3942, "step": 20160 }, { "epoch": 0.039350463849930045, "grad_norm": 11.25, "learning_rate": 0.0004936023266867406, "loss": 19.4169, "step": 20180 }, { "epoch": 0.03938946331856229, "grad_norm": 10.1875, "learning_rate": 0.0004935958246558312, "loss": 19.4035, "step": 20200 }, { "epoch": 0.03942846278719452, "grad_norm": 10.125, "learning_rate": 0.0004935893226249219, "loss": 19.4074, "step": 20220 }, { "epoch": 0.039467462255826764, "grad_norm": 9.5625, "learning_rate": 0.0004935828205940126, "loss": 19.4228, "step": 20240 }, { "epoch": 0.039506461724459006, "grad_norm": 9.625, "learning_rate": 0.0004935763185631032, "loss": 19.4486, "step": 20260 }, { "epoch": 0.03954546119309124, "grad_norm": 11.0, "learning_rate": 0.0004935698165321939, "loss": 19.4736, "step": 20280 }, { "epoch": 0.03958446066172348, "grad_norm": 9.75, "learning_rate": 0.0004935633145012845, "loss": 19.5127, "step": 20300 }, { "epoch": 0.039623460130355725, "grad_norm": 10.625, "learning_rate": 0.0004935568124703752, "loss": 19.458, "step": 20320 }, { "epoch": 0.03966245959898796, "grad_norm": 9.25, "learning_rate": 0.0004935503104394658, "loss": 19.399, "step": 20340 }, { "epoch": 0.0397014590676202, "grad_norm": 10.4375, "learning_rate": 0.0004935438084085565, "loss": 19.3554, "step": 20360 }, { "epoch": 0.039740458536252445, "grad_norm": 10.3125, "learning_rate": 0.000493537306377647, "loss": 19.4926, "step": 20380 }, { "epoch": 0.03977945800488469, "grad_norm": 9.625, "learning_rate": 0.0004935308043467377, "loss": 19.3903, "step": 20400 }, { "epoch": 0.03981845747351692, "grad_norm": 10.0625, "learning_rate": 0.0004935243023158284, "loss": 19.4067, "step": 20420 }, { "epoch": 0.039857456942149164, "grad_norm": 10.0625, "learning_rate": 0.000493517800284919, "loss": 19.5267, "step": 20440 }, { "epoch": 0.039896456410781406, "grad_norm": 10.0, "learning_rate": 0.0004935112982540097, "loss": 19.3879, "step": 20460 }, { "epoch": 0.03993545587941364, "grad_norm": 9.8125, "learning_rate": 0.0004935047962231003, "loss": 19.3781, "step": 20480 }, { "epoch": 0.03997445534804588, "grad_norm": 9.9375, "learning_rate": 0.000493498294192191, "loss": 19.4575, "step": 20500 }, { "epoch": 0.040013454816678125, "grad_norm": 10.6875, "learning_rate": 0.0004934917921612816, "loss": 19.393, "step": 20520 }, { "epoch": 0.04005245428531036, "grad_norm": 9.875, "learning_rate": 0.0004934852901303722, "loss": 19.3753, "step": 20540 }, { "epoch": 0.0400914537539426, "grad_norm": 10.375, "learning_rate": 0.0004934787880994629, "loss": 19.3748, "step": 20560 }, { "epoch": 0.040130453222574844, "grad_norm": 9.0625, "learning_rate": 0.0004934722860685535, "loss": 19.3817, "step": 20580 }, { "epoch": 0.04016945269120708, "grad_norm": 11.3125, "learning_rate": 0.0004934657840376442, "loss": 19.4017, "step": 20600 }, { "epoch": 0.04020845215983932, "grad_norm": 9.8125, "learning_rate": 0.0004934592820067348, "loss": 19.4036, "step": 20620 }, { "epoch": 0.04024745162847156, "grad_norm": 10.125, "learning_rate": 0.0004934527799758255, "loss": 19.3128, "step": 20640 }, { "epoch": 0.040286451097103805, "grad_norm": 10.8125, "learning_rate": 0.0004934462779449161, "loss": 19.3186, "step": 20660 }, { "epoch": 0.04032545056573604, "grad_norm": 11.25, "learning_rate": 0.0004934397759140067, "loss": 19.2757, "step": 20680 }, { "epoch": 0.04036445003436828, "grad_norm": 11.0, "learning_rate": 0.0004934332738830973, "loss": 19.4058, "step": 20700 }, { "epoch": 0.040403449503000524, "grad_norm": 9.6875, "learning_rate": 0.000493426771852188, "loss": 19.2796, "step": 20720 }, { "epoch": 0.04044244897163276, "grad_norm": 10.0, "learning_rate": 0.0004934202698212787, "loss": 19.3668, "step": 20740 }, { "epoch": 0.040481448440265, "grad_norm": 10.5, "learning_rate": 0.0004934137677903693, "loss": 19.3881, "step": 20760 }, { "epoch": 0.04052044790889724, "grad_norm": 9.0, "learning_rate": 0.00049340726575946, "loss": 19.3452, "step": 20780 }, { "epoch": 0.04055944737752948, "grad_norm": 9.375, "learning_rate": 0.0004934007637285506, "loss": 19.4051, "step": 20800 }, { "epoch": 0.04059844684616172, "grad_norm": 10.0625, "learning_rate": 0.0004933942616976413, "loss": 19.335, "step": 20820 }, { "epoch": 0.04063744631479396, "grad_norm": 10.1875, "learning_rate": 0.0004933877596667319, "loss": 19.2674, "step": 20840 }, { "epoch": 0.0406764457834262, "grad_norm": 10.625, "learning_rate": 0.0004933812576358225, "loss": 19.3727, "step": 20860 }, { "epoch": 0.04071544525205844, "grad_norm": 10.25, "learning_rate": 0.0004933747556049131, "loss": 19.2964, "step": 20880 }, { "epoch": 0.04075444472069068, "grad_norm": 9.5, "learning_rate": 0.0004933682535740038, "loss": 19.3364, "step": 20900 }, { "epoch": 0.040793444189322924, "grad_norm": 10.1875, "learning_rate": 0.0004933617515430945, "loss": 19.2485, "step": 20920 }, { "epoch": 0.04083244365795516, "grad_norm": 9.875, "learning_rate": 0.0004933552495121851, "loss": 19.4376, "step": 20940 }, { "epoch": 0.0408714431265874, "grad_norm": 10.125, "learning_rate": 0.0004933487474812758, "loss": 19.3453, "step": 20960 }, { "epoch": 0.04091044259521964, "grad_norm": 9.75, "learning_rate": 0.0004933422454503664, "loss": 19.3064, "step": 20980 }, { "epoch": 0.04094944206385188, "grad_norm": 10.4375, "learning_rate": 0.0004933357434194571, "loss": 19.2934, "step": 21000 }, { "epoch": 0.04098844153248412, "grad_norm": 9.6875, "learning_rate": 0.0004933292413885477, "loss": 19.3075, "step": 21020 }, { "epoch": 0.04102744100111636, "grad_norm": 8.875, "learning_rate": 0.0004933227393576384, "loss": 19.3206, "step": 21040 }, { "epoch": 0.0410664404697486, "grad_norm": 8.5625, "learning_rate": 0.0004933162373267291, "loss": 19.217, "step": 21060 }, { "epoch": 0.04110543993838084, "grad_norm": 10.8125, "learning_rate": 0.0004933097352958197, "loss": 19.2804, "step": 21080 }, { "epoch": 0.04114443940701308, "grad_norm": 9.1875, "learning_rate": 0.0004933032332649103, "loss": 19.3051, "step": 21100 }, { "epoch": 0.041183438875645316, "grad_norm": 9.875, "learning_rate": 0.0004932967312340009, "loss": 19.3063, "step": 21120 }, { "epoch": 0.04122243834427756, "grad_norm": 10.4375, "learning_rate": 0.0004932902292030916, "loss": 19.2625, "step": 21140 }, { "epoch": 0.0412614378129098, "grad_norm": 9.9375, "learning_rate": 0.0004932837271721822, "loss": 19.3668, "step": 21160 }, { "epoch": 0.04130043728154204, "grad_norm": 10.5625, "learning_rate": 0.0004932772251412729, "loss": 19.2292, "step": 21180 }, { "epoch": 0.04133943675017428, "grad_norm": 9.5, "learning_rate": 0.0004932707231103636, "loss": 19.2596, "step": 21200 }, { "epoch": 0.04137843621880652, "grad_norm": 9.75, "learning_rate": 0.0004932642210794542, "loss": 19.327, "step": 21220 }, { "epoch": 0.04141743568743876, "grad_norm": 9.9375, "learning_rate": 0.0004932577190485449, "loss": 19.242, "step": 21240 }, { "epoch": 0.041456435156070996, "grad_norm": 9.125, "learning_rate": 0.0004932512170176355, "loss": 19.2428, "step": 21260 }, { "epoch": 0.04149543462470324, "grad_norm": 10.4375, "learning_rate": 0.0004932447149867262, "loss": 19.2661, "step": 21280 }, { "epoch": 0.04153443409333548, "grad_norm": 10.0, "learning_rate": 0.0004932382129558167, "loss": 19.2137, "step": 21300 }, { "epoch": 0.041573433561967715, "grad_norm": 10.5, "learning_rate": 0.0004932317109249074, "loss": 19.285, "step": 21320 }, { "epoch": 0.04161243303059996, "grad_norm": 10.25, "learning_rate": 0.000493225208893998, "loss": 19.2309, "step": 21340 }, { "epoch": 0.0416514324992322, "grad_norm": 10.625, "learning_rate": 0.0004932187068630887, "loss": 19.2034, "step": 21360 }, { "epoch": 0.041690431967864434, "grad_norm": 9.5625, "learning_rate": 0.0004932122048321794, "loss": 19.2531, "step": 21380 }, { "epoch": 0.041729431436496676, "grad_norm": 10.125, "learning_rate": 0.00049320570280127, "loss": 19.3111, "step": 21400 }, { "epoch": 0.04176843090512892, "grad_norm": 9.875, "learning_rate": 0.0004931992007703607, "loss": 19.3105, "step": 21420 }, { "epoch": 0.04180743037376116, "grad_norm": 8.8125, "learning_rate": 0.0004931926987394513, "loss": 19.2722, "step": 21440 }, { "epoch": 0.041846429842393396, "grad_norm": 9.9375, "learning_rate": 0.0004931861967085419, "loss": 19.2908, "step": 21460 }, { "epoch": 0.04188542931102564, "grad_norm": 9.375, "learning_rate": 0.0004931796946776325, "loss": 19.2104, "step": 21480 }, { "epoch": 0.04192442877965788, "grad_norm": 9.8125, "learning_rate": 0.0004931731926467232, "loss": 19.1942, "step": 21500 }, { "epoch": 0.041963428248290115, "grad_norm": 8.8125, "learning_rate": 0.0004931666906158138, "loss": 19.2486, "step": 21520 }, { "epoch": 0.04200242771692236, "grad_norm": 9.6875, "learning_rate": 0.0004931601885849045, "loss": 19.2424, "step": 21540 }, { "epoch": 0.0420414271855546, "grad_norm": 9.125, "learning_rate": 0.0004931536865539952, "loss": 19.3564, "step": 21560 }, { "epoch": 0.042080426654186834, "grad_norm": 8.6875, "learning_rate": 0.0004931471845230858, "loss": 19.2134, "step": 21580 }, { "epoch": 0.042119426122819076, "grad_norm": 8.75, "learning_rate": 0.0004931406824921764, "loss": 19.2137, "step": 21600 }, { "epoch": 0.04215842559145132, "grad_norm": 11.125, "learning_rate": 0.000493134180461267, "loss": 19.1758, "step": 21620 }, { "epoch": 0.04219742506008355, "grad_norm": 9.1875, "learning_rate": 0.0004931276784303577, "loss": 19.1901, "step": 21640 }, { "epoch": 0.042236424528715795, "grad_norm": 10.0, "learning_rate": 0.0004931211763994483, "loss": 19.2101, "step": 21660 }, { "epoch": 0.04227542399734804, "grad_norm": 10.25, "learning_rate": 0.000493114674368539, "loss": 19.198, "step": 21680 }, { "epoch": 0.04231442346598028, "grad_norm": 10.125, "learning_rate": 0.0004931081723376296, "loss": 19.1118, "step": 21700 }, { "epoch": 0.042353422934612514, "grad_norm": 9.25, "learning_rate": 0.0004931016703067203, "loss": 19.2593, "step": 21720 }, { "epoch": 0.042392422403244756, "grad_norm": 10.125, "learning_rate": 0.000493095168275811, "loss": 19.1892, "step": 21740 }, { "epoch": 0.042431421871877, "grad_norm": 9.6875, "learning_rate": 0.0004930886662449016, "loss": 19.1734, "step": 21760 }, { "epoch": 0.04247042134050923, "grad_norm": 8.9375, "learning_rate": 0.0004930821642139923, "loss": 19.156, "step": 21780 }, { "epoch": 0.042509420809141475, "grad_norm": 9.3125, "learning_rate": 0.0004930756621830829, "loss": 19.2072, "step": 21800 }, { "epoch": 0.04254842027777372, "grad_norm": 10.75, "learning_rate": 0.0004930691601521736, "loss": 19.1777, "step": 21820 }, { "epoch": 0.04258741974640595, "grad_norm": 8.875, "learning_rate": 0.0004930626581212641, "loss": 19.2238, "step": 21840 }, { "epoch": 0.042626419215038194, "grad_norm": 8.9375, "learning_rate": 0.0004930561560903548, "loss": 19.1595, "step": 21860 }, { "epoch": 0.042665418683670436, "grad_norm": 10.4375, "learning_rate": 0.0004930496540594455, "loss": 19.218, "step": 21880 }, { "epoch": 0.04270441815230267, "grad_norm": 9.375, "learning_rate": 0.0004930431520285361, "loss": 19.1414, "step": 21900 }, { "epoch": 0.04274341762093491, "grad_norm": 10.125, "learning_rate": 0.0004930366499976268, "loss": 19.1606, "step": 21920 }, { "epoch": 0.042782417089567155, "grad_norm": 7.75, "learning_rate": 0.0004930301479667174, "loss": 19.2346, "step": 21940 }, { "epoch": 0.0428214165581994, "grad_norm": 9.8125, "learning_rate": 0.0004930236459358081, "loss": 19.1725, "step": 21960 }, { "epoch": 0.04286041602683163, "grad_norm": 10.875, "learning_rate": 0.0004930171439048987, "loss": 19.1802, "step": 21980 }, { "epoch": 0.042899415495463875, "grad_norm": 9.3125, "learning_rate": 0.0004930106418739894, "loss": 19.0717, "step": 22000 }, { "epoch": 0.04293841496409612, "grad_norm": 8.9375, "learning_rate": 0.00049300413984308, "loss": 19.228, "step": 22020 }, { "epoch": 0.04297741443272835, "grad_norm": 10.3125, "learning_rate": 0.0004929976378121707, "loss": 19.1446, "step": 22040 }, { "epoch": 0.043016413901360594, "grad_norm": 10.125, "learning_rate": 0.0004929911357812614, "loss": 19.1183, "step": 22060 }, { "epoch": 0.043055413369992836, "grad_norm": 10.0625, "learning_rate": 0.0004929846337503519, "loss": 19.1411, "step": 22080 }, { "epoch": 0.04309441283862507, "grad_norm": 9.8125, "learning_rate": 0.0004929781317194426, "loss": 19.1389, "step": 22100 }, { "epoch": 0.04313341230725731, "grad_norm": 8.625, "learning_rate": 0.0004929716296885332, "loss": 19.1494, "step": 22120 }, { "epoch": 0.043172411775889555, "grad_norm": 8.8125, "learning_rate": 0.0004929651276576239, "loss": 19.1132, "step": 22140 }, { "epoch": 0.0432114112445218, "grad_norm": 9.375, "learning_rate": 0.0004929586256267145, "loss": 19.1119, "step": 22160 }, { "epoch": 0.04325041071315403, "grad_norm": 9.4375, "learning_rate": 0.0004929521235958052, "loss": 19.1737, "step": 22180 }, { "epoch": 0.043289410181786274, "grad_norm": 9.9375, "learning_rate": 0.0004929456215648959, "loss": 19.1642, "step": 22200 }, { "epoch": 0.043328409650418516, "grad_norm": 10.5625, "learning_rate": 0.0004929391195339864, "loss": 19.1708, "step": 22220 }, { "epoch": 0.04336740911905075, "grad_norm": 10.0, "learning_rate": 0.0004929326175030771, "loss": 19.0321, "step": 22240 }, { "epoch": 0.04340640858768299, "grad_norm": 9.4375, "learning_rate": 0.0004929261154721677, "loss": 19.1433, "step": 22260 }, { "epoch": 0.043445408056315235, "grad_norm": 8.625, "learning_rate": 0.0004929196134412584, "loss": 19.1147, "step": 22280 }, { "epoch": 0.04348440752494747, "grad_norm": 8.8125, "learning_rate": 0.000492913111410349, "loss": 19.062, "step": 22300 }, { "epoch": 0.04352340699357971, "grad_norm": 10.1875, "learning_rate": 0.0004929066093794397, "loss": 19.0993, "step": 22320 }, { "epoch": 0.043562406462211954, "grad_norm": 10.5625, "learning_rate": 0.0004929001073485303, "loss": 19.1562, "step": 22340 }, { "epoch": 0.04360140593084419, "grad_norm": 8.75, "learning_rate": 0.000492893605317621, "loss": 19.0602, "step": 22360 }, { "epoch": 0.04364040539947643, "grad_norm": 12.0, "learning_rate": 0.0004928871032867116, "loss": 19.1426, "step": 22380 }, { "epoch": 0.04367940486810867, "grad_norm": 8.5625, "learning_rate": 0.0004928806012558022, "loss": 19.033, "step": 22400 }, { "epoch": 0.043718404336740915, "grad_norm": 8.8125, "learning_rate": 0.0004928740992248929, "loss": 19.1072, "step": 22420 }, { "epoch": 0.04375740380537315, "grad_norm": 10.25, "learning_rate": 0.0004928675971939835, "loss": 19.1208, "step": 22440 }, { "epoch": 0.04379640327400539, "grad_norm": 9.875, "learning_rate": 0.0004928610951630742, "loss": 19.0827, "step": 22460 }, { "epoch": 0.043835402742637634, "grad_norm": 9.0, "learning_rate": 0.0004928545931321648, "loss": 19.1068, "step": 22480 }, { "epoch": 0.04387440221126987, "grad_norm": 9.6875, "learning_rate": 0.0004928480911012555, "loss": 19.0107, "step": 22500 }, { "epoch": 0.04391340167990211, "grad_norm": 9.25, "learning_rate": 0.0004928415890703462, "loss": 19.0546, "step": 22520 }, { "epoch": 0.043952401148534354, "grad_norm": 9.125, "learning_rate": 0.0004928350870394368, "loss": 19.1011, "step": 22540 }, { "epoch": 0.04399140061716659, "grad_norm": 9.25, "learning_rate": 0.0004928285850085274, "loss": 19.0284, "step": 22560 }, { "epoch": 0.04403040008579883, "grad_norm": 9.625, "learning_rate": 0.000492822082977618, "loss": 19.0703, "step": 22580 }, { "epoch": 0.04406939955443107, "grad_norm": 10.0625, "learning_rate": 0.0004928155809467087, "loss": 19.0894, "step": 22600 }, { "epoch": 0.04410839902306331, "grad_norm": 10.0, "learning_rate": 0.0004928090789157993, "loss": 19.0262, "step": 22620 }, { "epoch": 0.04414739849169555, "grad_norm": 9.3125, "learning_rate": 0.00049280257688489, "loss": 18.9887, "step": 22640 }, { "epoch": 0.04418639796032779, "grad_norm": 9.3125, "learning_rate": 0.0004927960748539806, "loss": 19.0646, "step": 22660 }, { "epoch": 0.044225397428960034, "grad_norm": 9.8125, "learning_rate": 0.0004927895728230713, "loss": 19.0956, "step": 22680 }, { "epoch": 0.04426439689759227, "grad_norm": 8.75, "learning_rate": 0.000492783070792162, "loss": 18.9863, "step": 22700 }, { "epoch": 0.04430339636622451, "grad_norm": 9.4375, "learning_rate": 0.0004927765687612526, "loss": 18.9985, "step": 22720 }, { "epoch": 0.04434239583485675, "grad_norm": 9.8125, "learning_rate": 0.0004927700667303433, "loss": 18.9894, "step": 22740 }, { "epoch": 0.04438139530348899, "grad_norm": 10.3125, "learning_rate": 0.0004927635646994339, "loss": 18.9593, "step": 22760 }, { "epoch": 0.04442039477212123, "grad_norm": 8.8125, "learning_rate": 0.0004927570626685246, "loss": 19.0369, "step": 22780 }, { "epoch": 0.04445939424075347, "grad_norm": 9.1875, "learning_rate": 0.0004927505606376151, "loss": 19.0317, "step": 22800 }, { "epoch": 0.04449839370938571, "grad_norm": 10.1875, "learning_rate": 0.0004927440586067058, "loss": 18.9632, "step": 22820 }, { "epoch": 0.04453739317801795, "grad_norm": 8.9375, "learning_rate": 0.0004927375565757964, "loss": 19.0398, "step": 22840 }, { "epoch": 0.04457639264665019, "grad_norm": 8.9375, "learning_rate": 0.0004927310545448871, "loss": 18.9838, "step": 22860 }, { "epoch": 0.044615392115282426, "grad_norm": 9.5, "learning_rate": 0.0004927245525139778, "loss": 19.0317, "step": 22880 }, { "epoch": 0.04465439158391467, "grad_norm": 9.9375, "learning_rate": 0.0004927180504830684, "loss": 19.0839, "step": 22900 }, { "epoch": 0.04469339105254691, "grad_norm": 9.75, "learning_rate": 0.0004927115484521591, "loss": 19.0401, "step": 22920 }, { "epoch": 0.04473239052117915, "grad_norm": 8.9375, "learning_rate": 0.0004927050464212497, "loss": 19.0111, "step": 22940 }, { "epoch": 0.04477138998981139, "grad_norm": 9.4375, "learning_rate": 0.0004926985443903404, "loss": 19.0218, "step": 22960 }, { "epoch": 0.04481038945844363, "grad_norm": 10.0625, "learning_rate": 0.000492692042359431, "loss": 19.0281, "step": 22980 }, { "epoch": 0.04484938892707587, "grad_norm": 9.0625, "learning_rate": 0.0004926855403285216, "loss": 18.9653, "step": 23000 }, { "epoch": 0.044888388395708106, "grad_norm": 9.0, "learning_rate": 0.0004926790382976123, "loss": 19.081, "step": 23020 }, { "epoch": 0.04492738786434035, "grad_norm": 8.625, "learning_rate": 0.0004926725362667029, "loss": 19.0105, "step": 23040 }, { "epoch": 0.04496638733297259, "grad_norm": 9.4375, "learning_rate": 0.0004926660342357936, "loss": 18.9445, "step": 23060 }, { "epoch": 0.045005386801604826, "grad_norm": 9.0, "learning_rate": 0.0004926595322048842, "loss": 19.0298, "step": 23080 }, { "epoch": 0.04504438627023707, "grad_norm": 9.1875, "learning_rate": 0.0004926530301739749, "loss": 18.9949, "step": 23100 }, { "epoch": 0.04508338573886931, "grad_norm": 9.5, "learning_rate": 0.0004926465281430655, "loss": 19.053, "step": 23120 }, { "epoch": 0.045122385207501545, "grad_norm": 9.0, "learning_rate": 0.0004926400261121561, "loss": 19.0107, "step": 23140 }, { "epoch": 0.04516138467613379, "grad_norm": 9.6875, "learning_rate": 0.0004926335240812467, "loss": 18.8944, "step": 23160 }, { "epoch": 0.04520038414476603, "grad_norm": 9.625, "learning_rate": 0.0004926270220503374, "loss": 18.9518, "step": 23180 }, { "epoch": 0.04523938361339827, "grad_norm": 9.0625, "learning_rate": 0.0004926205200194281, "loss": 18.9757, "step": 23200 }, { "epoch": 0.045278383082030506, "grad_norm": 9.0625, "learning_rate": 0.0004926140179885187, "loss": 18.9609, "step": 23220 }, { "epoch": 0.04531738255066275, "grad_norm": 9.1875, "learning_rate": 0.0004926075159576094, "loss": 18.9258, "step": 23240 }, { "epoch": 0.04535638201929499, "grad_norm": 9.3125, "learning_rate": 0.0004926010139267, "loss": 19.0344, "step": 23260 }, { "epoch": 0.045395381487927225, "grad_norm": 9.125, "learning_rate": 0.0004925945118957907, "loss": 18.9683, "step": 23280 }, { "epoch": 0.04543438095655947, "grad_norm": 9.0, "learning_rate": 0.0004925880098648812, "loss": 18.9852, "step": 23300 }, { "epoch": 0.04547338042519171, "grad_norm": 9.875, "learning_rate": 0.0004925815078339719, "loss": 18.8897, "step": 23320 }, { "epoch": 0.045512379893823944, "grad_norm": 10.3125, "learning_rate": 0.0004925750058030625, "loss": 18.9442, "step": 23340 }, { "epoch": 0.045551379362456186, "grad_norm": 7.9375, "learning_rate": 0.0004925685037721532, "loss": 18.8775, "step": 23360 }, { "epoch": 0.04559037883108843, "grad_norm": 9.625, "learning_rate": 0.0004925620017412439, "loss": 18.8892, "step": 23380 }, { "epoch": 0.04562937829972066, "grad_norm": 8.4375, "learning_rate": 0.0004925554997103345, "loss": 18.8647, "step": 23400 }, { "epoch": 0.045668377768352905, "grad_norm": 9.3125, "learning_rate": 0.0004925489976794252, "loss": 18.8749, "step": 23420 }, { "epoch": 0.04570737723698515, "grad_norm": 9.3125, "learning_rate": 0.0004925424956485158, "loss": 19.0213, "step": 23440 }, { "epoch": 0.04574637670561739, "grad_norm": 9.0, "learning_rate": 0.0004925359936176065, "loss": 18.902, "step": 23460 }, { "epoch": 0.045785376174249624, "grad_norm": 8.5, "learning_rate": 0.0004925294915866971, "loss": 18.9389, "step": 23480 }, { "epoch": 0.045824375642881866, "grad_norm": 9.0, "learning_rate": 0.0004925229895557878, "loss": 18.9333, "step": 23500 }, { "epoch": 0.04586337511151411, "grad_norm": 8.0625, "learning_rate": 0.0004925164875248785, "loss": 18.8777, "step": 23520 }, { "epoch": 0.04590237458014634, "grad_norm": 10.1875, "learning_rate": 0.000492509985493969, "loss": 18.92, "step": 23540 }, { "epoch": 0.045941374048778585, "grad_norm": 8.5625, "learning_rate": 0.0004925034834630597, "loss": 18.8384, "step": 23560 }, { "epoch": 0.04598037351741083, "grad_norm": 9.25, "learning_rate": 0.0004924969814321503, "loss": 18.8315, "step": 23580 }, { "epoch": 0.04601937298604306, "grad_norm": 9.375, "learning_rate": 0.000492490479401241, "loss": 18.9058, "step": 23600 }, { "epoch": 0.046058372454675305, "grad_norm": 9.9375, "learning_rate": 0.0004924839773703316, "loss": 18.9039, "step": 23620 }, { "epoch": 0.04609737192330755, "grad_norm": 9.6875, "learning_rate": 0.0004924774753394223, "loss": 18.9109, "step": 23640 }, { "epoch": 0.04613637139193978, "grad_norm": 9.125, "learning_rate": 0.000492470973308513, "loss": 18.8286, "step": 23660 }, { "epoch": 0.046175370860572024, "grad_norm": 9.0625, "learning_rate": 0.0004924644712776036, "loss": 18.8554, "step": 23680 }, { "epoch": 0.046214370329204266, "grad_norm": 8.5, "learning_rate": 0.0004924579692466943, "loss": 18.9488, "step": 23700 }, { "epoch": 0.04625336979783651, "grad_norm": 8.875, "learning_rate": 0.0004924514672157849, "loss": 18.89, "step": 23720 }, { "epoch": 0.04629236926646874, "grad_norm": 8.0625, "learning_rate": 0.0004924449651848756, "loss": 18.901, "step": 23740 }, { "epoch": 0.046331368735100985, "grad_norm": 9.125, "learning_rate": 0.0004924384631539661, "loss": 18.8675, "step": 23760 }, { "epoch": 0.04637036820373323, "grad_norm": 10.9375, "learning_rate": 0.0004924319611230568, "loss": 18.8375, "step": 23780 }, { "epoch": 0.04640936767236546, "grad_norm": 10.125, "learning_rate": 0.0004924254590921474, "loss": 18.8213, "step": 23800 }, { "epoch": 0.046448367140997704, "grad_norm": 8.5625, "learning_rate": 0.0004924189570612381, "loss": 18.8256, "step": 23820 }, { "epoch": 0.046487366609629946, "grad_norm": 9.9375, "learning_rate": 0.0004924124550303288, "loss": 18.9225, "step": 23840 }, { "epoch": 0.04652636607826218, "grad_norm": 9.375, "learning_rate": 0.0004924059529994194, "loss": 18.8879, "step": 23860 }, { "epoch": 0.04656536554689442, "grad_norm": 9.0625, "learning_rate": 0.0004923994509685101, "loss": 18.7802, "step": 23880 }, { "epoch": 0.046604365015526665, "grad_norm": 10.25, "learning_rate": 0.0004923929489376007, "loss": 18.7438, "step": 23900 }, { "epoch": 0.0466433644841589, "grad_norm": 10.4375, "learning_rate": 0.0004923864469066913, "loss": 18.8514, "step": 23920 }, { "epoch": 0.04668236395279114, "grad_norm": 9.0625, "learning_rate": 0.0004923799448757819, "loss": 18.8762, "step": 23940 }, { "epoch": 0.046721363421423384, "grad_norm": 9.75, "learning_rate": 0.0004923734428448726, "loss": 18.9164, "step": 23960 }, { "epoch": 0.046760362890055626, "grad_norm": 8.9375, "learning_rate": 0.0004923669408139632, "loss": 18.8688, "step": 23980 }, { "epoch": 0.04679936235868786, "grad_norm": 8.625, "learning_rate": 0.0004923604387830539, "loss": 18.9094, "step": 24000 }, { "epoch": 0.0468383618273201, "grad_norm": 8.875, "learning_rate": 0.0004923539367521446, "loss": 18.8687, "step": 24020 }, { "epoch": 0.046877361295952345, "grad_norm": 9.4375, "learning_rate": 0.0004923474347212352, "loss": 18.8865, "step": 24040 }, { "epoch": 0.04691636076458458, "grad_norm": 9.9375, "learning_rate": 0.0004923409326903258, "loss": 18.9538, "step": 24060 }, { "epoch": 0.04695536023321682, "grad_norm": 9.0, "learning_rate": 0.0004923344306594164, "loss": 18.8214, "step": 24080 }, { "epoch": 0.046994359701849064, "grad_norm": 8.9375, "learning_rate": 0.0004923279286285071, "loss": 18.8878, "step": 24100 }, { "epoch": 0.0470333591704813, "grad_norm": 9.5, "learning_rate": 0.0004923214265975977, "loss": 18.8843, "step": 24120 }, { "epoch": 0.04707235863911354, "grad_norm": 9.0, "learning_rate": 0.0004923149245666884, "loss": 18.8643, "step": 24140 }, { "epoch": 0.047111358107745784, "grad_norm": 8.75, "learning_rate": 0.000492308422535779, "loss": 18.8928, "step": 24160 }, { "epoch": 0.04715035757637802, "grad_norm": 9.125, "learning_rate": 0.0004923019205048697, "loss": 18.8347, "step": 24180 }, { "epoch": 0.04718935704501026, "grad_norm": 9.625, "learning_rate": 0.0004922954184739604, "loss": 18.7722, "step": 24200 }, { "epoch": 0.0472283565136425, "grad_norm": 9.25, "learning_rate": 0.000492288916443051, "loss": 18.7743, "step": 24220 }, { "epoch": 0.047267355982274745, "grad_norm": 10.4375, "learning_rate": 0.0004922824144121417, "loss": 18.8431, "step": 24240 }, { "epoch": 0.04730635545090698, "grad_norm": 8.75, "learning_rate": 0.0004922759123812322, "loss": 18.8085, "step": 24260 }, { "epoch": 0.04734535491953922, "grad_norm": 9.1875, "learning_rate": 0.0004922694103503229, "loss": 18.8149, "step": 24280 }, { "epoch": 0.047384354388171464, "grad_norm": 10.125, "learning_rate": 0.0004922629083194135, "loss": 18.8645, "step": 24300 }, { "epoch": 0.0474233538568037, "grad_norm": 8.625, "learning_rate": 0.0004922564062885042, "loss": 18.7898, "step": 24320 }, { "epoch": 0.04746235332543594, "grad_norm": 10.625, "learning_rate": 0.0004922499042575949, "loss": 18.7574, "step": 24340 }, { "epoch": 0.04750135279406818, "grad_norm": 8.5625, "learning_rate": 0.0004922434022266855, "loss": 18.8584, "step": 24360 }, { "epoch": 0.04754035226270042, "grad_norm": 9.1875, "learning_rate": 0.0004922369001957762, "loss": 18.8619, "step": 24380 }, { "epoch": 0.04757935173133266, "grad_norm": 9.0625, "learning_rate": 0.0004922303981648668, "loss": 18.7597, "step": 24400 }, { "epoch": 0.0476183511999649, "grad_norm": 9.0625, "learning_rate": 0.0004922238961339575, "loss": 18.7874, "step": 24420 }, { "epoch": 0.04765735066859714, "grad_norm": 9.0625, "learning_rate": 0.0004922173941030481, "loss": 18.8057, "step": 24440 }, { "epoch": 0.04769635013722938, "grad_norm": 7.78125, "learning_rate": 0.0004922108920721388, "loss": 18.7781, "step": 24460 }, { "epoch": 0.04773534960586162, "grad_norm": 9.1875, "learning_rate": 0.0004922043900412294, "loss": 18.7488, "step": 24480 }, { "epoch": 0.04777434907449386, "grad_norm": 9.125, "learning_rate": 0.0004921978880103201, "loss": 18.7704, "step": 24500 }, { "epoch": 0.0478133485431261, "grad_norm": 8.9375, "learning_rate": 0.0004921913859794107, "loss": 18.7835, "step": 24520 }, { "epoch": 0.04785234801175834, "grad_norm": 9.125, "learning_rate": 0.0004921848839485013, "loss": 18.8031, "step": 24540 }, { "epoch": 0.04789134748039058, "grad_norm": 9.375, "learning_rate": 0.000492178381917592, "loss": 18.7698, "step": 24560 }, { "epoch": 0.04793034694902282, "grad_norm": 10.0625, "learning_rate": 0.0004921718798866826, "loss": 18.8024, "step": 24580 }, { "epoch": 0.04796934641765506, "grad_norm": 8.625, "learning_rate": 0.0004921653778557733, "loss": 18.7878, "step": 24600 }, { "epoch": 0.0480083458862873, "grad_norm": 8.25, "learning_rate": 0.0004921588758248639, "loss": 18.7351, "step": 24620 }, { "epoch": 0.048047345354919536, "grad_norm": 8.9375, "learning_rate": 0.0004921523737939546, "loss": 18.788, "step": 24640 }, { "epoch": 0.04808634482355178, "grad_norm": 9.5625, "learning_rate": 0.0004921458717630453, "loss": 18.7198, "step": 24660 }, { "epoch": 0.04812534429218402, "grad_norm": 9.625, "learning_rate": 0.0004921393697321358, "loss": 18.6881, "step": 24680 }, { "epoch": 0.048164343760816256, "grad_norm": 8.625, "learning_rate": 0.0004921328677012265, "loss": 18.7449, "step": 24700 }, { "epoch": 0.0482033432294485, "grad_norm": 9.125, "learning_rate": 0.0004921263656703171, "loss": 18.8048, "step": 24720 }, { "epoch": 0.04824234269808074, "grad_norm": 8.5, "learning_rate": 0.0004921198636394078, "loss": 18.7728, "step": 24740 }, { "epoch": 0.04828134216671298, "grad_norm": 8.5, "learning_rate": 0.0004921133616084984, "loss": 18.7508, "step": 24760 }, { "epoch": 0.04832034163534522, "grad_norm": 8.75, "learning_rate": 0.0004921068595775891, "loss": 18.7711, "step": 24780 }, { "epoch": 0.04835934110397746, "grad_norm": 8.875, "learning_rate": 0.0004921003575466797, "loss": 18.7284, "step": 24800 }, { "epoch": 0.0483983405726097, "grad_norm": 9.125, "learning_rate": 0.0004920938555157704, "loss": 18.6873, "step": 24820 }, { "epoch": 0.048437340041241936, "grad_norm": 7.65625, "learning_rate": 0.000492087353484861, "loss": 18.7178, "step": 24840 }, { "epoch": 0.04847633950987418, "grad_norm": 8.4375, "learning_rate": 0.0004920808514539516, "loss": 18.7787, "step": 24860 }, { "epoch": 0.04851533897850642, "grad_norm": 8.5625, "learning_rate": 0.0004920743494230423, "loss": 18.7449, "step": 24880 }, { "epoch": 0.048554338447138655, "grad_norm": 10.0, "learning_rate": 0.0004920678473921329, "loss": 18.7312, "step": 24900 }, { "epoch": 0.0485933379157709, "grad_norm": 9.1875, "learning_rate": 0.0004920613453612236, "loss": 18.7688, "step": 24920 }, { "epoch": 0.04863233738440314, "grad_norm": 9.625, "learning_rate": 0.0004920548433303142, "loss": 18.7389, "step": 24940 }, { "epoch": 0.048671336853035374, "grad_norm": 9.1875, "learning_rate": 0.0004920483412994049, "loss": 18.729, "step": 24960 }, { "epoch": 0.048710336321667616, "grad_norm": 9.75, "learning_rate": 0.0004920418392684955, "loss": 18.6635, "step": 24980 }, { "epoch": 0.04874933579029986, "grad_norm": 8.875, "learning_rate": 0.0004920353372375861, "loss": 18.6922, "step": 25000 }, { "epoch": 0.0487883352589321, "grad_norm": 8.3125, "learning_rate": 0.0004920288352066768, "loss": 18.6889, "step": 25020 }, { "epoch": 0.048827334727564335, "grad_norm": 9.125, "learning_rate": 0.0004920223331757674, "loss": 18.7768, "step": 25040 }, { "epoch": 0.04886633419619658, "grad_norm": 8.6875, "learning_rate": 0.0004920158311448581, "loss": 18.6773, "step": 25060 }, { "epoch": 0.04890533366482882, "grad_norm": 9.125, "learning_rate": 0.0004920093291139487, "loss": 18.716, "step": 25080 }, { "epoch": 0.048944333133461054, "grad_norm": 8.0, "learning_rate": 0.0004920028270830394, "loss": 18.7033, "step": 25100 }, { "epoch": 0.048983332602093296, "grad_norm": 9.0625, "learning_rate": 0.00049199632505213, "loss": 18.7625, "step": 25120 }, { "epoch": 0.04902233207072554, "grad_norm": 8.5, "learning_rate": 0.0004919898230212207, "loss": 18.6887, "step": 25140 }, { "epoch": 0.04906133153935777, "grad_norm": 9.0625, "learning_rate": 0.0004919833209903114, "loss": 18.7142, "step": 25160 }, { "epoch": 0.049100331007990015, "grad_norm": 9.375, "learning_rate": 0.000491976818959402, "loss": 18.6888, "step": 25180 }, { "epoch": 0.04913933047662226, "grad_norm": 9.75, "learning_rate": 0.0004919703169284927, "loss": 18.6781, "step": 25200 }, { "epoch": 0.04917832994525449, "grad_norm": 8.25, "learning_rate": 0.0004919638148975833, "loss": 18.7371, "step": 25220 }, { "epoch": 0.049217329413886735, "grad_norm": 8.0, "learning_rate": 0.0004919573128666739, "loss": 18.659, "step": 25240 }, { "epoch": 0.04925632888251898, "grad_norm": 8.3125, "learning_rate": 0.0004919508108357645, "loss": 18.7144, "step": 25260 }, { "epoch": 0.04929532835115122, "grad_norm": 9.0625, "learning_rate": 0.0004919443088048552, "loss": 18.6842, "step": 25280 }, { "epoch": 0.049334327819783454, "grad_norm": 9.6875, "learning_rate": 0.0004919378067739458, "loss": 18.7246, "step": 25300 }, { "epoch": 0.049373327288415696, "grad_norm": 9.0, "learning_rate": 0.0004919313047430365, "loss": 18.7, "step": 25320 }, { "epoch": 0.04941232675704794, "grad_norm": 8.9375, "learning_rate": 0.0004919248027121272, "loss": 18.7226, "step": 25340 }, { "epoch": 0.04945132622568017, "grad_norm": 9.125, "learning_rate": 0.0004919183006812178, "loss": 18.7232, "step": 25360 }, { "epoch": 0.049490325694312415, "grad_norm": 9.1875, "learning_rate": 0.0004919117986503085, "loss": 18.6825, "step": 25380 }, { "epoch": 0.04952932516294466, "grad_norm": 8.5625, "learning_rate": 0.0004919052966193991, "loss": 18.6811, "step": 25400 }, { "epoch": 0.04956832463157689, "grad_norm": 9.125, "learning_rate": 0.0004918987945884898, "loss": 18.6628, "step": 25420 }, { "epoch": 0.049607324100209134, "grad_norm": 9.0, "learning_rate": 0.0004918922925575804, "loss": 18.6094, "step": 25440 }, { "epoch": 0.049646323568841376, "grad_norm": 9.125, "learning_rate": 0.000491885790526671, "loss": 18.6339, "step": 25460 }, { "epoch": 0.04968532303747361, "grad_norm": 8.3125, "learning_rate": 0.0004918792884957616, "loss": 18.6095, "step": 25480 }, { "epoch": 0.04972432250610585, "grad_norm": 8.5625, "learning_rate": 0.0004918727864648523, "loss": 18.6319, "step": 25500 }, { "epoch": 0.049763321974738095, "grad_norm": 7.90625, "learning_rate": 0.000491866284433943, "loss": 18.6632, "step": 25520 }, { "epoch": 0.04980232144337034, "grad_norm": 9.25, "learning_rate": 0.0004918597824030336, "loss": 18.6931, "step": 25540 }, { "epoch": 0.04984132091200257, "grad_norm": 7.71875, "learning_rate": 0.0004918532803721243, "loss": 18.7218, "step": 25560 }, { "epoch": 0.049880320380634814, "grad_norm": 8.75, "learning_rate": 0.0004918467783412149, "loss": 18.6063, "step": 25580 }, { "epoch": 0.049919319849267056, "grad_norm": 9.6875, "learning_rate": 0.0004918402763103055, "loss": 18.6975, "step": 25600 }, { "epoch": 0.04995831931789929, "grad_norm": 8.25, "learning_rate": 0.0004918337742793961, "loss": 18.6349, "step": 25620 }, { "epoch": 0.04999731878653153, "grad_norm": 9.125, "learning_rate": 0.0004918272722484868, "loss": 18.6535, "step": 25640 }, { "epoch": 0.050036318255163775, "grad_norm": 8.9375, "learning_rate": 0.0004918207702175775, "loss": 18.6977, "step": 25660 }, { "epoch": 0.05007531772379601, "grad_norm": 8.8125, "learning_rate": 0.0004918142681866681, "loss": 18.6385, "step": 25680 }, { "epoch": 0.05011431719242825, "grad_norm": 8.9375, "learning_rate": 0.0004918077661557588, "loss": 18.6349, "step": 25700 }, { "epoch": 0.050153316661060494, "grad_norm": 9.5, "learning_rate": 0.0004918012641248494, "loss": 18.66, "step": 25720 }, { "epoch": 0.05019231612969273, "grad_norm": 8.375, "learning_rate": 0.0004917947620939401, "loss": 18.6205, "step": 25740 }, { "epoch": 0.05023131559832497, "grad_norm": 9.5625, "learning_rate": 0.0004917882600630306, "loss": 18.6054, "step": 25760 }, { "epoch": 0.050270315066957214, "grad_norm": 9.1875, "learning_rate": 0.0004917817580321213, "loss": 18.6352, "step": 25780 }, { "epoch": 0.050309314535589456, "grad_norm": 8.5, "learning_rate": 0.0004917752560012119, "loss": 18.5816, "step": 25800 }, { "epoch": 0.05034831400422169, "grad_norm": 8.5625, "learning_rate": 0.0004917687539703026, "loss": 18.6986, "step": 25820 }, { "epoch": 0.05038731347285393, "grad_norm": 8.6875, "learning_rate": 0.0004917622519393933, "loss": 18.5879, "step": 25840 }, { "epoch": 0.050426312941486175, "grad_norm": 8.625, "learning_rate": 0.0004917557499084839, "loss": 18.6589, "step": 25860 }, { "epoch": 0.05046531241011841, "grad_norm": 8.875, "learning_rate": 0.0004917492478775746, "loss": 18.5851, "step": 25880 }, { "epoch": 0.05050431187875065, "grad_norm": 9.125, "learning_rate": 0.0004917427458466652, "loss": 18.5856, "step": 25900 }, { "epoch": 0.050543311347382894, "grad_norm": 8.375, "learning_rate": 0.0004917362438157559, "loss": 18.6408, "step": 25920 }, { "epoch": 0.05058231081601513, "grad_norm": 8.6875, "learning_rate": 0.0004917297417848465, "loss": 18.5127, "step": 25940 }, { "epoch": 0.05062131028464737, "grad_norm": 8.5, "learning_rate": 0.0004917232397539372, "loss": 18.6517, "step": 25960 }, { "epoch": 0.05066030975327961, "grad_norm": 9.0, "learning_rate": 0.0004917167377230277, "loss": 18.5446, "step": 25980 }, { "epoch": 0.050699309221911855, "grad_norm": 9.375, "learning_rate": 0.0004917102356921184, "loss": 18.577, "step": 26000 }, { "epoch": 0.05073830869054409, "grad_norm": 9.3125, "learning_rate": 0.0004917037336612091, "loss": 18.7159, "step": 26020 }, { "epoch": 0.05077730815917633, "grad_norm": 8.0625, "learning_rate": 0.0004916972316302997, "loss": 18.599, "step": 26040 }, { "epoch": 0.050816307627808574, "grad_norm": 8.1875, "learning_rate": 0.0004916907295993904, "loss": 18.613, "step": 26060 }, { "epoch": 0.05085530709644081, "grad_norm": 8.5625, "learning_rate": 0.000491684227568481, "loss": 18.5885, "step": 26080 }, { "epoch": 0.05089430656507305, "grad_norm": 9.1875, "learning_rate": 0.0004916777255375717, "loss": 18.4975, "step": 26100 }, { "epoch": 0.05093330603370529, "grad_norm": 8.5, "learning_rate": 0.0004916712235066623, "loss": 18.6132, "step": 26120 }, { "epoch": 0.05097230550233753, "grad_norm": 8.875, "learning_rate": 0.000491664721475753, "loss": 18.5543, "step": 26140 }, { "epoch": 0.05101130497096977, "grad_norm": 8.375, "learning_rate": 0.0004916582194448437, "loss": 18.595, "step": 26160 }, { "epoch": 0.05105030443960201, "grad_norm": 9.6875, "learning_rate": 0.0004916517174139343, "loss": 18.6015, "step": 26180 }, { "epoch": 0.05108930390823425, "grad_norm": 8.625, "learning_rate": 0.000491645215383025, "loss": 18.5615, "step": 26200 }, { "epoch": 0.05112830337686649, "grad_norm": 8.0, "learning_rate": 0.0004916387133521155, "loss": 18.6121, "step": 26220 }, { "epoch": 0.05116730284549873, "grad_norm": 8.5, "learning_rate": 0.0004916322113212062, "loss": 18.5852, "step": 26240 }, { "epoch": 0.05120630231413097, "grad_norm": 9.4375, "learning_rate": 0.0004916257092902968, "loss": 18.5658, "step": 26260 }, { "epoch": 0.05124530178276321, "grad_norm": 9.375, "learning_rate": 0.0004916192072593875, "loss": 18.5323, "step": 26280 }, { "epoch": 0.05128430125139545, "grad_norm": 9.375, "learning_rate": 0.0004916127052284781, "loss": 18.509, "step": 26300 }, { "epoch": 0.05132330072002769, "grad_norm": 8.4375, "learning_rate": 0.0004916062031975688, "loss": 18.5273, "step": 26320 }, { "epoch": 0.05136230018865993, "grad_norm": 8.3125, "learning_rate": 0.0004915997011666595, "loss": 18.5879, "step": 26340 }, { "epoch": 0.05140129965729217, "grad_norm": 9.8125, "learning_rate": 0.0004915931991357501, "loss": 18.628, "step": 26360 }, { "epoch": 0.05144029912592441, "grad_norm": 8.4375, "learning_rate": 0.0004915866971048407, "loss": 18.4209, "step": 26380 }, { "epoch": 0.05147929859455665, "grad_norm": 9.75, "learning_rate": 0.0004915801950739313, "loss": 18.6012, "step": 26400 }, { "epoch": 0.05151829806318889, "grad_norm": 7.84375, "learning_rate": 0.000491573693043022, "loss": 18.5764, "step": 26420 }, { "epoch": 0.05155729753182113, "grad_norm": 8.375, "learning_rate": 0.0004915671910121126, "loss": 18.5052, "step": 26440 }, { "epoch": 0.051596297000453366, "grad_norm": 9.375, "learning_rate": 0.0004915606889812033, "loss": 18.586, "step": 26460 }, { "epoch": 0.05163529646908561, "grad_norm": 8.6875, "learning_rate": 0.000491554186950294, "loss": 18.6213, "step": 26480 }, { "epoch": 0.05167429593771785, "grad_norm": 8.8125, "learning_rate": 0.0004915476849193846, "loss": 18.513, "step": 26500 }, { "epoch": 0.05171329540635009, "grad_norm": 8.5, "learning_rate": 0.0004915411828884753, "loss": 18.5772, "step": 26520 }, { "epoch": 0.05175229487498233, "grad_norm": 8.5, "learning_rate": 0.0004915346808575658, "loss": 18.5226, "step": 26540 }, { "epoch": 0.05179129434361457, "grad_norm": 8.375, "learning_rate": 0.0004915281788266565, "loss": 18.5342, "step": 26560 }, { "epoch": 0.05183029381224681, "grad_norm": 9.125, "learning_rate": 0.0004915216767957471, "loss": 18.4934, "step": 26580 }, { "epoch": 0.051869293280879046, "grad_norm": 7.75, "learning_rate": 0.0004915151747648378, "loss": 18.4788, "step": 26600 }, { "epoch": 0.05190829274951129, "grad_norm": 7.65625, "learning_rate": 0.0004915086727339284, "loss": 18.5118, "step": 26620 }, { "epoch": 0.05194729221814353, "grad_norm": 8.9375, "learning_rate": 0.0004915021707030191, "loss": 18.4933, "step": 26640 }, { "epoch": 0.051986291686775765, "grad_norm": 9.3125, "learning_rate": 0.0004914956686721098, "loss": 18.5703, "step": 26660 }, { "epoch": 0.05202529115540801, "grad_norm": 8.5, "learning_rate": 0.0004914891666412004, "loss": 18.5237, "step": 26680 }, { "epoch": 0.05206429062404025, "grad_norm": 8.3125, "learning_rate": 0.000491482664610291, "loss": 18.5052, "step": 26700 }, { "epoch": 0.052103290092672484, "grad_norm": 9.5625, "learning_rate": 0.0004914761625793816, "loss": 18.5741, "step": 26720 }, { "epoch": 0.052142289561304726, "grad_norm": 8.9375, "learning_rate": 0.0004914696605484723, "loss": 18.4923, "step": 26740 }, { "epoch": 0.05218128902993697, "grad_norm": 10.3125, "learning_rate": 0.0004914631585175629, "loss": 18.4409, "step": 26760 }, { "epoch": 0.05222028849856921, "grad_norm": 9.8125, "learning_rate": 0.0004914566564866536, "loss": 18.4954, "step": 26780 }, { "epoch": 0.052259287967201445, "grad_norm": 8.5, "learning_rate": 0.0004914501544557442, "loss": 18.4677, "step": 26800 }, { "epoch": 0.05229828743583369, "grad_norm": 9.0625, "learning_rate": 0.0004914436524248349, "loss": 18.5404, "step": 26820 }, { "epoch": 0.05233728690446593, "grad_norm": 8.6875, "learning_rate": 0.0004914371503939256, "loss": 18.5818, "step": 26840 }, { "epoch": 0.052376286373098165, "grad_norm": 8.625, "learning_rate": 0.0004914306483630162, "loss": 18.5217, "step": 26860 }, { "epoch": 0.05241528584173041, "grad_norm": 9.1875, "learning_rate": 0.0004914241463321069, "loss": 18.5355, "step": 26880 }, { "epoch": 0.05245428531036265, "grad_norm": 9.125, "learning_rate": 0.0004914176443011975, "loss": 18.4527, "step": 26900 }, { "epoch": 0.052493284778994884, "grad_norm": 8.25, "learning_rate": 0.0004914111422702882, "loss": 18.4915, "step": 26920 }, { "epoch": 0.052532284247627126, "grad_norm": 10.25, "learning_rate": 0.0004914046402393787, "loss": 18.5, "step": 26940 }, { "epoch": 0.05257128371625937, "grad_norm": 9.1875, "learning_rate": 0.0004913981382084694, "loss": 18.51, "step": 26960 }, { "epoch": 0.0526102831848916, "grad_norm": 8.0, "learning_rate": 0.00049139163617756, "loss": 18.5518, "step": 26980 }, { "epoch": 0.052649282653523845, "grad_norm": 9.0, "learning_rate": 0.0004913851341466507, "loss": 18.4303, "step": 27000 }, { "epoch": 0.05268828212215609, "grad_norm": 9.1875, "learning_rate": 0.0004913786321157414, "loss": 18.495, "step": 27020 }, { "epoch": 0.05272728159078833, "grad_norm": 9.1875, "learning_rate": 0.000491372130084832, "loss": 18.491, "step": 27040 }, { "epoch": 0.052766281059420564, "grad_norm": 7.90625, "learning_rate": 0.0004913656280539227, "loss": 18.4938, "step": 27060 }, { "epoch": 0.052805280528052806, "grad_norm": 8.625, "learning_rate": 0.0004913591260230133, "loss": 18.514, "step": 27080 }, { "epoch": 0.05284427999668505, "grad_norm": 9.4375, "learning_rate": 0.000491352623992104, "loss": 18.4142, "step": 27100 }, { "epoch": 0.05288327946531728, "grad_norm": 8.375, "learning_rate": 0.0004913461219611947, "loss": 18.5517, "step": 27120 }, { "epoch": 0.052922278933949525, "grad_norm": 8.25, "learning_rate": 0.0004913396199302852, "loss": 18.4506, "step": 27140 }, { "epoch": 0.05296127840258177, "grad_norm": 8.9375, "learning_rate": 0.0004913331178993759, "loss": 18.4626, "step": 27160 }, { "epoch": 0.053000277871214, "grad_norm": 8.5, "learning_rate": 0.0004913266158684665, "loss": 18.4563, "step": 27180 }, { "epoch": 0.053039277339846244, "grad_norm": 7.875, "learning_rate": 0.0004913201138375572, "loss": 18.5159, "step": 27200 }, { "epoch": 0.053078276808478486, "grad_norm": 8.375, "learning_rate": 0.0004913136118066478, "loss": 18.4415, "step": 27220 }, { "epoch": 0.05311727627711072, "grad_norm": 9.1875, "learning_rate": 0.0004913071097757385, "loss": 18.4588, "step": 27240 }, { "epoch": 0.05315627574574296, "grad_norm": 9.6875, "learning_rate": 0.0004913006077448291, "loss": 18.508, "step": 27260 }, { "epoch": 0.053195275214375205, "grad_norm": 8.25, "learning_rate": 0.0004912941057139198, "loss": 18.419, "step": 27280 }, { "epoch": 0.05323427468300745, "grad_norm": 9.5, "learning_rate": 0.0004912876036830103, "loss": 18.3969, "step": 27300 }, { "epoch": 0.05327327415163968, "grad_norm": 8.5625, "learning_rate": 0.000491281101652101, "loss": 18.4816, "step": 27320 }, { "epoch": 0.053312273620271924, "grad_norm": 8.875, "learning_rate": 0.0004912745996211917, "loss": 18.4631, "step": 27340 }, { "epoch": 0.053351273088904166, "grad_norm": 9.1875, "learning_rate": 0.0004912680975902823, "loss": 18.3853, "step": 27360 }, { "epoch": 0.0533902725575364, "grad_norm": 8.9375, "learning_rate": 0.000491261595559373, "loss": 18.4279, "step": 27380 }, { "epoch": 0.053429272026168644, "grad_norm": 9.3125, "learning_rate": 0.0004912550935284636, "loss": 18.415, "step": 27400 }, { "epoch": 0.053468271494800886, "grad_norm": 8.25, "learning_rate": 0.0004912485914975543, "loss": 18.4297, "step": 27420 }, { "epoch": 0.05350727096343312, "grad_norm": 9.0, "learning_rate": 0.0004912420894666449, "loss": 18.4647, "step": 27440 }, { "epoch": 0.05354627043206536, "grad_norm": 9.125, "learning_rate": 0.0004912355874357355, "loss": 18.4773, "step": 27460 }, { "epoch": 0.053585269900697605, "grad_norm": 8.8125, "learning_rate": 0.0004912290854048262, "loss": 18.4229, "step": 27480 }, { "epoch": 0.05362426936932984, "grad_norm": 8.75, "learning_rate": 0.0004912225833739168, "loss": 18.4303, "step": 27500 }, { "epoch": 0.05366326883796208, "grad_norm": 8.9375, "learning_rate": 0.0004912160813430075, "loss": 18.5098, "step": 27520 }, { "epoch": 0.053702268306594324, "grad_norm": 8.375, "learning_rate": 0.0004912095793120981, "loss": 18.4295, "step": 27540 }, { "epoch": 0.053741267775226566, "grad_norm": 9.4375, "learning_rate": 0.0004912030772811888, "loss": 18.3608, "step": 27560 }, { "epoch": 0.0537802672438588, "grad_norm": 9.875, "learning_rate": 0.0004911965752502794, "loss": 18.4168, "step": 27580 }, { "epoch": 0.05381926671249104, "grad_norm": 8.125, "learning_rate": 0.0004911900732193701, "loss": 18.3512, "step": 27600 }, { "epoch": 0.053858266181123285, "grad_norm": 8.3125, "learning_rate": 0.0004911835711884607, "loss": 18.3994, "step": 27620 }, { "epoch": 0.05389726564975552, "grad_norm": 8.625, "learning_rate": 0.0004911770691575514, "loss": 18.3586, "step": 27640 }, { "epoch": 0.05393626511838776, "grad_norm": 8.375, "learning_rate": 0.0004911705671266421, "loss": 18.3836, "step": 27660 }, { "epoch": 0.053975264587020004, "grad_norm": 8.625, "learning_rate": 0.0004911640650957326, "loss": 18.366, "step": 27680 }, { "epoch": 0.05401426405565224, "grad_norm": 9.75, "learning_rate": 0.0004911575630648233, "loss": 18.4281, "step": 27700 }, { "epoch": 0.05405326352428448, "grad_norm": 8.4375, "learning_rate": 0.0004911510610339139, "loss": 18.3837, "step": 27720 }, { "epoch": 0.05409226299291672, "grad_norm": 8.1875, "learning_rate": 0.0004911445590030046, "loss": 18.4365, "step": 27740 }, { "epoch": 0.05413126246154896, "grad_norm": 8.5, "learning_rate": 0.0004911380569720952, "loss": 18.357, "step": 27760 }, { "epoch": 0.0541702619301812, "grad_norm": 8.0625, "learning_rate": 0.0004911315549411859, "loss": 18.3518, "step": 27780 }, { "epoch": 0.05420926139881344, "grad_norm": 9.6875, "learning_rate": 0.0004911250529102766, "loss": 18.421, "step": 27800 }, { "epoch": 0.054248260867445684, "grad_norm": 10.0, "learning_rate": 0.0004911185508793672, "loss": 18.3555, "step": 27820 }, { "epoch": 0.05428726033607792, "grad_norm": 7.84375, "learning_rate": 0.0004911120488484579, "loss": 18.3645, "step": 27840 }, { "epoch": 0.05432625980471016, "grad_norm": 8.5625, "learning_rate": 0.0004911055468175485, "loss": 18.3514, "step": 27860 }, { "epoch": 0.0543652592733424, "grad_norm": 7.59375, "learning_rate": 0.0004910990447866392, "loss": 18.4433, "step": 27880 }, { "epoch": 0.05440425874197464, "grad_norm": 8.0625, "learning_rate": 0.0004910925427557298, "loss": 18.3955, "step": 27900 }, { "epoch": 0.05444325821060688, "grad_norm": 7.65625, "learning_rate": 0.0004910860407248204, "loss": 18.3803, "step": 27920 }, { "epoch": 0.05448225767923912, "grad_norm": 8.9375, "learning_rate": 0.000491079538693911, "loss": 18.4133, "step": 27940 }, { "epoch": 0.05452125714787136, "grad_norm": 8.375, "learning_rate": 0.0004910730366630017, "loss": 18.3317, "step": 27960 }, { "epoch": 0.0545602566165036, "grad_norm": 8.9375, "learning_rate": 0.0004910665346320924, "loss": 18.3971, "step": 27980 }, { "epoch": 0.05459925608513584, "grad_norm": 9.3125, "learning_rate": 0.000491060032601183, "loss": 18.3958, "step": 28000 }, { "epoch": 0.05463825555376808, "grad_norm": 8.4375, "learning_rate": 0.0004910535305702737, "loss": 18.3374, "step": 28020 }, { "epoch": 0.05467725502240032, "grad_norm": 8.4375, "learning_rate": 0.0004910470285393643, "loss": 18.3651, "step": 28040 }, { "epoch": 0.05471625449103256, "grad_norm": 7.9375, "learning_rate": 0.000491040526508455, "loss": 18.2815, "step": 28060 }, { "epoch": 0.0547552539596648, "grad_norm": 8.1875, "learning_rate": 0.0004910340244775455, "loss": 18.3387, "step": 28080 }, { "epoch": 0.05479425342829704, "grad_norm": 8.125, "learning_rate": 0.0004910275224466362, "loss": 18.4166, "step": 28100 }, { "epoch": 0.05483325289692928, "grad_norm": 8.3125, "learning_rate": 0.0004910210204157268, "loss": 18.3669, "step": 28120 }, { "epoch": 0.05487225236556152, "grad_norm": 8.1875, "learning_rate": 0.0004910145183848175, "loss": 18.3254, "step": 28140 }, { "epoch": 0.05491125183419376, "grad_norm": 7.90625, "learning_rate": 0.0004910080163539082, "loss": 18.3784, "step": 28160 }, { "epoch": 0.054950251302826, "grad_norm": 8.0625, "learning_rate": 0.0004910015143229988, "loss": 18.3403, "step": 28180 }, { "epoch": 0.05498925077145824, "grad_norm": 9.375, "learning_rate": 0.0004909950122920895, "loss": 18.3102, "step": 28200 }, { "epoch": 0.055028250240090476, "grad_norm": 9.0625, "learning_rate": 0.00049098851026118, "loss": 18.4089, "step": 28220 }, { "epoch": 0.05506724970872272, "grad_norm": 8.5625, "learning_rate": 0.0004909820082302707, "loss": 18.3223, "step": 28240 }, { "epoch": 0.05510624917735496, "grad_norm": 8.5625, "learning_rate": 0.0004909755061993613, "loss": 18.3313, "step": 28260 }, { "epoch": 0.055145248645987195, "grad_norm": 8.6875, "learning_rate": 0.000490969004168452, "loss": 18.3614, "step": 28280 }, { "epoch": 0.05518424811461944, "grad_norm": 8.25, "learning_rate": 0.0004909625021375427, "loss": 18.4159, "step": 28300 }, { "epoch": 0.05522324758325168, "grad_norm": 9.625, "learning_rate": 0.0004909560001066333, "loss": 18.3133, "step": 28320 }, { "epoch": 0.05526224705188392, "grad_norm": 8.625, "learning_rate": 0.000490949498075724, "loss": 18.2357, "step": 28340 }, { "epoch": 0.055301246520516156, "grad_norm": 8.625, "learning_rate": 0.0004909429960448146, "loss": 18.3533, "step": 28360 }, { "epoch": 0.0553402459891484, "grad_norm": 8.0625, "learning_rate": 0.0004909364940139053, "loss": 18.4396, "step": 28380 }, { "epoch": 0.05537924545778064, "grad_norm": 9.25, "learning_rate": 0.0004909299919829958, "loss": 18.298, "step": 28400 }, { "epoch": 0.055418244926412875, "grad_norm": 8.125, "learning_rate": 0.0004909234899520865, "loss": 18.271, "step": 28420 }, { "epoch": 0.05545724439504512, "grad_norm": 7.1875, "learning_rate": 0.0004909169879211771, "loss": 18.3477, "step": 28440 }, { "epoch": 0.05549624386367736, "grad_norm": 9.125, "learning_rate": 0.0004909104858902678, "loss": 18.346, "step": 28460 }, { "epoch": 0.055535243332309595, "grad_norm": 9.125, "learning_rate": 0.0004909039838593585, "loss": 18.3856, "step": 28480 }, { "epoch": 0.05557424280094184, "grad_norm": 8.0625, "learning_rate": 0.0004908974818284491, "loss": 18.3034, "step": 28500 }, { "epoch": 0.05561324226957408, "grad_norm": 9.375, "learning_rate": 0.0004908909797975398, "loss": 18.2843, "step": 28520 }, { "epoch": 0.055652241738206314, "grad_norm": 9.4375, "learning_rate": 0.0004908844777666304, "loss": 18.2606, "step": 28540 }, { "epoch": 0.055691241206838556, "grad_norm": 8.25, "learning_rate": 0.0004908779757357211, "loss": 18.3211, "step": 28560 }, { "epoch": 0.0557302406754708, "grad_norm": 8.0625, "learning_rate": 0.0004908714737048117, "loss": 18.2635, "step": 28580 }, { "epoch": 0.05576924014410304, "grad_norm": 8.4375, "learning_rate": 0.0004908649716739024, "loss": 18.3031, "step": 28600 }, { "epoch": 0.055808239612735275, "grad_norm": 8.1875, "learning_rate": 0.0004908584696429931, "loss": 18.3625, "step": 28620 }, { "epoch": 0.05584723908136752, "grad_norm": 9.125, "learning_rate": 0.0004908519676120837, "loss": 18.2676, "step": 28640 }, { "epoch": 0.05588623854999976, "grad_norm": 8.625, "learning_rate": 0.0004908454655811743, "loss": 18.356, "step": 28660 }, { "epoch": 0.055925238018631994, "grad_norm": 8.5625, "learning_rate": 0.0004908389635502649, "loss": 18.3445, "step": 28680 }, { "epoch": 0.055964237487264236, "grad_norm": 10.4375, "learning_rate": 0.0004908324615193556, "loss": 18.2686, "step": 28700 }, { "epoch": 0.05600323695589648, "grad_norm": 9.125, "learning_rate": 0.0004908259594884462, "loss": 18.3601, "step": 28720 }, { "epoch": 0.05604223642452871, "grad_norm": 8.5625, "learning_rate": 0.0004908194574575369, "loss": 18.2382, "step": 28740 }, { "epoch": 0.056081235893160955, "grad_norm": 8.5, "learning_rate": 0.0004908129554266275, "loss": 18.2971, "step": 28760 }, { "epoch": 0.0561202353617932, "grad_norm": 8.3125, "learning_rate": 0.0004908064533957182, "loss": 18.2238, "step": 28780 }, { "epoch": 0.05615923483042543, "grad_norm": 8.1875, "learning_rate": 0.0004907999513648089, "loss": 18.32, "step": 28800 }, { "epoch": 0.056198234299057674, "grad_norm": 9.25, "learning_rate": 0.0004907934493338995, "loss": 18.2592, "step": 28820 }, { "epoch": 0.056237233767689916, "grad_norm": 8.25, "learning_rate": 0.0004907869473029901, "loss": 18.3136, "step": 28840 }, { "epoch": 0.05627623323632216, "grad_norm": 7.875, "learning_rate": 0.0004907804452720807, "loss": 18.2562, "step": 28860 }, { "epoch": 0.05631523270495439, "grad_norm": 9.25, "learning_rate": 0.0004907739432411714, "loss": 18.2101, "step": 28880 }, { "epoch": 0.056354232173586635, "grad_norm": 8.75, "learning_rate": 0.000490767441210262, "loss": 18.3202, "step": 28900 }, { "epoch": 0.05639323164221888, "grad_norm": 9.5, "learning_rate": 0.0004907609391793527, "loss": 18.3016, "step": 28920 }, { "epoch": 0.05643223111085111, "grad_norm": 8.8125, "learning_rate": 0.0004907544371484434, "loss": 18.2731, "step": 28940 }, { "epoch": 0.056471230579483354, "grad_norm": 8.4375, "learning_rate": 0.000490747935117534, "loss": 18.2778, "step": 28960 }, { "epoch": 0.056510230048115596, "grad_norm": 7.1875, "learning_rate": 0.0004907414330866247, "loss": 18.2984, "step": 28980 }, { "epoch": 0.05654922951674783, "grad_norm": 8.3125, "learning_rate": 0.0004907349310557152, "loss": 18.2915, "step": 29000 }, { "epoch": 0.056588228985380074, "grad_norm": 7.875, "learning_rate": 0.0004907284290248059, "loss": 18.3218, "step": 29020 }, { "epoch": 0.056627228454012316, "grad_norm": 8.4375, "learning_rate": 0.0004907219269938965, "loss": 18.2289, "step": 29040 }, { "epoch": 0.05666622792264455, "grad_norm": 9.125, "learning_rate": 0.0004907154249629872, "loss": 18.2586, "step": 29060 }, { "epoch": 0.05670522739127679, "grad_norm": 8.25, "learning_rate": 0.0004907089229320778, "loss": 18.2226, "step": 29080 }, { "epoch": 0.056744226859909035, "grad_norm": 6.90625, "learning_rate": 0.0004907024209011685, "loss": 18.2936, "step": 29100 }, { "epoch": 0.05678322632854128, "grad_norm": 9.3125, "learning_rate": 0.0004906959188702592, "loss": 18.3193, "step": 29120 }, { "epoch": 0.05682222579717351, "grad_norm": 9.0, "learning_rate": 0.0004906894168393497, "loss": 18.2579, "step": 29140 }, { "epoch": 0.056861225265805754, "grad_norm": 8.6875, "learning_rate": 0.0004906829148084404, "loss": 18.3944, "step": 29160 }, { "epoch": 0.056900224734437996, "grad_norm": 8.25, "learning_rate": 0.000490676412777531, "loss": 18.2326, "step": 29180 }, { "epoch": 0.05693922420307023, "grad_norm": 8.1875, "learning_rate": 0.0004906699107466217, "loss": 18.2686, "step": 29200 }, { "epoch": 0.05697822367170247, "grad_norm": 7.96875, "learning_rate": 0.0004906634087157123, "loss": 18.2266, "step": 29220 }, { "epoch": 0.057017223140334715, "grad_norm": 7.90625, "learning_rate": 0.000490656906684803, "loss": 18.2404, "step": 29240 }, { "epoch": 0.05705622260896695, "grad_norm": 8.75, "learning_rate": 0.0004906504046538936, "loss": 18.2013, "step": 29260 }, { "epoch": 0.05709522207759919, "grad_norm": 7.375, "learning_rate": 0.0004906439026229843, "loss": 18.2322, "step": 29280 }, { "epoch": 0.057134221546231434, "grad_norm": 8.125, "learning_rate": 0.000490637400592075, "loss": 18.2848, "step": 29300 }, { "epoch": 0.05717322101486367, "grad_norm": 8.0, "learning_rate": 0.0004906308985611656, "loss": 18.2395, "step": 29320 }, { "epoch": 0.05721222048349591, "grad_norm": 8.875, "learning_rate": 0.0004906243965302563, "loss": 18.2619, "step": 29340 }, { "epoch": 0.05725121995212815, "grad_norm": 7.5, "learning_rate": 0.0004906178944993469, "loss": 18.2287, "step": 29360 }, { "epoch": 0.057290219420760395, "grad_norm": 8.125, "learning_rate": 0.0004906113924684375, "loss": 18.2756, "step": 29380 }, { "epoch": 0.05732921888939263, "grad_norm": 8.8125, "learning_rate": 0.0004906048904375281, "loss": 18.2243, "step": 29400 }, { "epoch": 0.05736821835802487, "grad_norm": 7.96875, "learning_rate": 0.0004905983884066188, "loss": 18.202, "step": 29420 }, { "epoch": 0.057407217826657114, "grad_norm": 8.0625, "learning_rate": 0.0004905918863757094, "loss": 18.2796, "step": 29440 }, { "epoch": 0.05744621729528935, "grad_norm": 8.0625, "learning_rate": 0.0004905853843448001, "loss": 18.2503, "step": 29460 }, { "epoch": 0.05748521676392159, "grad_norm": 9.25, "learning_rate": 0.0004905788823138908, "loss": 18.2193, "step": 29480 }, { "epoch": 0.05752421623255383, "grad_norm": 9.0, "learning_rate": 0.0004905723802829814, "loss": 18.2248, "step": 29500 }, { "epoch": 0.05756321570118607, "grad_norm": 8.25, "learning_rate": 0.0004905658782520721, "loss": 18.249, "step": 29520 }, { "epoch": 0.05760221516981831, "grad_norm": 8.0625, "learning_rate": 0.0004905593762211627, "loss": 18.261, "step": 29540 }, { "epoch": 0.05764121463845055, "grad_norm": 8.0625, "learning_rate": 0.0004905528741902534, "loss": 18.1378, "step": 29560 }, { "epoch": 0.05768021410708279, "grad_norm": 7.96875, "learning_rate": 0.000490546372159344, "loss": 18.2145, "step": 29580 }, { "epoch": 0.05771921357571503, "grad_norm": 9.125, "learning_rate": 0.0004905398701284347, "loss": 18.2221, "step": 29600 }, { "epoch": 0.05775821304434727, "grad_norm": 8.9375, "learning_rate": 0.0004905333680975253, "loss": 18.2222, "step": 29620 }, { "epoch": 0.057797212512979514, "grad_norm": 8.75, "learning_rate": 0.0004905268660666159, "loss": 18.1924, "step": 29640 }, { "epoch": 0.05783621198161175, "grad_norm": 8.25, "learning_rate": 0.0004905203640357066, "loss": 18.2808, "step": 29660 }, { "epoch": 0.05787521145024399, "grad_norm": 7.65625, "learning_rate": 0.0004905138620047972, "loss": 18.1663, "step": 29680 }, { "epoch": 0.05791421091887623, "grad_norm": 8.0625, "learning_rate": 0.0004905073599738879, "loss": 18.3324, "step": 29700 }, { "epoch": 0.05795321038750847, "grad_norm": 8.0, "learning_rate": 0.0004905008579429785, "loss": 18.1949, "step": 29720 }, { "epoch": 0.05799220985614071, "grad_norm": 8.4375, "learning_rate": 0.0004904943559120692, "loss": 18.2462, "step": 29740 }, { "epoch": 0.05803120932477295, "grad_norm": 9.1875, "learning_rate": 0.0004904878538811597, "loss": 18.2457, "step": 29760 }, { "epoch": 0.05807020879340519, "grad_norm": 9.3125, "learning_rate": 0.0004904813518502504, "loss": 18.2479, "step": 29780 }, { "epoch": 0.05810920826203743, "grad_norm": 7.75, "learning_rate": 0.0004904748498193411, "loss": 18.2079, "step": 29800 }, { "epoch": 0.05814820773066967, "grad_norm": 8.75, "learning_rate": 0.0004904683477884317, "loss": 18.1499, "step": 29820 }, { "epoch": 0.058187207199301906, "grad_norm": 7.15625, "learning_rate": 0.0004904618457575224, "loss": 18.1579, "step": 29840 }, { "epoch": 0.05822620666793415, "grad_norm": 8.125, "learning_rate": 0.000490455343726613, "loss": 18.2203, "step": 29860 }, { "epoch": 0.05826520613656639, "grad_norm": 8.25, "learning_rate": 0.0004904488416957037, "loss": 18.2209, "step": 29880 }, { "epoch": 0.05830420560519863, "grad_norm": 7.96875, "learning_rate": 0.0004904423396647943, "loss": 18.142, "step": 29900 }, { "epoch": 0.05834320507383087, "grad_norm": 7.5625, "learning_rate": 0.0004904358376338849, "loss": 18.1117, "step": 29920 }, { "epoch": 0.05838220454246311, "grad_norm": 7.9375, "learning_rate": 0.0004904293356029755, "loss": 18.119, "step": 29940 }, { "epoch": 0.05842120401109535, "grad_norm": 9.0625, "learning_rate": 0.0004904228335720662, "loss": 18.1707, "step": 29960 }, { "epoch": 0.058460203479727586, "grad_norm": 8.375, "learning_rate": 0.0004904163315411569, "loss": 18.1383, "step": 29980 }, { "epoch": 0.05849920294835983, "grad_norm": 8.875, "learning_rate": 0.0004904098295102475, "loss": 18.1583, "step": 30000 }, { "epoch": 0.05853820241699207, "grad_norm": 9.0, "learning_rate": 0.0004904033274793382, "loss": 18.1581, "step": 30020 }, { "epoch": 0.058577201885624305, "grad_norm": 8.6875, "learning_rate": 0.0004903968254484288, "loss": 18.2354, "step": 30040 }, { "epoch": 0.05861620135425655, "grad_norm": 8.1875, "learning_rate": 0.0004903903234175195, "loss": 18.1351, "step": 30060 }, { "epoch": 0.05865520082288879, "grad_norm": 8.75, "learning_rate": 0.0004903838213866101, "loss": 18.1553, "step": 30080 }, { "epoch": 0.05869420029152103, "grad_norm": 7.625, "learning_rate": 0.0004903773193557008, "loss": 18.1972, "step": 30100 }, { "epoch": 0.05873319976015327, "grad_norm": 8.0625, "learning_rate": 0.0004903708173247914, "loss": 18.1429, "step": 30120 }, { "epoch": 0.05877219922878551, "grad_norm": 9.3125, "learning_rate": 0.000490364315293882, "loss": 18.1449, "step": 30140 }, { "epoch": 0.05881119869741775, "grad_norm": 7.8125, "learning_rate": 0.0004903578132629727, "loss": 18.2032, "step": 30160 }, { "epoch": 0.058850198166049986, "grad_norm": 7.375, "learning_rate": 0.0004903513112320633, "loss": 18.0934, "step": 30180 }, { "epoch": 0.05888919763468223, "grad_norm": 7.78125, "learning_rate": 0.000490344809201154, "loss": 18.1388, "step": 30200 }, { "epoch": 0.05892819710331447, "grad_norm": 7.46875, "learning_rate": 0.0004903383071702446, "loss": 18.1244, "step": 30220 }, { "epoch": 0.058967196571946705, "grad_norm": 8.75, "learning_rate": 0.0004903318051393353, "loss": 18.1777, "step": 30240 }, { "epoch": 0.05900619604057895, "grad_norm": 8.625, "learning_rate": 0.000490325303108426, "loss": 18.1486, "step": 30260 }, { "epoch": 0.05904519550921119, "grad_norm": 7.8125, "learning_rate": 0.0004903188010775166, "loss": 18.1711, "step": 30280 }, { "epoch": 0.059084194977843424, "grad_norm": 8.0625, "learning_rate": 0.0004903122990466073, "loss": 18.1431, "step": 30300 }, { "epoch": 0.059123194446475666, "grad_norm": 8.6875, "learning_rate": 0.0004903057970156979, "loss": 18.1457, "step": 30320 }, { "epoch": 0.05916219391510791, "grad_norm": 7.625, "learning_rate": 0.0004902992949847886, "loss": 18.1494, "step": 30340 }, { "epoch": 0.05920119338374015, "grad_norm": 7.78125, "learning_rate": 0.0004902927929538791, "loss": 18.155, "step": 30360 }, { "epoch": 0.059240192852372385, "grad_norm": 8.0625, "learning_rate": 0.0004902862909229698, "loss": 18.0483, "step": 30380 }, { "epoch": 0.05927919232100463, "grad_norm": 8.25, "learning_rate": 0.0004902797888920604, "loss": 18.1595, "step": 30400 }, { "epoch": 0.05931819178963687, "grad_norm": 8.125, "learning_rate": 0.0004902732868611511, "loss": 18.1439, "step": 30420 }, { "epoch": 0.059357191258269104, "grad_norm": 8.875, "learning_rate": 0.0004902667848302418, "loss": 18.0778, "step": 30440 }, { "epoch": 0.059396190726901346, "grad_norm": 8.25, "learning_rate": 0.0004902602827993324, "loss": 18.1237, "step": 30460 }, { "epoch": 0.05943519019553359, "grad_norm": 8.375, "learning_rate": 0.0004902537807684231, "loss": 18.142, "step": 30480 }, { "epoch": 0.05947418966416582, "grad_norm": 7.90625, "learning_rate": 0.0004902472787375137, "loss": 18.0664, "step": 30500 }, { "epoch": 0.059513189132798065, "grad_norm": 8.625, "learning_rate": 0.0004902407767066044, "loss": 18.1222, "step": 30520 }, { "epoch": 0.05955218860143031, "grad_norm": 7.53125, "learning_rate": 0.0004902342746756949, "loss": 18.0742, "step": 30540 }, { "epoch": 0.05959118807006254, "grad_norm": 7.375, "learning_rate": 0.0004902277726447856, "loss": 18.0627, "step": 30560 }, { "epoch": 0.059630187538694784, "grad_norm": 8.0625, "learning_rate": 0.0004902212706138762, "loss": 18.0427, "step": 30580 }, { "epoch": 0.059669187007327026, "grad_norm": 8.5625, "learning_rate": 0.0004902147685829669, "loss": 18.1599, "step": 30600 }, { "epoch": 0.05970818647595927, "grad_norm": 8.3125, "learning_rate": 0.0004902082665520576, "loss": 18.1158, "step": 30620 }, { "epoch": 0.059747185944591504, "grad_norm": 7.84375, "learning_rate": 0.0004902017645211482, "loss": 18.1183, "step": 30640 }, { "epoch": 0.059786185413223746, "grad_norm": 7.71875, "learning_rate": 0.0004901952624902389, "loss": 18.0588, "step": 30660 }, { "epoch": 0.05982518488185599, "grad_norm": 7.96875, "learning_rate": 0.0004901887604593294, "loss": 18.1681, "step": 30680 }, { "epoch": 0.05986418435048822, "grad_norm": 7.96875, "learning_rate": 0.0004901822584284201, "loss": 18.084, "step": 30700 }, { "epoch": 0.059903183819120465, "grad_norm": 9.375, "learning_rate": 0.0004901757563975107, "loss": 18.0496, "step": 30720 }, { "epoch": 0.05994218328775271, "grad_norm": 9.125, "learning_rate": 0.0004901692543666014, "loss": 18.0668, "step": 30740 }, { "epoch": 0.05998118275638494, "grad_norm": 7.59375, "learning_rate": 0.000490162752335692, "loss": 18.0859, "step": 30760 }, { "epoch": 0.060020182225017184, "grad_norm": 7.78125, "learning_rate": 0.0004901562503047827, "loss": 18.0747, "step": 30780 }, { "epoch": 0.060059181693649426, "grad_norm": 8.1875, "learning_rate": 0.0004901497482738734, "loss": 18.0556, "step": 30800 }, { "epoch": 0.06009818116228166, "grad_norm": 8.125, "learning_rate": 0.000490143246242964, "loss": 18.0919, "step": 30820 }, { "epoch": 0.0601371806309139, "grad_norm": 8.75, "learning_rate": 0.0004901367442120546, "loss": 18.1014, "step": 30840 }, { "epoch": 0.060176180099546145, "grad_norm": 10.0, "learning_rate": 0.0004901302421811452, "loss": 18.1593, "step": 30860 }, { "epoch": 0.06021517956817839, "grad_norm": 7.5625, "learning_rate": 0.0004901237401502359, "loss": 18.0223, "step": 30880 }, { "epoch": 0.06025417903681062, "grad_norm": 8.8125, "learning_rate": 0.0004901172381193265, "loss": 18.0728, "step": 30900 }, { "epoch": 0.060293178505442864, "grad_norm": 8.3125, "learning_rate": 0.0004901107360884172, "loss": 18.0472, "step": 30920 }, { "epoch": 0.060332177974075106, "grad_norm": 7.3125, "learning_rate": 0.0004901042340575079, "loss": 18.0559, "step": 30940 }, { "epoch": 0.06037117744270734, "grad_norm": 8.6875, "learning_rate": 0.0004900977320265985, "loss": 18.0695, "step": 30960 }, { "epoch": 0.06041017691133958, "grad_norm": 8.4375, "learning_rate": 0.0004900912299956892, "loss": 18.0741, "step": 30980 }, { "epoch": 0.060449176379971825, "grad_norm": 9.3125, "learning_rate": 0.0004900847279647798, "loss": 18.1284, "step": 31000 }, { "epoch": 0.06048817584860406, "grad_norm": 7.5625, "learning_rate": 0.0004900782259338705, "loss": 18.0664, "step": 31020 }, { "epoch": 0.0605271753172363, "grad_norm": 8.25, "learning_rate": 0.0004900717239029611, "loss": 18.1452, "step": 31040 }, { "epoch": 0.060566174785868544, "grad_norm": 8.5, "learning_rate": 0.0004900652218720518, "loss": 18.0555, "step": 31060 }, { "epoch": 0.06060517425450078, "grad_norm": 7.375, "learning_rate": 0.0004900587198411423, "loss": 18.1033, "step": 31080 }, { "epoch": 0.06064417372313302, "grad_norm": 7.6875, "learning_rate": 0.000490052217810233, "loss": 17.9969, "step": 31100 }, { "epoch": 0.06068317319176526, "grad_norm": 7.65625, "learning_rate": 0.0004900457157793237, "loss": 18.0857, "step": 31120 }, { "epoch": 0.060722172660397505, "grad_norm": 9.0625, "learning_rate": 0.0004900392137484143, "loss": 18.1473, "step": 31140 }, { "epoch": 0.06076117212902974, "grad_norm": 8.0625, "learning_rate": 0.000490032711717505, "loss": 18.1086, "step": 31160 }, { "epoch": 0.06080017159766198, "grad_norm": 8.0625, "learning_rate": 0.0004900262096865956, "loss": 18.0586, "step": 31180 }, { "epoch": 0.060839171066294224, "grad_norm": 7.78125, "learning_rate": 0.0004900197076556863, "loss": 18.0954, "step": 31200 }, { "epoch": 0.06087817053492646, "grad_norm": 8.3125, "learning_rate": 0.0004900132056247769, "loss": 18.0704, "step": 31220 }, { "epoch": 0.0609171700035587, "grad_norm": 7.59375, "learning_rate": 0.0004900067035938676, "loss": 18.0087, "step": 31240 }, { "epoch": 0.060956169472190944, "grad_norm": 8.0625, "learning_rate": 0.0004900002015629583, "loss": 18.0486, "step": 31260 }, { "epoch": 0.06099516894082318, "grad_norm": 8.3125, "learning_rate": 0.0004899936995320489, "loss": 18.0881, "step": 31280 }, { "epoch": 0.06103416840945542, "grad_norm": 7.4375, "learning_rate": 0.0004899871975011395, "loss": 18.0806, "step": 31300 }, { "epoch": 0.06107316787808766, "grad_norm": 7.90625, "learning_rate": 0.0004899806954702301, "loss": 18.0287, "step": 31320 }, { "epoch": 0.0611121673467199, "grad_norm": 7.59375, "learning_rate": 0.0004899741934393208, "loss": 18.0311, "step": 31340 }, { "epoch": 0.06115116681535214, "grad_norm": 8.4375, "learning_rate": 0.0004899676914084114, "loss": 18.0707, "step": 31360 }, { "epoch": 0.06119016628398438, "grad_norm": 7.71875, "learning_rate": 0.0004899611893775021, "loss": 18.0916, "step": 31380 }, { "epoch": 0.061229165752616624, "grad_norm": 8.125, "learning_rate": 0.0004899546873465927, "loss": 18.0903, "step": 31400 }, { "epoch": 0.06126816522124886, "grad_norm": 8.4375, "learning_rate": 0.0004899481853156834, "loss": 18.0255, "step": 31420 }, { "epoch": 0.0613071646898811, "grad_norm": 8.0, "learning_rate": 0.0004899416832847741, "loss": 18.0232, "step": 31440 }, { "epoch": 0.06134616415851334, "grad_norm": 8.875, "learning_rate": 0.0004899351812538646, "loss": 18.0515, "step": 31460 }, { "epoch": 0.06138516362714558, "grad_norm": 7.84375, "learning_rate": 0.0004899286792229553, "loss": 18.0826, "step": 31480 }, { "epoch": 0.06142416309577782, "grad_norm": 7.5625, "learning_rate": 0.0004899221771920459, "loss": 18.0413, "step": 31500 }, { "epoch": 0.06146316256441006, "grad_norm": 8.8125, "learning_rate": 0.0004899156751611366, "loss": 18.0584, "step": 31520 }, { "epoch": 0.0615021620330423, "grad_norm": 8.0, "learning_rate": 0.0004899091731302272, "loss": 18.1169, "step": 31540 }, { "epoch": 0.06154116150167454, "grad_norm": 8.1875, "learning_rate": 0.0004899026710993179, "loss": 18.0053, "step": 31560 }, { "epoch": 0.06158016097030678, "grad_norm": 8.0, "learning_rate": 0.0004898961690684086, "loss": 18.0199, "step": 31580 }, { "epoch": 0.061619160438939016, "grad_norm": 8.5, "learning_rate": 0.0004898896670374991, "loss": 18.0758, "step": 31600 }, { "epoch": 0.06165815990757126, "grad_norm": 8.625, "learning_rate": 0.0004898831650065898, "loss": 18.0708, "step": 31620 }, { "epoch": 0.0616971593762035, "grad_norm": 7.4375, "learning_rate": 0.0004898766629756804, "loss": 18.027, "step": 31640 }, { "epoch": 0.06173615884483574, "grad_norm": 8.75, "learning_rate": 0.0004898701609447711, "loss": 18.0593, "step": 31660 }, { "epoch": 0.06177515831346798, "grad_norm": 8.25, "learning_rate": 0.0004898636589138617, "loss": 18.0366, "step": 31680 }, { "epoch": 0.06181415778210022, "grad_norm": 8.5625, "learning_rate": 0.0004898571568829524, "loss": 18.0165, "step": 31700 }, { "epoch": 0.06185315725073246, "grad_norm": 7.8125, "learning_rate": 0.000489850654852043, "loss": 18.0843, "step": 31720 }, { "epoch": 0.0618921567193647, "grad_norm": 8.3125, "learning_rate": 0.0004898441528211337, "loss": 18.007, "step": 31740 }, { "epoch": 0.06193115618799694, "grad_norm": 8.125, "learning_rate": 0.0004898376507902244, "loss": 18.0027, "step": 31760 }, { "epoch": 0.06197015565662918, "grad_norm": 8.25, "learning_rate": 0.000489831148759315, "loss": 17.9932, "step": 31780 }, { "epoch": 0.062009155125261416, "grad_norm": 7.90625, "learning_rate": 0.0004898246467284057, "loss": 18.0424, "step": 31800 }, { "epoch": 0.06204815459389366, "grad_norm": 7.71875, "learning_rate": 0.0004898181446974962, "loss": 18.0237, "step": 31820 }, { "epoch": 0.0620871540625259, "grad_norm": 8.1875, "learning_rate": 0.0004898116426665869, "loss": 17.9841, "step": 31840 }, { "epoch": 0.062126153531158135, "grad_norm": 8.1875, "learning_rate": 0.0004898051406356775, "loss": 17.949, "step": 31860 }, { "epoch": 0.06216515299979038, "grad_norm": 7.875, "learning_rate": 0.0004897986386047682, "loss": 17.9183, "step": 31880 }, { "epoch": 0.06220415246842262, "grad_norm": 7.375, "learning_rate": 0.0004897921365738588, "loss": 18.0329, "step": 31900 }, { "epoch": 0.06224315193705486, "grad_norm": 7.90625, "learning_rate": 0.0004897856345429495, "loss": 17.9478, "step": 31920 }, { "epoch": 0.062282151405687096, "grad_norm": 8.5, "learning_rate": 0.0004897791325120402, "loss": 18.0445, "step": 31940 }, { "epoch": 0.06232115087431934, "grad_norm": 7.3125, "learning_rate": 0.0004897726304811308, "loss": 17.9673, "step": 31960 }, { "epoch": 0.06236015034295158, "grad_norm": 8.625, "learning_rate": 0.0004897661284502215, "loss": 18.0014, "step": 31980 }, { "epoch": 0.062399149811583815, "grad_norm": 8.125, "learning_rate": 0.0004897596264193121, "loss": 17.9175, "step": 32000 }, { "epoch": 0.06243814928021606, "grad_norm": 8.3125, "learning_rate": 0.0004897531243884028, "loss": 17.8893, "step": 32020 }, { "epoch": 0.0624771487488483, "grad_norm": 7.875, "learning_rate": 0.0004897466223574934, "loss": 17.948, "step": 32040 }, { "epoch": 0.06251614821748054, "grad_norm": 8.625, "learning_rate": 0.000489740120326584, "loss": 18.045, "step": 32060 }, { "epoch": 0.06255514768611278, "grad_norm": 6.9375, "learning_rate": 0.0004897336182956747, "loss": 17.9409, "step": 32080 }, { "epoch": 0.06259414715474501, "grad_norm": 7.3125, "learning_rate": 0.0004897271162647653, "loss": 17.8784, "step": 32100 }, { "epoch": 0.06263314662337725, "grad_norm": 8.4375, "learning_rate": 0.000489720614233856, "loss": 18.0081, "step": 32120 }, { "epoch": 0.0626721460920095, "grad_norm": 8.1875, "learning_rate": 0.0004897141122029466, "loss": 17.9232, "step": 32140 }, { "epoch": 0.06271114556064174, "grad_norm": 8.25, "learning_rate": 0.0004897076101720373, "loss": 17.9626, "step": 32160 }, { "epoch": 0.06275014502927398, "grad_norm": 8.0, "learning_rate": 0.0004897011081411279, "loss": 18.0572, "step": 32180 }, { "epoch": 0.06278914449790622, "grad_norm": 7.5, "learning_rate": 0.0004896946061102186, "loss": 18.0155, "step": 32200 }, { "epoch": 0.06282814396653845, "grad_norm": 8.625, "learning_rate": 0.0004896881040793091, "loss": 17.9578, "step": 32220 }, { "epoch": 0.06286714343517069, "grad_norm": 8.1875, "learning_rate": 0.0004896816020483998, "loss": 17.9427, "step": 32240 }, { "epoch": 0.06290614290380293, "grad_norm": 8.375, "learning_rate": 0.0004896751000174905, "loss": 17.953, "step": 32260 }, { "epoch": 0.06294514237243518, "grad_norm": 8.3125, "learning_rate": 0.0004896685979865811, "loss": 17.9384, "step": 32280 }, { "epoch": 0.06298414184106742, "grad_norm": 8.1875, "learning_rate": 0.0004896620959556718, "loss": 18.0104, "step": 32300 }, { "epoch": 0.06302314130969966, "grad_norm": 8.125, "learning_rate": 0.0004896555939247624, "loss": 17.926, "step": 32320 }, { "epoch": 0.0630621407783319, "grad_norm": 8.5625, "learning_rate": 0.0004896490918938531, "loss": 17.9719, "step": 32340 }, { "epoch": 0.06310114024696413, "grad_norm": 8.1875, "learning_rate": 0.0004896425898629437, "loss": 17.9948, "step": 32360 }, { "epoch": 0.06314013971559637, "grad_norm": 7.75, "learning_rate": 0.0004896360878320343, "loss": 17.9669, "step": 32380 }, { "epoch": 0.06317913918422861, "grad_norm": 8.0625, "learning_rate": 0.0004896295858011249, "loss": 17.9943, "step": 32400 }, { "epoch": 0.06321813865286086, "grad_norm": 7.65625, "learning_rate": 0.0004896230837702156, "loss": 17.9457, "step": 32420 }, { "epoch": 0.0632571381214931, "grad_norm": 8.5, "learning_rate": 0.0004896165817393063, "loss": 17.9023, "step": 32440 }, { "epoch": 0.06329613759012534, "grad_norm": 7.96875, "learning_rate": 0.0004896100797083969, "loss": 17.9567, "step": 32460 }, { "epoch": 0.06333513705875757, "grad_norm": 8.875, "learning_rate": 0.0004896035776774876, "loss": 17.9666, "step": 32480 }, { "epoch": 0.06337413652738981, "grad_norm": 8.75, "learning_rate": 0.0004895970756465782, "loss": 17.9693, "step": 32500 }, { "epoch": 0.06341313599602205, "grad_norm": 7.40625, "learning_rate": 0.0004895905736156689, "loss": 17.919, "step": 32520 }, { "epoch": 0.0634521354646543, "grad_norm": 8.0, "learning_rate": 0.0004895840715847594, "loss": 17.9174, "step": 32540 }, { "epoch": 0.06349113493328654, "grad_norm": 8.625, "learning_rate": 0.0004895775695538501, "loss": 17.9503, "step": 32560 }, { "epoch": 0.06353013440191878, "grad_norm": 8.375, "learning_rate": 0.0004895710675229407, "loss": 17.9537, "step": 32580 }, { "epoch": 0.06356913387055102, "grad_norm": 7.34375, "learning_rate": 0.0004895645654920314, "loss": 17.8769, "step": 32600 }, { "epoch": 0.06360813333918325, "grad_norm": 8.0625, "learning_rate": 0.0004895580634611221, "loss": 17.9926, "step": 32620 }, { "epoch": 0.06364713280781549, "grad_norm": 8.75, "learning_rate": 0.0004895515614302127, "loss": 17.9385, "step": 32640 }, { "epoch": 0.06368613227644773, "grad_norm": 8.125, "learning_rate": 0.0004895450593993034, "loss": 17.8764, "step": 32660 }, { "epoch": 0.06372513174507997, "grad_norm": 8.375, "learning_rate": 0.000489538557368394, "loss": 17.9011, "step": 32680 }, { "epoch": 0.06376413121371222, "grad_norm": 8.375, "learning_rate": 0.0004895320553374847, "loss": 17.9503, "step": 32700 }, { "epoch": 0.06380313068234446, "grad_norm": 7.25, "learning_rate": 0.0004895255533065753, "loss": 17.8345, "step": 32720 }, { "epoch": 0.06384213015097669, "grad_norm": 8.0, "learning_rate": 0.000489519051275666, "loss": 17.98, "step": 32740 }, { "epoch": 0.06388112961960893, "grad_norm": 8.625, "learning_rate": 0.0004895125492447567, "loss": 17.9959, "step": 32760 }, { "epoch": 0.06392012908824117, "grad_norm": 8.375, "learning_rate": 0.0004895060472138473, "loss": 17.9604, "step": 32780 }, { "epoch": 0.06395912855687341, "grad_norm": 8.25, "learning_rate": 0.0004894995451829379, "loss": 17.9779, "step": 32800 }, { "epoch": 0.06399812802550565, "grad_norm": 8.1875, "learning_rate": 0.0004894930431520285, "loss": 17.8667, "step": 32820 }, { "epoch": 0.0640371274941379, "grad_norm": 7.28125, "learning_rate": 0.0004894865411211192, "loss": 17.9207, "step": 32840 }, { "epoch": 0.06407612696277014, "grad_norm": 7.8125, "learning_rate": 0.0004894800390902098, "loss": 17.8489, "step": 32860 }, { "epoch": 0.06411512643140237, "grad_norm": 7.53125, "learning_rate": 0.0004894735370593005, "loss": 17.9589, "step": 32880 }, { "epoch": 0.06415412590003461, "grad_norm": 8.4375, "learning_rate": 0.0004894670350283912, "loss": 17.9333, "step": 32900 }, { "epoch": 0.06419312536866685, "grad_norm": 8.1875, "learning_rate": 0.0004894605329974818, "loss": 17.9873, "step": 32920 }, { "epoch": 0.06423212483729909, "grad_norm": 10.0, "learning_rate": 0.0004894540309665725, "loss": 17.9491, "step": 32940 }, { "epoch": 0.06427112430593133, "grad_norm": 7.8125, "learning_rate": 0.0004894475289356631, "loss": 17.872, "step": 32960 }, { "epoch": 0.06431012377456358, "grad_norm": 8.8125, "learning_rate": 0.0004894410269047538, "loss": 17.937, "step": 32980 }, { "epoch": 0.0643491232431958, "grad_norm": 7.53125, "learning_rate": 0.0004894345248738443, "loss": 17.8699, "step": 33000 }, { "epoch": 0.06438812271182805, "grad_norm": 7.8125, "learning_rate": 0.000489428022842935, "loss": 17.9012, "step": 33020 }, { "epoch": 0.06442712218046029, "grad_norm": 7.5625, "learning_rate": 0.0004894215208120256, "loss": 17.9211, "step": 33040 }, { "epoch": 0.06446612164909253, "grad_norm": 8.1875, "learning_rate": 0.0004894150187811163, "loss": 17.8865, "step": 33060 }, { "epoch": 0.06450512111772477, "grad_norm": 8.8125, "learning_rate": 0.000489408516750207, "loss": 17.9138, "step": 33080 }, { "epoch": 0.06454412058635701, "grad_norm": 7.625, "learning_rate": 0.0004894020147192976, "loss": 17.9299, "step": 33100 }, { "epoch": 0.06458312005498926, "grad_norm": 7.6875, "learning_rate": 0.0004893955126883883, "loss": 17.8413, "step": 33120 }, { "epoch": 0.06462211952362149, "grad_norm": 8.0, "learning_rate": 0.0004893890106574788, "loss": 17.8209, "step": 33140 }, { "epoch": 0.06466111899225373, "grad_norm": 7.4375, "learning_rate": 0.0004893825086265695, "loss": 17.8805, "step": 33160 }, { "epoch": 0.06470011846088597, "grad_norm": 7.65625, "learning_rate": 0.0004893760065956601, "loss": 17.9232, "step": 33180 }, { "epoch": 0.06473911792951821, "grad_norm": 8.125, "learning_rate": 0.0004893695045647508, "loss": 17.9263, "step": 33200 }, { "epoch": 0.06477811739815045, "grad_norm": 8.0625, "learning_rate": 0.0004893630025338414, "loss": 17.9114, "step": 33220 }, { "epoch": 0.0648171168667827, "grad_norm": 7.71875, "learning_rate": 0.0004893565005029321, "loss": 17.8717, "step": 33240 }, { "epoch": 0.06485611633541492, "grad_norm": 7.96875, "learning_rate": 0.0004893499984720228, "loss": 17.8552, "step": 33260 }, { "epoch": 0.06489511580404717, "grad_norm": 7.96875, "learning_rate": 0.0004893434964411134, "loss": 17.9458, "step": 33280 }, { "epoch": 0.06493411527267941, "grad_norm": 7.875, "learning_rate": 0.000489336994410204, "loss": 17.8794, "step": 33300 }, { "epoch": 0.06497311474131165, "grad_norm": 8.125, "learning_rate": 0.0004893304923792946, "loss": 17.9009, "step": 33320 }, { "epoch": 0.06501211420994389, "grad_norm": 8.0625, "learning_rate": 0.0004893239903483853, "loss": 17.8307, "step": 33340 }, { "epoch": 0.06505111367857613, "grad_norm": 8.375, "learning_rate": 0.0004893174883174759, "loss": 17.8965, "step": 33360 }, { "epoch": 0.06509011314720838, "grad_norm": 8.0625, "learning_rate": 0.0004893109862865666, "loss": 17.9195, "step": 33380 }, { "epoch": 0.0651291126158406, "grad_norm": 7.90625, "learning_rate": 0.0004893044842556573, "loss": 17.7803, "step": 33400 }, { "epoch": 0.06516811208447285, "grad_norm": 8.25, "learning_rate": 0.0004892979822247479, "loss": 17.8673, "step": 33420 }, { "epoch": 0.06520711155310509, "grad_norm": 7.71875, "learning_rate": 0.0004892914801938386, "loss": 17.8586, "step": 33440 }, { "epoch": 0.06524611102173733, "grad_norm": 8.75, "learning_rate": 0.0004892849781629292, "loss": 17.9052, "step": 33460 }, { "epoch": 0.06528511049036957, "grad_norm": 8.9375, "learning_rate": 0.0004892784761320199, "loss": 17.8737, "step": 33480 }, { "epoch": 0.06532410995900181, "grad_norm": 8.0625, "learning_rate": 0.0004892719741011105, "loss": 17.9304, "step": 33500 }, { "epoch": 0.06536310942763404, "grad_norm": 8.6875, "learning_rate": 0.0004892654720702011, "loss": 17.8793, "step": 33520 }, { "epoch": 0.06540210889626628, "grad_norm": 7.5625, "learning_rate": 0.0004892589700392917, "loss": 17.9025, "step": 33540 }, { "epoch": 0.06544110836489853, "grad_norm": 7.96875, "learning_rate": 0.0004892524680083824, "loss": 17.8407, "step": 33560 }, { "epoch": 0.06548010783353077, "grad_norm": 7.78125, "learning_rate": 0.0004892459659774731, "loss": 17.9782, "step": 33580 }, { "epoch": 0.06551910730216301, "grad_norm": 8.0, "learning_rate": 0.0004892394639465637, "loss": 17.9469, "step": 33600 }, { "epoch": 0.06555810677079525, "grad_norm": 7.21875, "learning_rate": 0.0004892329619156544, "loss": 17.9057, "step": 33620 }, { "epoch": 0.0655971062394275, "grad_norm": 8.375, "learning_rate": 0.000489226459884745, "loss": 17.8254, "step": 33640 }, { "epoch": 0.06563610570805972, "grad_norm": 8.4375, "learning_rate": 0.0004892199578538357, "loss": 17.8844, "step": 33660 }, { "epoch": 0.06567510517669196, "grad_norm": 7.53125, "learning_rate": 0.0004892134558229263, "loss": 17.7609, "step": 33680 }, { "epoch": 0.0657141046453242, "grad_norm": 8.25, "learning_rate": 0.000489206953792017, "loss": 17.7972, "step": 33700 }, { "epoch": 0.06575310411395645, "grad_norm": 8.125, "learning_rate": 0.0004892004517611077, "loss": 17.8673, "step": 33720 }, { "epoch": 0.06579210358258869, "grad_norm": 7.46875, "learning_rate": 0.0004891939497301983, "loss": 17.8256, "step": 33740 }, { "epoch": 0.06583110305122093, "grad_norm": 8.375, "learning_rate": 0.0004891874476992889, "loss": 17.8436, "step": 33760 }, { "epoch": 0.06587010251985316, "grad_norm": 8.0625, "learning_rate": 0.0004891809456683795, "loss": 17.8555, "step": 33780 }, { "epoch": 0.0659091019884854, "grad_norm": 8.25, "learning_rate": 0.0004891744436374702, "loss": 17.8169, "step": 33800 }, { "epoch": 0.06594810145711764, "grad_norm": 7.5, "learning_rate": 0.0004891679416065608, "loss": 17.8591, "step": 33820 }, { "epoch": 0.06598710092574989, "grad_norm": 8.1875, "learning_rate": 0.0004891614395756515, "loss": 17.8805, "step": 33840 }, { "epoch": 0.06602610039438213, "grad_norm": 8.1875, "learning_rate": 0.0004891549375447421, "loss": 17.822, "step": 33860 }, { "epoch": 0.06606509986301437, "grad_norm": 7.5, "learning_rate": 0.0004891484355138328, "loss": 17.7957, "step": 33880 }, { "epoch": 0.06610409933164661, "grad_norm": 7.84375, "learning_rate": 0.0004891419334829235, "loss": 17.7377, "step": 33900 }, { "epoch": 0.06614309880027884, "grad_norm": 7.625, "learning_rate": 0.000489135431452014, "loss": 17.8446, "step": 33920 }, { "epoch": 0.06618209826891108, "grad_norm": 8.1875, "learning_rate": 0.0004891289294211047, "loss": 17.8576, "step": 33940 }, { "epoch": 0.06622109773754332, "grad_norm": 7.59375, "learning_rate": 0.0004891224273901953, "loss": 17.8222, "step": 33960 }, { "epoch": 0.06626009720617557, "grad_norm": 8.375, "learning_rate": 0.000489115925359286, "loss": 17.8738, "step": 33980 }, { "epoch": 0.06629909667480781, "grad_norm": 9.5, "learning_rate": 0.0004891094233283766, "loss": 17.7685, "step": 34000 }, { "epoch": 0.06633809614344005, "grad_norm": 7.34375, "learning_rate": 0.0004891029212974673, "loss": 17.7993, "step": 34020 }, { "epoch": 0.06637709561207228, "grad_norm": 7.6875, "learning_rate": 0.000489096419266558, "loss": 17.8716, "step": 34040 }, { "epoch": 0.06641609508070452, "grad_norm": 7.625, "learning_rate": 0.0004890899172356486, "loss": 17.7815, "step": 34060 }, { "epoch": 0.06645509454933676, "grad_norm": 8.1875, "learning_rate": 0.0004890834152047392, "loss": 17.7868, "step": 34080 }, { "epoch": 0.066494094017969, "grad_norm": 7.40625, "learning_rate": 0.0004890769131738298, "loss": 17.8916, "step": 34100 }, { "epoch": 0.06653309348660125, "grad_norm": 7.4375, "learning_rate": 0.0004890704111429205, "loss": 17.8503, "step": 34120 }, { "epoch": 0.06657209295523349, "grad_norm": 7.6875, "learning_rate": 0.0004890639091120111, "loss": 17.9017, "step": 34140 }, { "epoch": 0.06661109242386573, "grad_norm": 7.15625, "learning_rate": 0.0004890574070811018, "loss": 17.82, "step": 34160 }, { "epoch": 0.06665009189249796, "grad_norm": 7.5, "learning_rate": 0.0004890509050501924, "loss": 17.8197, "step": 34180 }, { "epoch": 0.0666890913611302, "grad_norm": 7.1875, "learning_rate": 0.0004890444030192831, "loss": 17.8714, "step": 34200 }, { "epoch": 0.06672809082976244, "grad_norm": 7.34375, "learning_rate": 0.0004890379009883738, "loss": 17.7712, "step": 34220 }, { "epoch": 0.06676709029839469, "grad_norm": 7.78125, "learning_rate": 0.0004890313989574644, "loss": 17.8097, "step": 34240 }, { "epoch": 0.06680608976702693, "grad_norm": 7.84375, "learning_rate": 0.000489024896926555, "loss": 17.7745, "step": 34260 }, { "epoch": 0.06684508923565917, "grad_norm": 7.46875, "learning_rate": 0.0004890183948956456, "loss": 17.7995, "step": 34280 }, { "epoch": 0.0668840887042914, "grad_norm": 7.875, "learning_rate": 0.0004890118928647363, "loss": 17.78, "step": 34300 }, { "epoch": 0.06692308817292364, "grad_norm": 7.6875, "learning_rate": 0.0004890053908338269, "loss": 17.8159, "step": 34320 }, { "epoch": 0.06696208764155588, "grad_norm": 9.0625, "learning_rate": 0.0004889988888029176, "loss": 17.8342, "step": 34340 }, { "epoch": 0.06700108711018812, "grad_norm": 7.40625, "learning_rate": 0.0004889923867720082, "loss": 17.8283, "step": 34360 }, { "epoch": 0.06704008657882037, "grad_norm": 8.875, "learning_rate": 0.0004889858847410989, "loss": 17.8443, "step": 34380 }, { "epoch": 0.06707908604745261, "grad_norm": 7.90625, "learning_rate": 0.0004889793827101896, "loss": 17.8155, "step": 34400 }, { "epoch": 0.06711808551608485, "grad_norm": 7.875, "learning_rate": 0.0004889728806792802, "loss": 17.7699, "step": 34420 }, { "epoch": 0.06715708498471708, "grad_norm": 7.90625, "learning_rate": 0.0004889663786483709, "loss": 17.7513, "step": 34440 }, { "epoch": 0.06719608445334932, "grad_norm": 8.0, "learning_rate": 0.0004889598766174615, "loss": 17.8044, "step": 34460 }, { "epoch": 0.06723508392198156, "grad_norm": 8.125, "learning_rate": 0.0004889533745865522, "loss": 17.7959, "step": 34480 }, { "epoch": 0.0672740833906138, "grad_norm": 7.65625, "learning_rate": 0.0004889468725556427, "loss": 17.8336, "step": 34500 }, { "epoch": 0.06731308285924605, "grad_norm": 8.125, "learning_rate": 0.0004889403705247334, "loss": 17.7959, "step": 34520 }, { "epoch": 0.06735208232787829, "grad_norm": 7.625, "learning_rate": 0.000488933868493824, "loss": 17.8224, "step": 34540 }, { "epoch": 0.06739108179651052, "grad_norm": 7.34375, "learning_rate": 0.0004889273664629147, "loss": 17.7742, "step": 34560 }, { "epoch": 0.06743008126514276, "grad_norm": 7.75, "learning_rate": 0.0004889208644320054, "loss": 17.8346, "step": 34580 }, { "epoch": 0.067469080733775, "grad_norm": 8.25, "learning_rate": 0.000488914362401096, "loss": 17.8009, "step": 34600 }, { "epoch": 0.06750808020240724, "grad_norm": 7.5, "learning_rate": 0.0004889078603701867, "loss": 17.7425, "step": 34620 }, { "epoch": 0.06754707967103948, "grad_norm": 7.625, "learning_rate": 0.0004889013583392773, "loss": 17.8012, "step": 34640 }, { "epoch": 0.06758607913967173, "grad_norm": 8.0, "learning_rate": 0.000488894856308368, "loss": 17.7694, "step": 34660 }, { "epoch": 0.06762507860830397, "grad_norm": 8.3125, "learning_rate": 0.0004888883542774585, "loss": 17.7578, "step": 34680 }, { "epoch": 0.0676640780769362, "grad_norm": 8.0, "learning_rate": 0.0004888818522465492, "loss": 17.6994, "step": 34700 }, { "epoch": 0.06770307754556844, "grad_norm": 7.40625, "learning_rate": 0.0004888753502156399, "loss": 17.7348, "step": 34720 }, { "epoch": 0.06774207701420068, "grad_norm": 9.0, "learning_rate": 0.0004888688481847305, "loss": 17.7287, "step": 34740 }, { "epoch": 0.06778107648283292, "grad_norm": 7.25, "learning_rate": 0.0004888623461538212, "loss": 17.7332, "step": 34760 }, { "epoch": 0.06782007595146516, "grad_norm": 8.3125, "learning_rate": 0.0004888558441229118, "loss": 17.7715, "step": 34780 }, { "epoch": 0.0678590754200974, "grad_norm": 7.59375, "learning_rate": 0.0004888493420920025, "loss": 17.8135, "step": 34800 }, { "epoch": 0.06789807488872963, "grad_norm": 7.40625, "learning_rate": 0.0004888428400610931, "loss": 17.6929, "step": 34820 }, { "epoch": 0.06793707435736188, "grad_norm": 7.875, "learning_rate": 0.0004888363380301837, "loss": 17.8588, "step": 34840 }, { "epoch": 0.06797607382599412, "grad_norm": 7.71875, "learning_rate": 0.0004888298359992743, "loss": 17.7842, "step": 34860 }, { "epoch": 0.06801507329462636, "grad_norm": 7.4375, "learning_rate": 0.000488823333968365, "loss": 17.7183, "step": 34880 }, { "epoch": 0.0680540727632586, "grad_norm": 8.25, "learning_rate": 0.0004888168319374557, "loss": 17.7279, "step": 34900 }, { "epoch": 0.06809307223189084, "grad_norm": 8.0625, "learning_rate": 0.0004888103299065463, "loss": 17.8105, "step": 34920 }, { "epoch": 0.06813207170052309, "grad_norm": 8.1875, "learning_rate": 0.000488803827875637, "loss": 17.7081, "step": 34940 }, { "epoch": 0.06817107116915531, "grad_norm": 8.1875, "learning_rate": 0.0004887973258447276, "loss": 17.7425, "step": 34960 }, { "epoch": 0.06821007063778756, "grad_norm": 8.25, "learning_rate": 0.0004887908238138183, "loss": 17.7773, "step": 34980 }, { "epoch": 0.0682490701064198, "grad_norm": 8.0, "learning_rate": 0.0004887843217829088, "loss": 17.8163, "step": 35000 }, { "epoch": 0.06828806957505204, "grad_norm": 7.75, "learning_rate": 0.0004887778197519995, "loss": 17.6896, "step": 35020 }, { "epoch": 0.06832706904368428, "grad_norm": 8.1875, "learning_rate": 0.0004887713177210901, "loss": 17.7807, "step": 35040 }, { "epoch": 0.06836606851231652, "grad_norm": 7.75, "learning_rate": 0.0004887648156901808, "loss": 17.7322, "step": 35060 }, { "epoch": 0.06840506798094875, "grad_norm": 8.125, "learning_rate": 0.0004887583136592715, "loss": 17.7731, "step": 35080 }, { "epoch": 0.068444067449581, "grad_norm": 7.46875, "learning_rate": 0.0004887518116283621, "loss": 17.7156, "step": 35100 }, { "epoch": 0.06848306691821324, "grad_norm": 7.9375, "learning_rate": 0.0004887453095974528, "loss": 17.7608, "step": 35120 }, { "epoch": 0.06852206638684548, "grad_norm": 8.0, "learning_rate": 0.0004887388075665434, "loss": 17.7159, "step": 35140 }, { "epoch": 0.06856106585547772, "grad_norm": 8.375, "learning_rate": 0.0004887323055356341, "loss": 17.8228, "step": 35160 }, { "epoch": 0.06860006532410996, "grad_norm": 8.0625, "learning_rate": 0.0004887258035047247, "loss": 17.6853, "step": 35180 }, { "epoch": 0.0686390647927422, "grad_norm": 7.65625, "learning_rate": 0.0004887193014738154, "loss": 17.7202, "step": 35200 }, { "epoch": 0.06867806426137443, "grad_norm": 7.40625, "learning_rate": 0.000488712799442906, "loss": 17.7481, "step": 35220 }, { "epoch": 0.06871706373000668, "grad_norm": 8.125, "learning_rate": 0.0004887062974119966, "loss": 17.6651, "step": 35240 }, { "epoch": 0.06875606319863892, "grad_norm": 9.6875, "learning_rate": 0.0004886997953810873, "loss": 17.7758, "step": 35260 }, { "epoch": 0.06879506266727116, "grad_norm": 8.25, "learning_rate": 0.0004886932933501779, "loss": 17.6725, "step": 35280 }, { "epoch": 0.0688340621359034, "grad_norm": 8.6875, "learning_rate": 0.0004886867913192686, "loss": 17.7423, "step": 35300 }, { "epoch": 0.06887306160453564, "grad_norm": 7.4375, "learning_rate": 0.0004886802892883592, "loss": 17.7326, "step": 35320 }, { "epoch": 0.06891206107316787, "grad_norm": 7.65625, "learning_rate": 0.0004886737872574499, "loss": 17.7302, "step": 35340 }, { "epoch": 0.06895106054180011, "grad_norm": 8.125, "learning_rate": 0.0004886672852265405, "loss": 17.7565, "step": 35360 }, { "epoch": 0.06899006001043236, "grad_norm": 7.59375, "learning_rate": 0.0004886607831956312, "loss": 17.6745, "step": 35380 }, { "epoch": 0.0690290594790646, "grad_norm": 7.625, "learning_rate": 0.0004886542811647219, "loss": 17.7291, "step": 35400 }, { "epoch": 0.06906805894769684, "grad_norm": 8.1875, "learning_rate": 0.0004886477791338125, "loss": 17.7244, "step": 35420 }, { "epoch": 0.06910705841632908, "grad_norm": 7.84375, "learning_rate": 0.0004886412771029032, "loss": 17.7304, "step": 35440 }, { "epoch": 0.06914605788496132, "grad_norm": 8.25, "learning_rate": 0.0004886347750719937, "loss": 17.7558, "step": 35460 }, { "epoch": 0.06918505735359355, "grad_norm": 8.125, "learning_rate": 0.0004886282730410844, "loss": 17.7239, "step": 35480 }, { "epoch": 0.0692240568222258, "grad_norm": 7.625, "learning_rate": 0.000488621771010175, "loss": 17.7318, "step": 35500 }, { "epoch": 0.06926305629085804, "grad_norm": 8.5, "learning_rate": 0.0004886152689792657, "loss": 17.7314, "step": 35520 }, { "epoch": 0.06930205575949028, "grad_norm": 7.71875, "learning_rate": 0.0004886087669483564, "loss": 17.7347, "step": 35540 }, { "epoch": 0.06934105522812252, "grad_norm": 7.78125, "learning_rate": 0.000488602264917447, "loss": 17.7191, "step": 35560 }, { "epoch": 0.06938005469675476, "grad_norm": 7.4375, "learning_rate": 0.0004885957628865377, "loss": 17.7288, "step": 35580 }, { "epoch": 0.06941905416538699, "grad_norm": 7.46875, "learning_rate": 0.0004885892608556283, "loss": 17.7054, "step": 35600 }, { "epoch": 0.06945805363401923, "grad_norm": 7.375, "learning_rate": 0.0004885827588247189, "loss": 17.6815, "step": 35620 }, { "epoch": 0.06949705310265147, "grad_norm": 8.125, "learning_rate": 0.0004885762567938095, "loss": 17.7304, "step": 35640 }, { "epoch": 0.06953605257128372, "grad_norm": 8.4375, "learning_rate": 0.0004885697547629002, "loss": 17.664, "step": 35660 }, { "epoch": 0.06957505203991596, "grad_norm": 8.0625, "learning_rate": 0.0004885632527319908, "loss": 17.7926, "step": 35680 }, { "epoch": 0.0696140515085482, "grad_norm": 7.3125, "learning_rate": 0.0004885567507010815, "loss": 17.6294, "step": 35700 }, { "epoch": 0.06965305097718044, "grad_norm": 6.625, "learning_rate": 0.0004885502486701722, "loss": 17.6792, "step": 35720 }, { "epoch": 0.06969205044581267, "grad_norm": 7.78125, "learning_rate": 0.0004885437466392628, "loss": 17.773, "step": 35740 }, { "epoch": 0.06973104991444491, "grad_norm": 8.0, "learning_rate": 0.0004885372446083534, "loss": 17.715, "step": 35760 }, { "epoch": 0.06977004938307715, "grad_norm": 7.875, "learning_rate": 0.000488530742577444, "loss": 17.6879, "step": 35780 }, { "epoch": 0.0698090488517094, "grad_norm": 7.5625, "learning_rate": 0.0004885242405465347, "loss": 17.621, "step": 35800 }, { "epoch": 0.06984804832034164, "grad_norm": 8.1875, "learning_rate": 0.0004885177385156253, "loss": 17.7817, "step": 35820 }, { "epoch": 0.06988704778897388, "grad_norm": 8.375, "learning_rate": 0.000488511236484716, "loss": 17.5651, "step": 35840 }, { "epoch": 0.06992604725760612, "grad_norm": 8.625, "learning_rate": 0.0004885047344538066, "loss": 17.718, "step": 35860 }, { "epoch": 0.06996504672623835, "grad_norm": 7.6875, "learning_rate": 0.0004884982324228973, "loss": 17.7255, "step": 35880 }, { "epoch": 0.07000404619487059, "grad_norm": 7.84375, "learning_rate": 0.000488491730391988, "loss": 17.7409, "step": 35900 }, { "epoch": 0.07004304566350283, "grad_norm": 7.71875, "learning_rate": 0.0004884852283610786, "loss": 17.667, "step": 35920 }, { "epoch": 0.07008204513213508, "grad_norm": 7.625, "learning_rate": 0.0004884787263301693, "loss": 17.6612, "step": 35940 }, { "epoch": 0.07012104460076732, "grad_norm": 8.0625, "learning_rate": 0.0004884722242992598, "loss": 17.5958, "step": 35960 }, { "epoch": 0.07016004406939956, "grad_norm": 7.59375, "learning_rate": 0.0004884657222683505, "loss": 17.6831, "step": 35980 }, { "epoch": 0.07019904353803179, "grad_norm": 7.5625, "learning_rate": 0.0004884592202374411, "loss": 17.6976, "step": 36000 }, { "epoch": 0.07023804300666403, "grad_norm": 7.25, "learning_rate": 0.0004884527182065318, "loss": 17.6678, "step": 36020 }, { "epoch": 0.07027704247529627, "grad_norm": 8.375, "learning_rate": 0.0004884462161756225, "loss": 17.666, "step": 36040 }, { "epoch": 0.07031604194392851, "grad_norm": 8.875, "learning_rate": 0.0004884397141447131, "loss": 17.6397, "step": 36060 }, { "epoch": 0.07035504141256076, "grad_norm": 7.46875, "learning_rate": 0.0004884332121138038, "loss": 17.7174, "step": 36080 }, { "epoch": 0.070394040881193, "grad_norm": 8.0625, "learning_rate": 0.0004884267100828944, "loss": 17.6466, "step": 36100 }, { "epoch": 0.07043304034982524, "grad_norm": 7.8125, "learning_rate": 0.0004884202080519851, "loss": 17.7442, "step": 36120 }, { "epoch": 0.07047203981845747, "grad_norm": 7.5625, "learning_rate": 0.0004884137060210757, "loss": 17.6943, "step": 36140 }, { "epoch": 0.07051103928708971, "grad_norm": 7.34375, "learning_rate": 0.0004884072039901664, "loss": 17.7069, "step": 36160 }, { "epoch": 0.07055003875572195, "grad_norm": 6.78125, "learning_rate": 0.000488400701959257, "loss": 17.6392, "step": 36180 }, { "epoch": 0.0705890382243542, "grad_norm": 8.8125, "learning_rate": 0.0004883941999283476, "loss": 17.6976, "step": 36200 }, { "epoch": 0.07062803769298644, "grad_norm": 8.8125, "learning_rate": 0.0004883876978974383, "loss": 17.623, "step": 36220 }, { "epoch": 0.07066703716161868, "grad_norm": 8.0625, "learning_rate": 0.0004883811958665289, "loss": 17.7023, "step": 36240 }, { "epoch": 0.0707060366302509, "grad_norm": 8.1875, "learning_rate": 0.0004883746938356196, "loss": 17.683, "step": 36260 }, { "epoch": 0.07074503609888315, "grad_norm": 7.40625, "learning_rate": 0.0004883681918047102, "loss": 17.6645, "step": 36280 }, { "epoch": 0.07078403556751539, "grad_norm": 8.0625, "learning_rate": 0.0004883616897738009, "loss": 17.6293, "step": 36300 }, { "epoch": 0.07082303503614763, "grad_norm": 8.125, "learning_rate": 0.0004883551877428915, "loss": 17.6124, "step": 36320 }, { "epoch": 0.07086203450477987, "grad_norm": 8.0625, "learning_rate": 0.0004883486857119822, "loss": 17.6903, "step": 36340 }, { "epoch": 0.07090103397341212, "grad_norm": 7.71875, "learning_rate": 0.0004883421836810729, "loss": 17.5895, "step": 36360 }, { "epoch": 0.07094003344204436, "grad_norm": 8.375, "learning_rate": 0.0004883356816501634, "loss": 17.635, "step": 36380 }, { "epoch": 0.07097903291067659, "grad_norm": 7.59375, "learning_rate": 0.0004883291796192541, "loss": 17.6157, "step": 36400 }, { "epoch": 0.07101803237930883, "grad_norm": 7.875, "learning_rate": 0.0004883226775883447, "loss": 17.6606, "step": 36420 }, { "epoch": 0.07105703184794107, "grad_norm": 6.8125, "learning_rate": 0.0004883161755574354, "loss": 17.6764, "step": 36440 }, { "epoch": 0.07109603131657331, "grad_norm": 7.875, "learning_rate": 0.000488309673526526, "loss": 17.6128, "step": 36460 }, { "epoch": 0.07113503078520556, "grad_norm": 7.53125, "learning_rate": 0.0004883031714956167, "loss": 17.6536, "step": 36480 }, { "epoch": 0.0711740302538378, "grad_norm": 7.90625, "learning_rate": 0.0004882966694647073, "loss": 17.5757, "step": 36500 }, { "epoch": 0.07121302972247003, "grad_norm": 7.5, "learning_rate": 0.000488290167433798, "loss": 17.6263, "step": 36520 }, { "epoch": 0.07125202919110227, "grad_norm": 7.875, "learning_rate": 0.0004882836654028886, "loss": 17.5946, "step": 36540 }, { "epoch": 0.07129102865973451, "grad_norm": 7.3125, "learning_rate": 0.00048827716337197926, "loss": 17.6368, "step": 36560 }, { "epoch": 0.07133002812836675, "grad_norm": 7.4375, "learning_rate": 0.0004882706613410699, "loss": 17.6413, "step": 36580 }, { "epoch": 0.071369027596999, "grad_norm": 7.15625, "learning_rate": 0.0004882641593101606, "loss": 17.6304, "step": 36600 }, { "epoch": 0.07140802706563124, "grad_norm": 8.0625, "learning_rate": 0.0004882576572792512, "loss": 17.6497, "step": 36620 }, { "epoch": 0.07144702653426348, "grad_norm": 8.0625, "learning_rate": 0.00048825115524834184, "loss": 17.6365, "step": 36640 }, { "epoch": 0.0714860260028957, "grad_norm": 7.53125, "learning_rate": 0.0004882446532174325, "loss": 17.6154, "step": 36660 }, { "epoch": 0.07152502547152795, "grad_norm": 7.125, "learning_rate": 0.0004882381511865231, "loss": 17.6131, "step": 36680 }, { "epoch": 0.07156402494016019, "grad_norm": 7.4375, "learning_rate": 0.00048823164915561375, "loss": 17.6818, "step": 36700 }, { "epoch": 0.07160302440879243, "grad_norm": 7.6875, "learning_rate": 0.0004882251471247044, "loss": 17.6542, "step": 36720 }, { "epoch": 0.07164202387742467, "grad_norm": 7.6875, "learning_rate": 0.00048821864509379507, "loss": 17.6518, "step": 36740 }, { "epoch": 0.07168102334605692, "grad_norm": 7.875, "learning_rate": 0.00048821214306288567, "loss": 17.628, "step": 36760 }, { "epoch": 0.07172002281468914, "grad_norm": 6.96875, "learning_rate": 0.0004882056410319763, "loss": 17.6274, "step": 36780 }, { "epoch": 0.07175902228332139, "grad_norm": 7.75, "learning_rate": 0.000488199139001067, "loss": 17.6418, "step": 36800 }, { "epoch": 0.07179802175195363, "grad_norm": 7.5625, "learning_rate": 0.00048819263697015764, "loss": 17.6341, "step": 36820 }, { "epoch": 0.07183702122058587, "grad_norm": 8.4375, "learning_rate": 0.0004881861349392483, "loss": 17.632, "step": 36840 }, { "epoch": 0.07187602068921811, "grad_norm": 8.3125, "learning_rate": 0.00048817963290833895, "loss": 17.6495, "step": 36860 }, { "epoch": 0.07191502015785035, "grad_norm": 7.5625, "learning_rate": 0.0004881731308774296, "loss": 17.6071, "step": 36880 }, { "epoch": 0.0719540196264826, "grad_norm": 7.40625, "learning_rate": 0.00048816662884652027, "loss": 17.6029, "step": 36900 }, { "epoch": 0.07199301909511482, "grad_norm": 7.5, "learning_rate": 0.0004881601268156108, "loss": 17.6303, "step": 36920 }, { "epoch": 0.07203201856374707, "grad_norm": 7.59375, "learning_rate": 0.0004881536247847015, "loss": 17.6263, "step": 36940 }, { "epoch": 0.07207101803237931, "grad_norm": 8.375, "learning_rate": 0.00048814712275379213, "loss": 17.6397, "step": 36960 }, { "epoch": 0.07211001750101155, "grad_norm": 7.78125, "learning_rate": 0.0004881406207228828, "loss": 17.5967, "step": 36980 }, { "epoch": 0.07214901696964379, "grad_norm": 7.71875, "learning_rate": 0.00048813411869197344, "loss": 17.6339, "step": 37000 }, { "epoch": 0.07218801643827603, "grad_norm": 6.90625, "learning_rate": 0.0004881276166610641, "loss": 17.5884, "step": 37020 }, { "epoch": 0.07222701590690826, "grad_norm": 8.75, "learning_rate": 0.00048812111463015476, "loss": 17.5697, "step": 37040 }, { "epoch": 0.0722660153755405, "grad_norm": 7.09375, "learning_rate": 0.0004881146125992454, "loss": 17.5832, "step": 37060 }, { "epoch": 0.07230501484417275, "grad_norm": 6.65625, "learning_rate": 0.000488108110568336, "loss": 17.6558, "step": 37080 }, { "epoch": 0.07234401431280499, "grad_norm": 7.5, "learning_rate": 0.0004881016085374267, "loss": 17.5931, "step": 37100 }, { "epoch": 0.07238301378143723, "grad_norm": 9.375, "learning_rate": 0.00048809510650651733, "loss": 17.5871, "step": 37120 }, { "epoch": 0.07242201325006947, "grad_norm": 7.28125, "learning_rate": 0.000488088604475608, "loss": 17.5591, "step": 37140 }, { "epoch": 0.07246101271870171, "grad_norm": 7.9375, "learning_rate": 0.00048808210244469865, "loss": 17.5695, "step": 37160 }, { "epoch": 0.07250001218733394, "grad_norm": 8.625, "learning_rate": 0.00048807560041378925, "loss": 17.5908, "step": 37180 }, { "epoch": 0.07253901165596618, "grad_norm": 8.1875, "learning_rate": 0.0004880690983828799, "loss": 17.5873, "step": 37200 }, { "epoch": 0.07257801112459843, "grad_norm": 8.75, "learning_rate": 0.0004880625963519705, "loss": 17.5924, "step": 37220 }, { "epoch": 0.07261701059323067, "grad_norm": 6.9375, "learning_rate": 0.00048805609432106117, "loss": 17.5926, "step": 37240 }, { "epoch": 0.07265601006186291, "grad_norm": 7.875, "learning_rate": 0.0004880495922901518, "loss": 17.6146, "step": 37260 }, { "epoch": 0.07269500953049515, "grad_norm": 8.0, "learning_rate": 0.0004880430902592425, "loss": 17.6691, "step": 37280 }, { "epoch": 0.07273400899912738, "grad_norm": 7.59375, "learning_rate": 0.00048803658822833314, "loss": 17.5355, "step": 37300 }, { "epoch": 0.07277300846775962, "grad_norm": 8.75, "learning_rate": 0.0004880300861974238, "loss": 17.5701, "step": 37320 }, { "epoch": 0.07281200793639186, "grad_norm": 8.3125, "learning_rate": 0.00048802358416651445, "loss": 17.6153, "step": 37340 }, { "epoch": 0.0728510074050241, "grad_norm": 8.375, "learning_rate": 0.0004880170821356051, "loss": 17.5791, "step": 37360 }, { "epoch": 0.07289000687365635, "grad_norm": 7.65625, "learning_rate": 0.0004880105801046957, "loss": 17.5809, "step": 37380 }, { "epoch": 0.07292900634228859, "grad_norm": 8.375, "learning_rate": 0.00048800407807378637, "loss": 17.5233, "step": 37400 }, { "epoch": 0.07296800581092083, "grad_norm": 7.65625, "learning_rate": 0.00048799757604287697, "loss": 17.6682, "step": 37420 }, { "epoch": 0.07300700527955306, "grad_norm": 7.9375, "learning_rate": 0.0004879910740119676, "loss": 17.5864, "step": 37440 }, { "epoch": 0.0730460047481853, "grad_norm": 7.875, "learning_rate": 0.0004879845719810583, "loss": 17.5676, "step": 37460 }, { "epoch": 0.07308500421681755, "grad_norm": 7.09375, "learning_rate": 0.00048797806995014894, "loss": 17.5966, "step": 37480 }, { "epoch": 0.07312400368544979, "grad_norm": 6.78125, "learning_rate": 0.0004879715679192396, "loss": 17.6169, "step": 37500 }, { "epoch": 0.07316300315408203, "grad_norm": 7.125, "learning_rate": 0.00048796506588833025, "loss": 17.613, "step": 37520 }, { "epoch": 0.07320200262271427, "grad_norm": 7.34375, "learning_rate": 0.00048795856385742086, "loss": 17.5937, "step": 37540 }, { "epoch": 0.0732410020913465, "grad_norm": 7.6875, "learning_rate": 0.0004879520618265115, "loss": 17.5639, "step": 37560 }, { "epoch": 0.07328000155997874, "grad_norm": 7.375, "learning_rate": 0.00048794555979560217, "loss": 17.6464, "step": 37580 }, { "epoch": 0.07331900102861098, "grad_norm": 8.3125, "learning_rate": 0.00048793905776469283, "loss": 17.5679, "step": 37600 }, { "epoch": 0.07335800049724323, "grad_norm": 7.78125, "learning_rate": 0.0004879325557337835, "loss": 17.529, "step": 37620 }, { "epoch": 0.07339699996587547, "grad_norm": 8.0, "learning_rate": 0.00048792605370287414, "loss": 17.5366, "step": 37640 }, { "epoch": 0.07343599943450771, "grad_norm": 7.375, "learning_rate": 0.00048791955167196474, "loss": 17.6042, "step": 37660 }, { "epoch": 0.07347499890313995, "grad_norm": 7.59375, "learning_rate": 0.00048791304964105535, "loss": 17.5386, "step": 37680 }, { "epoch": 0.07351399837177218, "grad_norm": 8.25, "learning_rate": 0.000487906547610146, "loss": 17.6019, "step": 37700 }, { "epoch": 0.07355299784040442, "grad_norm": 7.59375, "learning_rate": 0.00048790004557923666, "loss": 17.5235, "step": 37720 }, { "epoch": 0.07359199730903666, "grad_norm": 8.125, "learning_rate": 0.0004878935435483273, "loss": 17.5871, "step": 37740 }, { "epoch": 0.0736309967776689, "grad_norm": 8.4375, "learning_rate": 0.000487887041517418, "loss": 17.5027, "step": 37760 }, { "epoch": 0.07366999624630115, "grad_norm": 7.1875, "learning_rate": 0.00048788053948650863, "loss": 17.5144, "step": 37780 }, { "epoch": 0.07370899571493339, "grad_norm": 7.34375, "learning_rate": 0.0004878740374555993, "loss": 17.607, "step": 37800 }, { "epoch": 0.07374799518356562, "grad_norm": 7.46875, "learning_rate": 0.00048786753542468995, "loss": 17.5635, "step": 37820 }, { "epoch": 0.07378699465219786, "grad_norm": 7.4375, "learning_rate": 0.00048786103339378055, "loss": 17.5481, "step": 37840 }, { "epoch": 0.0738259941208301, "grad_norm": 7.8125, "learning_rate": 0.0004878545313628712, "loss": 17.4915, "step": 37860 }, { "epoch": 0.07386499358946234, "grad_norm": 7.40625, "learning_rate": 0.00048784802933196186, "loss": 17.5403, "step": 37880 }, { "epoch": 0.07390399305809459, "grad_norm": 8.125, "learning_rate": 0.00048784152730105247, "loss": 17.4857, "step": 37900 }, { "epoch": 0.07394299252672683, "grad_norm": 7.375, "learning_rate": 0.0004878350252701431, "loss": 17.5252, "step": 37920 }, { "epoch": 0.07398199199535907, "grad_norm": 6.875, "learning_rate": 0.0004878285232392338, "loss": 17.5321, "step": 37940 }, { "epoch": 0.0740209914639913, "grad_norm": 7.875, "learning_rate": 0.00048782202120832444, "loss": 17.5051, "step": 37960 }, { "epoch": 0.07405999093262354, "grad_norm": 7.78125, "learning_rate": 0.0004878155191774151, "loss": 17.5022, "step": 37980 }, { "epoch": 0.07409899040125578, "grad_norm": 7.03125, "learning_rate": 0.0004878090171465057, "loss": 17.5364, "step": 38000 }, { "epoch": 0.07413798986988802, "grad_norm": 7.09375, "learning_rate": 0.00048780251511559635, "loss": 17.5594, "step": 38020 }, { "epoch": 0.07417698933852027, "grad_norm": 6.6875, "learning_rate": 0.000487796013084687, "loss": 17.5479, "step": 38040 }, { "epoch": 0.07421598880715251, "grad_norm": 8.0625, "learning_rate": 0.00048778951105377767, "loss": 17.5123, "step": 38060 }, { "epoch": 0.07425498827578474, "grad_norm": 8.3125, "learning_rate": 0.0004877830090228683, "loss": 17.5579, "step": 38080 }, { "epoch": 0.07429398774441698, "grad_norm": 7.9375, "learning_rate": 0.000487776506991959, "loss": 17.5385, "step": 38100 }, { "epoch": 0.07433298721304922, "grad_norm": 7.25, "learning_rate": 0.00048777000496104964, "loss": 17.5784, "step": 38120 }, { "epoch": 0.07437198668168146, "grad_norm": 7.46875, "learning_rate": 0.0004877635029301402, "loss": 17.5357, "step": 38140 }, { "epoch": 0.0744109861503137, "grad_norm": 8.1875, "learning_rate": 0.00048775700089923084, "loss": 17.5541, "step": 38160 }, { "epoch": 0.07444998561894595, "grad_norm": 8.25, "learning_rate": 0.0004877504988683215, "loss": 17.5268, "step": 38180 }, { "epoch": 0.07448898508757819, "grad_norm": 7.4375, "learning_rate": 0.00048774399683741216, "loss": 17.5801, "step": 38200 }, { "epoch": 0.07452798455621042, "grad_norm": 9.6875, "learning_rate": 0.0004877374948065028, "loss": 17.5608, "step": 38220 }, { "epoch": 0.07456698402484266, "grad_norm": 7.71875, "learning_rate": 0.00048773099277559347, "loss": 17.5161, "step": 38240 }, { "epoch": 0.0746059834934749, "grad_norm": 7.8125, "learning_rate": 0.00048772449074468413, "loss": 17.5025, "step": 38260 }, { "epoch": 0.07464498296210714, "grad_norm": 8.125, "learning_rate": 0.0004877179887137748, "loss": 17.5424, "step": 38280 }, { "epoch": 0.07468398243073938, "grad_norm": 7.75, "learning_rate": 0.0004877114866828654, "loss": 17.5151, "step": 38300 }, { "epoch": 0.07472298189937163, "grad_norm": 8.1875, "learning_rate": 0.00048770498465195604, "loss": 17.5797, "step": 38320 }, { "epoch": 0.07476198136800385, "grad_norm": 8.0625, "learning_rate": 0.0004876984826210467, "loss": 17.5485, "step": 38340 }, { "epoch": 0.0748009808366361, "grad_norm": 7.5625, "learning_rate": 0.00048769198059013736, "loss": 17.5852, "step": 38360 }, { "epoch": 0.07483998030526834, "grad_norm": 6.875, "learning_rate": 0.00048768547855922796, "loss": 17.4899, "step": 38380 }, { "epoch": 0.07487897977390058, "grad_norm": 7.375, "learning_rate": 0.0004876789765283186, "loss": 17.4866, "step": 38400 }, { "epoch": 0.07491797924253282, "grad_norm": 7.78125, "learning_rate": 0.0004876724744974093, "loss": 17.5962, "step": 38420 }, { "epoch": 0.07495697871116506, "grad_norm": 6.84375, "learning_rate": 0.00048766597246649993, "loss": 17.5256, "step": 38440 }, { "epoch": 0.0749959781797973, "grad_norm": 8.1875, "learning_rate": 0.00048765947043559054, "loss": 17.4639, "step": 38460 }, { "epoch": 0.07503497764842953, "grad_norm": 7.625, "learning_rate": 0.0004876529684046812, "loss": 17.491, "step": 38480 }, { "epoch": 0.07507397711706178, "grad_norm": 7.53125, "learning_rate": 0.00048764646637377185, "loss": 17.5147, "step": 38500 }, { "epoch": 0.07511297658569402, "grad_norm": 7.65625, "learning_rate": 0.0004876399643428625, "loss": 17.5155, "step": 38520 }, { "epoch": 0.07515197605432626, "grad_norm": 7.5, "learning_rate": 0.00048763346231195316, "loss": 17.5834, "step": 38540 }, { "epoch": 0.0751909755229585, "grad_norm": 7.90625, "learning_rate": 0.0004876269602810438, "loss": 17.4959, "step": 38560 }, { "epoch": 0.07522997499159075, "grad_norm": 7.65625, "learning_rate": 0.0004876204582501345, "loss": 17.4307, "step": 38580 }, { "epoch": 0.07526897446022297, "grad_norm": 7.5, "learning_rate": 0.00048761395621922513, "loss": 17.4006, "step": 38600 }, { "epoch": 0.07530797392885522, "grad_norm": 8.0, "learning_rate": 0.00048760745418831574, "loss": 17.5772, "step": 38620 }, { "epoch": 0.07534697339748746, "grad_norm": 7.90625, "learning_rate": 0.00048760095215740634, "loss": 17.5366, "step": 38640 }, { "epoch": 0.0753859728661197, "grad_norm": 7.96875, "learning_rate": 0.000487594450126497, "loss": 17.5255, "step": 38660 }, { "epoch": 0.07542497233475194, "grad_norm": 6.4375, "learning_rate": 0.00048758794809558765, "loss": 17.4143, "step": 38680 }, { "epoch": 0.07546397180338418, "grad_norm": 7.34375, "learning_rate": 0.0004875814460646783, "loss": 17.5458, "step": 38700 }, { "epoch": 0.07550297127201643, "grad_norm": 6.6875, "learning_rate": 0.00048757494403376897, "loss": 17.4743, "step": 38720 }, { "epoch": 0.07554197074064865, "grad_norm": 7.53125, "learning_rate": 0.0004875684420028596, "loss": 17.5268, "step": 38740 }, { "epoch": 0.0755809702092809, "grad_norm": 7.84375, "learning_rate": 0.0004875619399719502, "loss": 17.4896, "step": 38760 }, { "epoch": 0.07561996967791314, "grad_norm": 7.84375, "learning_rate": 0.0004875554379410409, "loss": 17.524, "step": 38780 }, { "epoch": 0.07565896914654538, "grad_norm": 7.0, "learning_rate": 0.00048754893591013154, "loss": 17.4771, "step": 38800 }, { "epoch": 0.07569796861517762, "grad_norm": 7.625, "learning_rate": 0.0004875424338792222, "loss": 17.4548, "step": 38820 }, { "epoch": 0.07573696808380986, "grad_norm": 6.96875, "learning_rate": 0.00048753593184831285, "loss": 17.4071, "step": 38840 }, { "epoch": 0.07577596755244209, "grad_norm": 9.0, "learning_rate": 0.0004875294298174035, "loss": 17.4683, "step": 38860 }, { "epoch": 0.07581496702107433, "grad_norm": 7.21875, "learning_rate": 0.0004875229277864941, "loss": 17.5265, "step": 38880 }, { "epoch": 0.07585396648970658, "grad_norm": 8.0, "learning_rate": 0.00048751642575558477, "loss": 17.4815, "step": 38900 }, { "epoch": 0.07589296595833882, "grad_norm": 7.1875, "learning_rate": 0.0004875099237246754, "loss": 17.5534, "step": 38920 }, { "epoch": 0.07593196542697106, "grad_norm": 7.09375, "learning_rate": 0.00048750342169376603, "loss": 17.4998, "step": 38940 }, { "epoch": 0.0759709648956033, "grad_norm": 7.9375, "learning_rate": 0.0004874969196628567, "loss": 17.5084, "step": 38960 }, { "epoch": 0.07600996436423554, "grad_norm": 7.9375, "learning_rate": 0.00048749041763194735, "loss": 17.4243, "step": 38980 }, { "epoch": 0.07604896383286777, "grad_norm": 7.3125, "learning_rate": 0.000487483915601038, "loss": 17.4458, "step": 39000 }, { "epoch": 0.07608796330150001, "grad_norm": 7.75, "learning_rate": 0.00048747741357012866, "loss": 17.4376, "step": 39020 }, { "epoch": 0.07612696277013226, "grad_norm": 6.875, "learning_rate": 0.0004874709115392193, "loss": 17.5146, "step": 39040 }, { "epoch": 0.0761659622387645, "grad_norm": 7.34375, "learning_rate": 0.00048746440950831, "loss": 17.4769, "step": 39060 }, { "epoch": 0.07620496170739674, "grad_norm": 7.4375, "learning_rate": 0.0004874579074774006, "loss": 17.4756, "step": 39080 }, { "epoch": 0.07624396117602898, "grad_norm": 7.875, "learning_rate": 0.00048745140544649123, "loss": 17.4909, "step": 39100 }, { "epoch": 0.07628296064466121, "grad_norm": 7.40625, "learning_rate": 0.00048744490341558184, "loss": 17.44, "step": 39120 }, { "epoch": 0.07632196011329345, "grad_norm": 7.65625, "learning_rate": 0.0004874384013846725, "loss": 17.4913, "step": 39140 }, { "epoch": 0.0763609595819257, "grad_norm": 7.375, "learning_rate": 0.00048743189935376315, "loss": 17.4587, "step": 39160 }, { "epoch": 0.07639995905055794, "grad_norm": 8.0625, "learning_rate": 0.0004874253973228538, "loss": 17.4052, "step": 39180 }, { "epoch": 0.07643895851919018, "grad_norm": 7.4375, "learning_rate": 0.00048741889529194446, "loss": 17.5159, "step": 39200 }, { "epoch": 0.07647795798782242, "grad_norm": 7.59375, "learning_rate": 0.00048741239326103507, "loss": 17.3985, "step": 39220 }, { "epoch": 0.07651695745645466, "grad_norm": 8.0, "learning_rate": 0.0004874058912301257, "loss": 17.4473, "step": 39240 }, { "epoch": 0.07655595692508689, "grad_norm": 7.5, "learning_rate": 0.0004873993891992164, "loss": 17.543, "step": 39260 }, { "epoch": 0.07659495639371913, "grad_norm": 8.3125, "learning_rate": 0.00048739288716830704, "loss": 17.467, "step": 39280 }, { "epoch": 0.07663395586235137, "grad_norm": 7.40625, "learning_rate": 0.0004873863851373977, "loss": 17.3856, "step": 39300 }, { "epoch": 0.07667295533098362, "grad_norm": 7.34375, "learning_rate": 0.00048737988310648835, "loss": 17.5054, "step": 39320 }, { "epoch": 0.07671195479961586, "grad_norm": 8.5, "learning_rate": 0.000487373381075579, "loss": 17.4604, "step": 39340 }, { "epoch": 0.0767509542682481, "grad_norm": 7.5625, "learning_rate": 0.0004873668790446696, "loss": 17.418, "step": 39360 }, { "epoch": 0.07678995373688033, "grad_norm": 7.0625, "learning_rate": 0.0004873603770137602, "loss": 17.4087, "step": 39380 }, { "epoch": 0.07682895320551257, "grad_norm": 7.65625, "learning_rate": 0.00048735387498285087, "loss": 17.3862, "step": 39400 }, { "epoch": 0.07686795267414481, "grad_norm": 7.84375, "learning_rate": 0.00048734737295194153, "loss": 17.4867, "step": 39420 }, { "epoch": 0.07690695214277705, "grad_norm": 8.4375, "learning_rate": 0.0004873408709210322, "loss": 17.4322, "step": 39440 }, { "epoch": 0.0769459516114093, "grad_norm": 8.1875, "learning_rate": 0.00048733436889012284, "loss": 17.4459, "step": 39460 }, { "epoch": 0.07698495108004154, "grad_norm": 7.5625, "learning_rate": 0.0004873278668592135, "loss": 17.4287, "step": 39480 }, { "epoch": 0.07702395054867378, "grad_norm": 7.34375, "learning_rate": 0.00048732136482830416, "loss": 17.4515, "step": 39500 }, { "epoch": 0.07706295001730601, "grad_norm": 7.34375, "learning_rate": 0.0004873148627973948, "loss": 17.4768, "step": 39520 }, { "epoch": 0.07710194948593825, "grad_norm": 8.875, "learning_rate": 0.0004873083607664854, "loss": 17.4483, "step": 39540 }, { "epoch": 0.07714094895457049, "grad_norm": 7.4375, "learning_rate": 0.00048730185873557607, "loss": 17.4138, "step": 39560 }, { "epoch": 0.07717994842320273, "grad_norm": 7.34375, "learning_rate": 0.00048729535670466673, "loss": 17.4202, "step": 39580 }, { "epoch": 0.07721894789183498, "grad_norm": 7.59375, "learning_rate": 0.00048728885467375733, "loss": 17.4368, "step": 39600 }, { "epoch": 0.07725794736046722, "grad_norm": 7.625, "learning_rate": 0.000487282352642848, "loss": 17.4454, "step": 39620 }, { "epoch": 0.07729694682909945, "grad_norm": 7.46875, "learning_rate": 0.00048727585061193865, "loss": 17.4631, "step": 39640 }, { "epoch": 0.07733594629773169, "grad_norm": 6.90625, "learning_rate": 0.0004872693485810293, "loss": 17.3724, "step": 39660 }, { "epoch": 0.07737494576636393, "grad_norm": 7.0625, "learning_rate": 0.0004872628465501199, "loss": 17.4071, "step": 39680 }, { "epoch": 0.07741394523499617, "grad_norm": 7.5, "learning_rate": 0.00048725634451921056, "loss": 17.4003, "step": 39700 }, { "epoch": 0.07745294470362842, "grad_norm": 7.65625, "learning_rate": 0.0004872498424883012, "loss": 17.4861, "step": 39720 }, { "epoch": 0.07749194417226066, "grad_norm": 8.0625, "learning_rate": 0.0004872433404573919, "loss": 17.4075, "step": 39740 }, { "epoch": 0.0775309436408929, "grad_norm": 7.625, "learning_rate": 0.00048723683842648253, "loss": 17.3513, "step": 39760 }, { "epoch": 0.07756994310952513, "grad_norm": 8.5, "learning_rate": 0.0004872303363955732, "loss": 17.4451, "step": 39780 }, { "epoch": 0.07760894257815737, "grad_norm": 7.46875, "learning_rate": 0.00048722383436466385, "loss": 17.4493, "step": 39800 }, { "epoch": 0.07764794204678961, "grad_norm": 7.0625, "learning_rate": 0.0004872173323337545, "loss": 17.498, "step": 39820 }, { "epoch": 0.07768694151542185, "grad_norm": 7.3125, "learning_rate": 0.00048721083030284505, "loss": 17.4646, "step": 39840 }, { "epoch": 0.0777259409840541, "grad_norm": 7.78125, "learning_rate": 0.0004872043282719357, "loss": 17.4262, "step": 39860 }, { "epoch": 0.07776494045268634, "grad_norm": 8.6875, "learning_rate": 0.00048719782624102637, "loss": 17.4143, "step": 39880 }, { "epoch": 0.07780393992131857, "grad_norm": 7.15625, "learning_rate": 0.000487191324210117, "loss": 17.424, "step": 39900 }, { "epoch": 0.07784293938995081, "grad_norm": 7.84375, "learning_rate": 0.0004871848221792077, "loss": 17.4227, "step": 39920 }, { "epoch": 0.07788193885858305, "grad_norm": 6.90625, "learning_rate": 0.00048717832014829834, "loss": 17.3149, "step": 39940 }, { "epoch": 0.07792093832721529, "grad_norm": 8.6875, "learning_rate": 0.000487171818117389, "loss": 17.4423, "step": 39960 }, { "epoch": 0.07795993779584753, "grad_norm": 7.65625, "learning_rate": 0.00048716531608647965, "loss": 17.4271, "step": 39980 }, { "epoch": 0.07799893726447978, "grad_norm": 7.375, "learning_rate": 0.00048715881405557025, "loss": 17.4514, "step": 40000 }, { "epoch": 0.07803793673311202, "grad_norm": 7.0, "learning_rate": 0.0004871523120246609, "loss": 17.5223, "step": 40020 }, { "epoch": 0.07807693620174425, "grad_norm": 7.375, "learning_rate": 0.00048714580999375157, "loss": 17.4452, "step": 40040 }, { "epoch": 0.07811593567037649, "grad_norm": 6.78125, "learning_rate": 0.0004871393079628422, "loss": 17.4569, "step": 40060 }, { "epoch": 0.07815493513900873, "grad_norm": 7.53125, "learning_rate": 0.0004871328059319329, "loss": 17.4341, "step": 40080 }, { "epoch": 0.07819393460764097, "grad_norm": 7.8125, "learning_rate": 0.0004871263039010235, "loss": 17.4084, "step": 40100 }, { "epoch": 0.07823293407627321, "grad_norm": 6.90625, "learning_rate": 0.00048711980187011414, "loss": 17.4024, "step": 40120 }, { "epoch": 0.07827193354490546, "grad_norm": 8.3125, "learning_rate": 0.00048711329983920474, "loss": 17.3594, "step": 40140 }, { "epoch": 0.07831093301353768, "grad_norm": 7.5, "learning_rate": 0.0004871067978082954, "loss": 17.4, "step": 40160 }, { "epoch": 0.07834993248216993, "grad_norm": 7.3125, "learning_rate": 0.00048710029577738606, "loss": 17.5072, "step": 40180 }, { "epoch": 0.07838893195080217, "grad_norm": 7.125, "learning_rate": 0.0004870937937464767, "loss": 17.4434, "step": 40200 }, { "epoch": 0.07842793141943441, "grad_norm": 7.59375, "learning_rate": 0.00048708729171556737, "loss": 17.3972, "step": 40220 }, { "epoch": 0.07846693088806665, "grad_norm": 7.875, "learning_rate": 0.00048708078968465803, "loss": 17.4241, "step": 40240 }, { "epoch": 0.0785059303566989, "grad_norm": 7.875, "learning_rate": 0.0004870742876537487, "loss": 17.4109, "step": 40260 }, { "epoch": 0.07854492982533114, "grad_norm": 6.75, "learning_rate": 0.00048706778562283934, "loss": 17.445, "step": 40280 }, { "epoch": 0.07858392929396336, "grad_norm": 7.71875, "learning_rate": 0.00048706128359192995, "loss": 17.4201, "step": 40300 }, { "epoch": 0.0786229287625956, "grad_norm": 7.15625, "learning_rate": 0.0004870547815610206, "loss": 17.3314, "step": 40320 }, { "epoch": 0.07866192823122785, "grad_norm": 6.96875, "learning_rate": 0.0004870482795301112, "loss": 17.3957, "step": 40340 }, { "epoch": 0.07870092769986009, "grad_norm": 7.375, "learning_rate": 0.00048704177749920186, "loss": 17.388, "step": 40360 }, { "epoch": 0.07873992716849233, "grad_norm": 7.46875, "learning_rate": 0.0004870352754682925, "loss": 17.3137, "step": 40380 }, { "epoch": 0.07877892663712457, "grad_norm": 6.9375, "learning_rate": 0.0004870287734373832, "loss": 17.3636, "step": 40400 }, { "epoch": 0.0788179261057568, "grad_norm": 8.4375, "learning_rate": 0.00048702227140647383, "loss": 17.4375, "step": 40420 }, { "epoch": 0.07885692557438904, "grad_norm": 7.34375, "learning_rate": 0.0004870157693755645, "loss": 17.3497, "step": 40440 }, { "epoch": 0.07889592504302129, "grad_norm": 6.8125, "learning_rate": 0.0004870092673446551, "loss": 17.337, "step": 40460 }, { "epoch": 0.07893492451165353, "grad_norm": 7.0, "learning_rate": 0.00048700276531374575, "loss": 17.4373, "step": 40480 }, { "epoch": 0.07897392398028577, "grad_norm": 7.71875, "learning_rate": 0.0004869962632828364, "loss": 17.399, "step": 40500 }, { "epoch": 0.07901292344891801, "grad_norm": 7.71875, "learning_rate": 0.00048698976125192706, "loss": 17.5203, "step": 40520 }, { "epoch": 0.07905192291755025, "grad_norm": 6.65625, "learning_rate": 0.0004869832592210177, "loss": 17.3644, "step": 40540 }, { "epoch": 0.07909092238618248, "grad_norm": 7.5, "learning_rate": 0.0004869767571901084, "loss": 17.4363, "step": 40560 }, { "epoch": 0.07912992185481472, "grad_norm": 7.375, "learning_rate": 0.000486970255159199, "loss": 17.4796, "step": 40580 }, { "epoch": 0.07916892132344697, "grad_norm": 7.40625, "learning_rate": 0.0004869637531282896, "loss": 17.4018, "step": 40600 }, { "epoch": 0.07920792079207921, "grad_norm": 7.9375, "learning_rate": 0.00048695725109738024, "loss": 17.3607, "step": 40620 }, { "epoch": 0.07924692026071145, "grad_norm": 7.46875, "learning_rate": 0.0004869507490664709, "loss": 17.3579, "step": 40640 }, { "epoch": 0.07928591972934369, "grad_norm": 7.59375, "learning_rate": 0.00048694424703556155, "loss": 17.3613, "step": 40660 }, { "epoch": 0.07932491919797592, "grad_norm": 7.8125, "learning_rate": 0.0004869377450046522, "loss": 17.4083, "step": 40680 }, { "epoch": 0.07936391866660816, "grad_norm": 8.1875, "learning_rate": 0.00048693124297374287, "loss": 17.3168, "step": 40700 }, { "epoch": 0.0794029181352404, "grad_norm": 7.21875, "learning_rate": 0.0004869247409428335, "loss": 17.3113, "step": 40720 }, { "epoch": 0.07944191760387265, "grad_norm": 7.46875, "learning_rate": 0.0004869182389119242, "loss": 17.3607, "step": 40740 }, { "epoch": 0.07948091707250489, "grad_norm": 7.34375, "learning_rate": 0.0004869117368810148, "loss": 17.4655, "step": 40760 }, { "epoch": 0.07951991654113713, "grad_norm": 7.5, "learning_rate": 0.00048690523485010544, "loss": 17.3979, "step": 40780 }, { "epoch": 0.07955891600976937, "grad_norm": 7.9375, "learning_rate": 0.0004868987328191961, "loss": 17.3986, "step": 40800 }, { "epoch": 0.0795979154784016, "grad_norm": 7.5, "learning_rate": 0.0004868922307882867, "loss": 17.4116, "step": 40820 }, { "epoch": 0.07963691494703384, "grad_norm": 6.96875, "learning_rate": 0.00048688572875737736, "loss": 17.3509, "step": 40840 }, { "epoch": 0.07967591441566609, "grad_norm": 7.09375, "learning_rate": 0.000486879226726468, "loss": 17.4211, "step": 40860 }, { "epoch": 0.07971491388429833, "grad_norm": 7.71875, "learning_rate": 0.00048687272469555867, "loss": 17.4283, "step": 40880 }, { "epoch": 0.07975391335293057, "grad_norm": 7.25, "learning_rate": 0.00048686622266464933, "loss": 17.3431, "step": 40900 }, { "epoch": 0.07979291282156281, "grad_norm": 7.3125, "learning_rate": 0.00048685972063373993, "loss": 17.3584, "step": 40920 }, { "epoch": 0.07983191229019504, "grad_norm": 6.59375, "learning_rate": 0.0004868532186028306, "loss": 17.3646, "step": 40940 }, { "epoch": 0.07987091175882728, "grad_norm": 7.53125, "learning_rate": 0.00048684671657192125, "loss": 17.2867, "step": 40960 }, { "epoch": 0.07990991122745952, "grad_norm": 6.9375, "learning_rate": 0.0004868402145410119, "loss": 17.3464, "step": 40980 }, { "epoch": 0.07994891069609177, "grad_norm": 7.5625, "learning_rate": 0.00048683371251010256, "loss": 17.3874, "step": 41000 }, { "epoch": 0.07998791016472401, "grad_norm": 7.6875, "learning_rate": 0.0004868272104791932, "loss": 17.303, "step": 41020 }, { "epoch": 0.08002690963335625, "grad_norm": 6.9375, "learning_rate": 0.0004868207084482839, "loss": 17.2858, "step": 41040 }, { "epoch": 0.08006590910198849, "grad_norm": 6.9375, "learning_rate": 0.0004868142064173745, "loss": 17.3162, "step": 41060 }, { "epoch": 0.08010490857062072, "grad_norm": 7.40625, "learning_rate": 0.0004868077043864651, "loss": 17.3105, "step": 41080 }, { "epoch": 0.08014390803925296, "grad_norm": 7.5625, "learning_rate": 0.00048680120235555574, "loss": 17.4019, "step": 41100 }, { "epoch": 0.0801829075078852, "grad_norm": 6.9375, "learning_rate": 0.0004867947003246464, "loss": 17.3628, "step": 41120 }, { "epoch": 0.08022190697651745, "grad_norm": 8.25, "learning_rate": 0.00048678819829373705, "loss": 17.3319, "step": 41140 }, { "epoch": 0.08026090644514969, "grad_norm": 7.6875, "learning_rate": 0.0004867816962628277, "loss": 17.3767, "step": 41160 }, { "epoch": 0.08029990591378193, "grad_norm": 6.90625, "learning_rate": 0.00048677519423191836, "loss": 17.3502, "step": 41180 }, { "epoch": 0.08033890538241416, "grad_norm": 8.8125, "learning_rate": 0.000486768692201009, "loss": 17.3584, "step": 41200 }, { "epoch": 0.0803779048510464, "grad_norm": 9.1875, "learning_rate": 0.0004867621901700996, "loss": 17.3189, "step": 41220 }, { "epoch": 0.08041690431967864, "grad_norm": 8.125, "learning_rate": 0.0004867556881391903, "loss": 17.3642, "step": 41240 }, { "epoch": 0.08045590378831088, "grad_norm": 6.96875, "learning_rate": 0.00048674918610828094, "loss": 17.3477, "step": 41260 }, { "epoch": 0.08049490325694313, "grad_norm": 8.75, "learning_rate": 0.0004867426840773716, "loss": 17.3003, "step": 41280 }, { "epoch": 0.08053390272557537, "grad_norm": 7.59375, "learning_rate": 0.00048673618204646225, "loss": 17.3271, "step": 41300 }, { "epoch": 0.08057290219420761, "grad_norm": 7.28125, "learning_rate": 0.00048672968001555285, "loss": 17.3954, "step": 41320 }, { "epoch": 0.08061190166283984, "grad_norm": 7.1875, "learning_rate": 0.0004867231779846435, "loss": 17.252, "step": 41340 }, { "epoch": 0.08065090113147208, "grad_norm": 7.9375, "learning_rate": 0.00048671667595373417, "loss": 17.387, "step": 41360 }, { "epoch": 0.08068990060010432, "grad_norm": 7.0, "learning_rate": 0.00048671017392282477, "loss": 17.3246, "step": 41380 }, { "epoch": 0.08072890006873656, "grad_norm": 8.0, "learning_rate": 0.00048670367189191543, "loss": 17.2779, "step": 41400 }, { "epoch": 0.0807678995373688, "grad_norm": 7.09375, "learning_rate": 0.0004866971698610061, "loss": 17.4244, "step": 41420 }, { "epoch": 0.08080689900600105, "grad_norm": 7.15625, "learning_rate": 0.00048669066783009674, "loss": 17.2992, "step": 41440 }, { "epoch": 0.08084589847463328, "grad_norm": 7.03125, "learning_rate": 0.0004866841657991874, "loss": 17.4073, "step": 41460 }, { "epoch": 0.08088489794326552, "grad_norm": 7.53125, "learning_rate": 0.00048667766376827806, "loss": 17.2692, "step": 41480 }, { "epoch": 0.08092389741189776, "grad_norm": 6.9375, "learning_rate": 0.0004866711617373687, "loss": 17.3557, "step": 41500 }, { "epoch": 0.08096289688053, "grad_norm": 6.6875, "learning_rate": 0.00048666465970645937, "loss": 17.3957, "step": 41520 }, { "epoch": 0.08100189634916224, "grad_norm": 6.96875, "learning_rate": 0.00048665815767554997, "loss": 17.3048, "step": 41540 }, { "epoch": 0.08104089581779449, "grad_norm": 6.59375, "learning_rate": 0.0004866516556446406, "loss": 17.3182, "step": 41560 }, { "epoch": 0.08107989528642673, "grad_norm": 7.5625, "learning_rate": 0.00048664515361373123, "loss": 17.3626, "step": 41580 }, { "epoch": 0.08111889475505896, "grad_norm": 7.53125, "learning_rate": 0.0004866386515828219, "loss": 17.3518, "step": 41600 }, { "epoch": 0.0811578942236912, "grad_norm": 7.46875, "learning_rate": 0.00048663214955191255, "loss": 17.3003, "step": 41620 }, { "epoch": 0.08119689369232344, "grad_norm": 8.625, "learning_rate": 0.0004866256475210032, "loss": 17.3656, "step": 41640 }, { "epoch": 0.08123589316095568, "grad_norm": 6.9375, "learning_rate": 0.00048661914549009386, "loss": 17.3252, "step": 41660 }, { "epoch": 0.08127489262958792, "grad_norm": 7.125, "learning_rate": 0.00048661264345918446, "loss": 17.2411, "step": 41680 }, { "epoch": 0.08131389209822017, "grad_norm": 7.625, "learning_rate": 0.0004866061414282751, "loss": 17.3029, "step": 41700 }, { "epoch": 0.0813528915668524, "grad_norm": 7.5625, "learning_rate": 0.0004865996393973658, "loss": 17.2706, "step": 41720 }, { "epoch": 0.08139189103548464, "grad_norm": 6.375, "learning_rate": 0.00048659313736645643, "loss": 17.4546, "step": 41740 }, { "epoch": 0.08143089050411688, "grad_norm": 7.6875, "learning_rate": 0.0004865866353355471, "loss": 17.3023, "step": 41760 }, { "epoch": 0.08146988997274912, "grad_norm": 7.28125, "learning_rate": 0.00048658013330463775, "loss": 17.3162, "step": 41780 }, { "epoch": 0.08150888944138136, "grad_norm": 8.6875, "learning_rate": 0.00048657363127372835, "loss": 17.3536, "step": 41800 }, { "epoch": 0.0815478889100136, "grad_norm": 7.96875, "learning_rate": 0.000486567129242819, "loss": 17.3163, "step": 41820 }, { "epoch": 0.08158688837864585, "grad_norm": 7.125, "learning_rate": 0.0004865606272119096, "loss": 17.2893, "step": 41840 }, { "epoch": 0.08162588784727808, "grad_norm": 8.5625, "learning_rate": 0.00048655412518100027, "loss": 17.336, "step": 41860 }, { "epoch": 0.08166488731591032, "grad_norm": 6.625, "learning_rate": 0.0004865476231500909, "loss": 17.2439, "step": 41880 }, { "epoch": 0.08170388678454256, "grad_norm": 6.78125, "learning_rate": 0.0004865411211191816, "loss": 17.3177, "step": 41900 }, { "epoch": 0.0817428862531748, "grad_norm": 7.28125, "learning_rate": 0.00048653461908827224, "loss": 17.2565, "step": 41920 }, { "epoch": 0.08178188572180704, "grad_norm": 7.125, "learning_rate": 0.0004865281170573629, "loss": 17.2576, "step": 41940 }, { "epoch": 0.08182088519043929, "grad_norm": 7.5, "learning_rate": 0.00048652161502645355, "loss": 17.3228, "step": 41960 }, { "epoch": 0.08185988465907151, "grad_norm": 6.90625, "learning_rate": 0.0004865151129955442, "loss": 17.3949, "step": 41980 }, { "epoch": 0.08189888412770376, "grad_norm": 6.84375, "learning_rate": 0.0004865086109646348, "loss": 17.292, "step": 42000 }, { "epoch": 0.081937883596336, "grad_norm": 7.0625, "learning_rate": 0.00048650210893372547, "loss": 17.3624, "step": 42020 }, { "epoch": 0.08197688306496824, "grad_norm": 7.25, "learning_rate": 0.00048649560690281607, "loss": 17.2483, "step": 42040 }, { "epoch": 0.08201588253360048, "grad_norm": 7.15625, "learning_rate": 0.00048648910487190673, "loss": 17.2416, "step": 42060 }, { "epoch": 0.08205488200223272, "grad_norm": 7.9375, "learning_rate": 0.0004864826028409974, "loss": 17.3319, "step": 42080 }, { "epoch": 0.08209388147086497, "grad_norm": 8.125, "learning_rate": 0.00048647610081008804, "loss": 17.2649, "step": 42100 }, { "epoch": 0.0821328809394972, "grad_norm": 7.0, "learning_rate": 0.0004864695987791787, "loss": 17.2337, "step": 42120 }, { "epoch": 0.08217188040812944, "grad_norm": 8.6875, "learning_rate": 0.0004864630967482693, "loss": 17.1953, "step": 42140 }, { "epoch": 0.08221087987676168, "grad_norm": 7.59375, "learning_rate": 0.00048645659471735996, "loss": 17.2625, "step": 42160 }, { "epoch": 0.08224987934539392, "grad_norm": 7.1875, "learning_rate": 0.0004864500926864506, "loss": 17.3246, "step": 42180 }, { "epoch": 0.08228887881402616, "grad_norm": 7.75, "learning_rate": 0.0004864435906555413, "loss": 17.2691, "step": 42200 }, { "epoch": 0.0823278782826584, "grad_norm": 7.0, "learning_rate": 0.00048643708862463193, "loss": 17.3746, "step": 42220 }, { "epoch": 0.08236687775129063, "grad_norm": 7.03125, "learning_rate": 0.0004864305865937226, "loss": 17.3057, "step": 42240 }, { "epoch": 0.08240587721992287, "grad_norm": 7.40625, "learning_rate": 0.00048642408456281324, "loss": 17.3146, "step": 42260 }, { "epoch": 0.08244487668855512, "grad_norm": 7.125, "learning_rate": 0.00048641758253190385, "loss": 17.3207, "step": 42280 }, { "epoch": 0.08248387615718736, "grad_norm": 7.4375, "learning_rate": 0.00048641108050099445, "loss": 17.3296, "step": 42300 }, { "epoch": 0.0825228756258196, "grad_norm": 7.375, "learning_rate": 0.0004864045784700851, "loss": 17.2988, "step": 42320 }, { "epoch": 0.08256187509445184, "grad_norm": 7.6875, "learning_rate": 0.00048639807643917576, "loss": 17.2477, "step": 42340 }, { "epoch": 0.08260087456308408, "grad_norm": 7.65625, "learning_rate": 0.0004863915744082664, "loss": 17.3086, "step": 42360 }, { "epoch": 0.08263987403171631, "grad_norm": 7.4375, "learning_rate": 0.0004863850723773571, "loss": 17.2836, "step": 42380 }, { "epoch": 0.08267887350034855, "grad_norm": 7.03125, "learning_rate": 0.00048637857034644773, "loss": 17.2822, "step": 42400 }, { "epoch": 0.0827178729689808, "grad_norm": 7.125, "learning_rate": 0.0004863720683155384, "loss": 17.2869, "step": 42420 }, { "epoch": 0.08275687243761304, "grad_norm": 7.25, "learning_rate": 0.00048636556628462905, "loss": 17.2606, "step": 42440 }, { "epoch": 0.08279587190624528, "grad_norm": 6.71875, "learning_rate": 0.00048635906425371965, "loss": 17.2897, "step": 42460 }, { "epoch": 0.08283487137487752, "grad_norm": 7.375, "learning_rate": 0.0004863525622228103, "loss": 17.3211, "step": 42480 }, { "epoch": 0.08287387084350975, "grad_norm": 6.8125, "learning_rate": 0.00048634606019190096, "loss": 17.2688, "step": 42500 }, { "epoch": 0.08291287031214199, "grad_norm": 8.3125, "learning_rate": 0.00048633955816099157, "loss": 17.3131, "step": 42520 }, { "epoch": 0.08295186978077423, "grad_norm": 6.9375, "learning_rate": 0.0004863330561300822, "loss": 17.2749, "step": 42540 }, { "epoch": 0.08299086924940648, "grad_norm": 7.0625, "learning_rate": 0.0004863265540991729, "loss": 17.2178, "step": 42560 }, { "epoch": 0.08302986871803872, "grad_norm": 7.375, "learning_rate": 0.00048632005206826354, "loss": 17.2386, "step": 42580 }, { "epoch": 0.08306886818667096, "grad_norm": 6.71875, "learning_rate": 0.0004863135500373542, "loss": 17.2432, "step": 42600 }, { "epoch": 0.0831078676553032, "grad_norm": 7.6875, "learning_rate": 0.0004863070480064448, "loss": 17.2513, "step": 42620 }, { "epoch": 0.08314686712393543, "grad_norm": 7.15625, "learning_rate": 0.00048630054597553546, "loss": 17.3046, "step": 42640 }, { "epoch": 0.08318586659256767, "grad_norm": 7.78125, "learning_rate": 0.0004862940439446261, "loss": 17.2278, "step": 42660 }, { "epoch": 0.08322486606119991, "grad_norm": 8.0, "learning_rate": 0.00048628754191371677, "loss": 17.2885, "step": 42680 }, { "epoch": 0.08326386552983216, "grad_norm": 7.1875, "learning_rate": 0.0004862810398828074, "loss": 17.268, "step": 42700 }, { "epoch": 0.0833028649984644, "grad_norm": 7.9375, "learning_rate": 0.0004862745378518981, "loss": 17.3266, "step": 42720 }, { "epoch": 0.08334186446709664, "grad_norm": 7.5, "learning_rate": 0.00048626803582098874, "loss": 17.331, "step": 42740 }, { "epoch": 0.08338086393572887, "grad_norm": 8.4375, "learning_rate": 0.00048626153379007934, "loss": 17.326, "step": 42760 }, { "epoch": 0.08341986340436111, "grad_norm": 7.03125, "learning_rate": 0.00048625503175916995, "loss": 17.2474, "step": 42780 }, { "epoch": 0.08345886287299335, "grad_norm": 8.0, "learning_rate": 0.0004862485297282606, "loss": 17.2174, "step": 42800 }, { "epoch": 0.0834978623416256, "grad_norm": 7.4375, "learning_rate": 0.00048624202769735126, "loss": 17.2893, "step": 42820 }, { "epoch": 0.08353686181025784, "grad_norm": 6.71875, "learning_rate": 0.0004862355256664419, "loss": 17.2931, "step": 42840 }, { "epoch": 0.08357586127889008, "grad_norm": 8.8125, "learning_rate": 0.0004862290236355326, "loss": 17.2065, "step": 42860 }, { "epoch": 0.08361486074752232, "grad_norm": 7.46875, "learning_rate": 0.00048622252160462323, "loss": 17.3003, "step": 42880 }, { "epoch": 0.08365386021615455, "grad_norm": 6.71875, "learning_rate": 0.0004862160195737139, "loss": 17.2048, "step": 42900 }, { "epoch": 0.08369285968478679, "grad_norm": 7.3125, "learning_rate": 0.0004862095175428045, "loss": 17.2964, "step": 42920 }, { "epoch": 0.08373185915341903, "grad_norm": 7.3125, "learning_rate": 0.00048620301551189515, "loss": 17.2578, "step": 42940 }, { "epoch": 0.08377085862205128, "grad_norm": 7.0, "learning_rate": 0.0004861965134809858, "loss": 17.2819, "step": 42960 }, { "epoch": 0.08380985809068352, "grad_norm": 8.3125, "learning_rate": 0.00048619001145007646, "loss": 17.3285, "step": 42980 }, { "epoch": 0.08384885755931576, "grad_norm": 7.34375, "learning_rate": 0.0004861835094191671, "loss": 17.2176, "step": 43000 }, { "epoch": 0.08388785702794799, "grad_norm": 7.3125, "learning_rate": 0.0004861770073882577, "loss": 17.3057, "step": 43020 }, { "epoch": 0.08392685649658023, "grad_norm": 6.6875, "learning_rate": 0.0004861705053573484, "loss": 17.1772, "step": 43040 }, { "epoch": 0.08396585596521247, "grad_norm": 7.125, "learning_rate": 0.00048616400332643903, "loss": 17.2241, "step": 43060 }, { "epoch": 0.08400485543384471, "grad_norm": 7.09375, "learning_rate": 0.00048615750129552964, "loss": 17.2328, "step": 43080 }, { "epoch": 0.08404385490247696, "grad_norm": 7.34375, "learning_rate": 0.0004861509992646203, "loss": 17.3095, "step": 43100 }, { "epoch": 0.0840828543711092, "grad_norm": 7.65625, "learning_rate": 0.00048614449723371095, "loss": 17.3058, "step": 43120 }, { "epoch": 0.08412185383974144, "grad_norm": 7.1875, "learning_rate": 0.0004861379952028016, "loss": 17.1983, "step": 43140 }, { "epoch": 0.08416085330837367, "grad_norm": 7.875, "learning_rate": 0.00048613149317189227, "loss": 17.2054, "step": 43160 }, { "epoch": 0.08419985277700591, "grad_norm": 7.9375, "learning_rate": 0.0004861249911409829, "loss": 17.2308, "step": 43180 }, { "epoch": 0.08423885224563815, "grad_norm": 6.75, "learning_rate": 0.0004861184891100736, "loss": 17.2112, "step": 43200 }, { "epoch": 0.0842778517142704, "grad_norm": 7.5, "learning_rate": 0.0004861119870791642, "loss": 17.1816, "step": 43220 }, { "epoch": 0.08431685118290264, "grad_norm": 7.625, "learning_rate": 0.00048610548504825484, "loss": 17.2321, "step": 43240 }, { "epoch": 0.08435585065153488, "grad_norm": 7.40625, "learning_rate": 0.00048609898301734544, "loss": 17.2305, "step": 43260 }, { "epoch": 0.0843948501201671, "grad_norm": 8.125, "learning_rate": 0.0004860924809864361, "loss": 17.2229, "step": 43280 }, { "epoch": 0.08443384958879935, "grad_norm": 7.84375, "learning_rate": 0.00048608597895552676, "loss": 17.2063, "step": 43300 }, { "epoch": 0.08447284905743159, "grad_norm": 6.8125, "learning_rate": 0.0004860794769246174, "loss": 17.1544, "step": 43320 }, { "epoch": 0.08451184852606383, "grad_norm": 7.65625, "learning_rate": 0.00048607297489370807, "loss": 17.2307, "step": 43340 }, { "epoch": 0.08455084799469607, "grad_norm": 8.125, "learning_rate": 0.0004860664728627987, "loss": 17.1579, "step": 43360 }, { "epoch": 0.08458984746332832, "grad_norm": 6.9375, "learning_rate": 0.00048605997083188933, "loss": 17.2618, "step": 43380 }, { "epoch": 0.08462884693196056, "grad_norm": 7.59375, "learning_rate": 0.00048605346880098, "loss": 17.1997, "step": 43400 }, { "epoch": 0.08466784640059279, "grad_norm": 7.71875, "learning_rate": 0.00048604696677007064, "loss": 17.2368, "step": 43420 }, { "epoch": 0.08470684586922503, "grad_norm": 6.65625, "learning_rate": 0.0004860404647391613, "loss": 17.2207, "step": 43440 }, { "epoch": 0.08474584533785727, "grad_norm": 7.65625, "learning_rate": 0.00048603396270825196, "loss": 17.2276, "step": 43460 }, { "epoch": 0.08478484480648951, "grad_norm": 7.25, "learning_rate": 0.0004860274606773426, "loss": 17.2381, "step": 43480 }, { "epoch": 0.08482384427512175, "grad_norm": 7.375, "learning_rate": 0.0004860209586464332, "loss": 17.2022, "step": 43500 }, { "epoch": 0.084862843743754, "grad_norm": 6.875, "learning_rate": 0.0004860144566155239, "loss": 17.2265, "step": 43520 }, { "epoch": 0.08490184321238622, "grad_norm": 6.9375, "learning_rate": 0.0004860079545846145, "loss": 17.298, "step": 43540 }, { "epoch": 0.08494084268101847, "grad_norm": 7.28125, "learning_rate": 0.00048600145255370513, "loss": 17.2676, "step": 43560 }, { "epoch": 0.08497984214965071, "grad_norm": 6.96875, "learning_rate": 0.0004859949505227958, "loss": 17.1672, "step": 43580 }, { "epoch": 0.08501884161828295, "grad_norm": 7.78125, "learning_rate": 0.00048598844849188645, "loss": 17.2163, "step": 43600 }, { "epoch": 0.08505784108691519, "grad_norm": 7.5, "learning_rate": 0.0004859819464609771, "loss": 17.1593, "step": 43620 }, { "epoch": 0.08509684055554743, "grad_norm": 7.40625, "learning_rate": 0.00048597544443006776, "loss": 17.208, "step": 43640 }, { "epoch": 0.08513584002417968, "grad_norm": 7.9375, "learning_rate": 0.0004859689423991584, "loss": 17.2611, "step": 43660 }, { "epoch": 0.0851748394928119, "grad_norm": 7.53125, "learning_rate": 0.000485962440368249, "loss": 17.2204, "step": 43680 }, { "epoch": 0.08521383896144415, "grad_norm": 6.9375, "learning_rate": 0.0004859559383373397, "loss": 17.1997, "step": 43700 }, { "epoch": 0.08525283843007639, "grad_norm": 7.125, "learning_rate": 0.00048594943630643033, "loss": 17.2255, "step": 43720 }, { "epoch": 0.08529183789870863, "grad_norm": 7.875, "learning_rate": 0.00048594293427552094, "loss": 17.0983, "step": 43740 }, { "epoch": 0.08533083736734087, "grad_norm": 7.09375, "learning_rate": 0.0004859364322446116, "loss": 17.2287, "step": 43760 }, { "epoch": 0.08536983683597311, "grad_norm": 7.1875, "learning_rate": 0.00048592993021370225, "loss": 17.2182, "step": 43780 }, { "epoch": 0.08540883630460534, "grad_norm": 7.40625, "learning_rate": 0.0004859234281827929, "loss": 17.1895, "step": 43800 }, { "epoch": 0.08544783577323758, "grad_norm": 7.40625, "learning_rate": 0.00048591692615188357, "loss": 17.2419, "step": 43820 }, { "epoch": 0.08548683524186983, "grad_norm": 7.1875, "learning_rate": 0.00048591042412097417, "loss": 17.1576, "step": 43840 }, { "epoch": 0.08552583471050207, "grad_norm": 6.90625, "learning_rate": 0.0004859039220900648, "loss": 17.1859, "step": 43860 }, { "epoch": 0.08556483417913431, "grad_norm": 6.9375, "learning_rate": 0.0004858974200591555, "loss": 17.1235, "step": 43880 }, { "epoch": 0.08560383364776655, "grad_norm": 6.71875, "learning_rate": 0.00048589091802824614, "loss": 17.1363, "step": 43900 }, { "epoch": 0.0856428331163988, "grad_norm": 6.875, "learning_rate": 0.0004858844159973368, "loss": 17.2101, "step": 43920 }, { "epoch": 0.08568183258503102, "grad_norm": 7.53125, "learning_rate": 0.00048587791396642745, "loss": 17.1428, "step": 43940 }, { "epoch": 0.08572083205366327, "grad_norm": 7.3125, "learning_rate": 0.0004858714119355181, "loss": 17.1413, "step": 43960 }, { "epoch": 0.08575983152229551, "grad_norm": 7.09375, "learning_rate": 0.0004858649099046087, "loss": 17.2241, "step": 43980 }, { "epoch": 0.08579883099092775, "grad_norm": 7.53125, "learning_rate": 0.0004858584078736993, "loss": 17.2386, "step": 44000 }, { "epoch": 0.0652074729363805, "grad_norm": 7.21875, "learning_rate": 0.0004892531243702858, "loss": 18.4759, "step": 44020 }, { "epoch": 0.06523709923030889, "grad_norm": 7.5, "learning_rate": 0.0004892481854352782, "loss": 18.408, "step": 44040 }, { "epoch": 0.06526672552423728, "grad_norm": 7.90625, "learning_rate": 0.0004892432465002707, "loss": 18.2598, "step": 44060 }, { "epoch": 0.06529635181816566, "grad_norm": 7.625, "learning_rate": 0.0004892383075652631, "loss": 18.2414, "step": 44080 }, { "epoch": 0.06532597811209405, "grad_norm": 7.5, "learning_rate": 0.0004892333686302555, "loss": 18.2764, "step": 44100 }, { "epoch": 0.06535560440602244, "grad_norm": 6.59375, "learning_rate": 0.000489228429695248, "loss": 18.2161, "step": 44120 }, { "epoch": 0.06538523069995082, "grad_norm": 7.125, "learning_rate": 0.0004892234907602404, "loss": 18.2661, "step": 44140 }, { "epoch": 0.06541485699387921, "grad_norm": 7.0625, "learning_rate": 0.0004892185518252329, "loss": 18.2414, "step": 44160 }, { "epoch": 0.0654444832878076, "grad_norm": 7.34375, "learning_rate": 0.0004892136128902253, "loss": 18.1743, "step": 44180 }, { "epoch": 0.06547410958173598, "grad_norm": 7.34375, "learning_rate": 0.0004892086739552177, "loss": 18.1813, "step": 44200 }, { "epoch": 0.06550373587566437, "grad_norm": 7.03125, "learning_rate": 0.0004892037350202102, "loss": 18.239, "step": 44220 }, { "epoch": 0.06553336216959275, "grad_norm": 7.375, "learning_rate": 0.0004891987960852026, "loss": 18.084, "step": 44240 }, { "epoch": 0.06556298846352114, "grad_norm": 7.375, "learning_rate": 0.000489193857150195, "loss": 18.1419, "step": 44260 }, { "epoch": 0.06559261475744953, "grad_norm": 8.1875, "learning_rate": 0.0004891889182151874, "loss": 18.1111, "step": 44280 }, { "epoch": 0.06562224105137791, "grad_norm": 7.09375, "learning_rate": 0.0004891839792801799, "loss": 18.0775, "step": 44300 }, { "epoch": 0.0656518673453063, "grad_norm": 7.34375, "learning_rate": 0.0004891790403451723, "loss": 18.1751, "step": 44320 }, { "epoch": 0.0656814936392347, "grad_norm": 6.96875, "learning_rate": 0.0004891741014101648, "loss": 18.1515, "step": 44340 }, { "epoch": 0.06571111993316309, "grad_norm": 7.6875, "learning_rate": 0.0004891691624751572, "loss": 18.1534, "step": 44360 }, { "epoch": 0.06574074622709147, "grad_norm": 6.96875, "learning_rate": 0.0004891642235401497, "loss": 18.1406, "step": 44380 }, { "epoch": 0.06577037252101986, "grad_norm": 8.625, "learning_rate": 0.000489159284605142, "loss": 18.1022, "step": 44400 }, { "epoch": 0.06579999881494825, "grad_norm": 7.9375, "learning_rate": 0.0004891543456701344, "loss": 18.0375, "step": 44420 }, { "epoch": 0.06582962510887663, "grad_norm": 8.5625, "learning_rate": 0.0004891494067351269, "loss": 18.0538, "step": 44440 }, { "epoch": 0.06585925140280502, "grad_norm": 6.90625, "learning_rate": 0.0004891444678001193, "loss": 18.0487, "step": 44460 }, { "epoch": 0.0658888776967334, "grad_norm": 7.8125, "learning_rate": 0.0004891395288651117, "loss": 18.1347, "step": 44480 }, { "epoch": 0.06591850399066179, "grad_norm": 7.21875, "learning_rate": 0.0004891345899301041, "loss": 18.0459, "step": 44500 }, { "epoch": 0.06594813028459018, "grad_norm": 8.6875, "learning_rate": 0.0004891296509950966, "loss": 18.0135, "step": 44520 }, { "epoch": 0.06597775657851856, "grad_norm": 7.8125, "learning_rate": 0.000489124712060089, "loss": 18.0667, "step": 44540 }, { "epoch": 0.06600738287244695, "grad_norm": 7.34375, "learning_rate": 0.0004891197731250815, "loss": 18.0663, "step": 44560 }, { "epoch": 0.06603700916637534, "grad_norm": 7.5, "learning_rate": 0.0004891148341900739, "loss": 18.035, "step": 44580 }, { "epoch": 0.06606663546030372, "grad_norm": 7.28125, "learning_rate": 0.0004891098952550664, "loss": 18.0706, "step": 44600 }, { "epoch": 0.06609626175423211, "grad_norm": 6.6875, "learning_rate": 0.0004891049563200588, "loss": 18.0513, "step": 44620 }, { "epoch": 0.0661258880481605, "grad_norm": 7.78125, "learning_rate": 0.0004891000173850512, "loss": 18.0185, "step": 44640 }, { "epoch": 0.0661555143420889, "grad_norm": 6.90625, "learning_rate": 0.0004890950784500437, "loss": 17.9851, "step": 44660 }, { "epoch": 0.06618514063601728, "grad_norm": 6.65625, "learning_rate": 0.0004890901395150361, "loss": 18.0043, "step": 44680 }, { "epoch": 0.06621476692994567, "grad_norm": 7.28125, "learning_rate": 0.0004890852005800285, "loss": 17.9712, "step": 44700 }, { "epoch": 0.06624439322387406, "grad_norm": 7.34375, "learning_rate": 0.000489080261645021, "loss": 18.0501, "step": 44720 }, { "epoch": 0.06627401951780244, "grad_norm": 7.59375, "learning_rate": 0.0004890753227100134, "loss": 18.029, "step": 44740 }, { "epoch": 0.06630364581173083, "grad_norm": 6.5625, "learning_rate": 0.0004890703837750059, "loss": 18.0173, "step": 44760 }, { "epoch": 0.06633327210565922, "grad_norm": 7.71875, "learning_rate": 0.0004890654448399983, "loss": 17.9833, "step": 44780 }, { "epoch": 0.0663628983995876, "grad_norm": 7.125, "learning_rate": 0.0004890605059049907, "loss": 18.0, "step": 44800 }, { "epoch": 0.06639252469351599, "grad_norm": 7.15625, "learning_rate": 0.0004890555669699832, "loss": 17.9968, "step": 44820 }, { "epoch": 0.06642215098744438, "grad_norm": 6.96875, "learning_rate": 0.0004890506280349756, "loss": 18.009, "step": 44840 }, { "epoch": 0.06645177728137276, "grad_norm": 6.75, "learning_rate": 0.000489045689099968, "loss": 17.9378, "step": 44860 }, { "epoch": 0.06648140357530115, "grad_norm": 8.625, "learning_rate": 0.0004890407501649604, "loss": 17.9775, "step": 44880 }, { "epoch": 0.06651102986922953, "grad_norm": 7.28125, "learning_rate": 0.0004890358112299529, "loss": 17.9533, "step": 44900 }, { "epoch": 0.06654065616315792, "grad_norm": 6.75, "learning_rate": 0.0004890308722949453, "loss": 18.0079, "step": 44920 }, { "epoch": 0.06657028245708631, "grad_norm": 6.8125, "learning_rate": 0.0004890259333599378, "loss": 17.9616, "step": 44940 }, { "epoch": 0.0665999087510147, "grad_norm": 7.28125, "learning_rate": 0.0004890209944249302, "loss": 17.9617, "step": 44960 }, { "epoch": 0.0666295350449431, "grad_norm": 7.25, "learning_rate": 0.0004890160554899227, "loss": 17.9462, "step": 44980 }, { "epoch": 0.06665916133887148, "grad_norm": 7.21875, "learning_rate": 0.000489011116554915, "loss": 17.8648, "step": 45000 }, { "epoch": 0.06668878763279987, "grad_norm": 7.6875, "learning_rate": 0.0004890061776199074, "loss": 17.9662, "step": 45020 }, { "epoch": 0.06671841392672825, "grad_norm": 6.96875, "learning_rate": 0.0004890012386848999, "loss": 17.9083, "step": 45040 }, { "epoch": 0.06674804022065664, "grad_norm": 10.25, "learning_rate": 0.0004889962997498923, "loss": 17.9185, "step": 45060 }, { "epoch": 0.06677766651458503, "grad_norm": 7.9375, "learning_rate": 0.0004889913608148847, "loss": 17.862, "step": 45080 }, { "epoch": 0.06680729280851341, "grad_norm": 8.6875, "learning_rate": 0.0004889864218798772, "loss": 17.8337, "step": 45100 }, { "epoch": 0.0668369191024418, "grad_norm": 6.5625, "learning_rate": 0.0004889814829448696, "loss": 17.8831, "step": 45120 }, { "epoch": 0.06686654539637019, "grad_norm": 7.625, "learning_rate": 0.0004889765440098621, "loss": 17.8514, "step": 45140 }, { "epoch": 0.06689617169029857, "grad_norm": 7.625, "learning_rate": 0.0004889716050748545, "loss": 17.981, "step": 45160 }, { "epoch": 0.06692579798422696, "grad_norm": 7.125, "learning_rate": 0.0004889666661398469, "loss": 17.8824, "step": 45180 }, { "epoch": 0.06695542427815535, "grad_norm": 7.25, "learning_rate": 0.0004889617272048394, "loss": 17.9314, "step": 45200 }, { "epoch": 0.06698505057208373, "grad_norm": 7.09375, "learning_rate": 0.0004889567882698318, "loss": 17.915, "step": 45220 }, { "epoch": 0.06701467686601212, "grad_norm": 7.15625, "learning_rate": 0.0004889518493348242, "loss": 17.8693, "step": 45240 }, { "epoch": 0.0670443031599405, "grad_norm": 6.40625, "learning_rate": 0.0004889469103998167, "loss": 17.8761, "step": 45260 }, { "epoch": 0.06707392945386889, "grad_norm": 6.59375, "learning_rate": 0.0004889419714648091, "loss": 17.8901, "step": 45280 }, { "epoch": 0.06710355574779729, "grad_norm": 6.40625, "learning_rate": 0.0004889370325298015, "loss": 17.8894, "step": 45300 }, { "epoch": 0.06713318204172568, "grad_norm": 7.46875, "learning_rate": 0.000488932093594794, "loss": 17.8789, "step": 45320 }, { "epoch": 0.06716280833565406, "grad_norm": 7.4375, "learning_rate": 0.0004889271546597864, "loss": 17.8612, "step": 45340 }, { "epoch": 0.06719243462958245, "grad_norm": 7.6875, "learning_rate": 0.0004889222157247789, "loss": 17.8959, "step": 45360 }, { "epoch": 0.06722206092351084, "grad_norm": 7.6875, "learning_rate": 0.0004889172767897713, "loss": 17.8669, "step": 45380 }, { "epoch": 0.06725168721743922, "grad_norm": 6.90625, "learning_rate": 0.0004889123378547637, "loss": 17.8867, "step": 45400 }, { "epoch": 0.06728131351136761, "grad_norm": 7.0, "learning_rate": 0.0004889073989197562, "loss": 17.8982, "step": 45420 }, { "epoch": 0.067310939805296, "grad_norm": 6.09375, "learning_rate": 0.0004889024599847486, "loss": 17.8678, "step": 45440 }, { "epoch": 0.06734056609922438, "grad_norm": 6.1875, "learning_rate": 0.000488897521049741, "loss": 17.8527, "step": 45460 }, { "epoch": 0.06737019239315277, "grad_norm": 7.78125, "learning_rate": 0.0004888925821147335, "loss": 17.8837, "step": 45480 }, { "epoch": 0.06739981868708116, "grad_norm": 6.53125, "learning_rate": 0.0004888876431797259, "loss": 17.8579, "step": 45500 }, { "epoch": 0.06742944498100954, "grad_norm": 6.84375, "learning_rate": 0.0004888827042447183, "loss": 17.8815, "step": 45520 }, { "epoch": 0.06745907127493793, "grad_norm": 7.09375, "learning_rate": 0.0004888777653097108, "loss": 17.8608, "step": 45540 }, { "epoch": 0.06748869756886632, "grad_norm": 7.09375, "learning_rate": 0.0004888728263747032, "loss": 17.7998, "step": 45560 }, { "epoch": 0.0675183238627947, "grad_norm": 6.9375, "learning_rate": 0.0004888678874396957, "loss": 17.8417, "step": 45580 }, { "epoch": 0.06754795015672309, "grad_norm": 8.5, "learning_rate": 0.000488862948504688, "loss": 17.8157, "step": 45600 }, { "epoch": 0.06757757645065149, "grad_norm": 7.375, "learning_rate": 0.0004888580095696804, "loss": 17.8218, "step": 45620 }, { "epoch": 0.06760720274457988, "grad_norm": 8.125, "learning_rate": 0.000488853070634673, "loss": 17.8199, "step": 45640 }, { "epoch": 0.06763682903850826, "grad_norm": 7.21875, "learning_rate": 0.0004888481316996653, "loss": 17.7669, "step": 45660 }, { "epoch": 0.06766645533243665, "grad_norm": 7.5625, "learning_rate": 0.0004888431927646577, "loss": 17.8696, "step": 45680 }, { "epoch": 0.06769608162636503, "grad_norm": 7.3125, "learning_rate": 0.0004888382538296502, "loss": 17.8186, "step": 45700 }, { "epoch": 0.06772570792029342, "grad_norm": 6.5, "learning_rate": 0.0004888333148946426, "loss": 17.8254, "step": 45720 }, { "epoch": 0.06775533421422181, "grad_norm": 7.03125, "learning_rate": 0.0004888283759596351, "loss": 17.8372, "step": 45740 }, { "epoch": 0.0677849605081502, "grad_norm": 7.25, "learning_rate": 0.0004888234370246275, "loss": 17.8293, "step": 45760 }, { "epoch": 0.06781458680207858, "grad_norm": 7.28125, "learning_rate": 0.0004888184980896199, "loss": 17.7951, "step": 45780 }, { "epoch": 0.06784421309600697, "grad_norm": 6.78125, "learning_rate": 0.0004888135591546124, "loss": 17.7708, "step": 45800 }, { "epoch": 0.06787383938993535, "grad_norm": 7.03125, "learning_rate": 0.0004888086202196048, "loss": 17.8286, "step": 45820 }, { "epoch": 0.06790346568386374, "grad_norm": 8.0, "learning_rate": 0.0004888036812845972, "loss": 17.7869, "step": 45840 }, { "epoch": 0.06793309197779213, "grad_norm": 7.03125, "learning_rate": 0.0004887987423495897, "loss": 17.8534, "step": 45860 }, { "epoch": 0.06796271827172051, "grad_norm": 6.9375, "learning_rate": 0.0004887938034145821, "loss": 17.7981, "step": 45880 }, { "epoch": 0.0679923445656489, "grad_norm": 7.15625, "learning_rate": 0.0004887888644795745, "loss": 17.8228, "step": 45900 }, { "epoch": 0.06802197085957729, "grad_norm": 6.8125, "learning_rate": 0.000488783925544567, "loss": 17.7328, "step": 45920 }, { "epoch": 0.06805159715350569, "grad_norm": 7.875, "learning_rate": 0.0004887789866095594, "loss": 17.8309, "step": 45940 }, { "epoch": 0.06808122344743407, "grad_norm": 7.53125, "learning_rate": 0.0004887740476745519, "loss": 17.8396, "step": 45960 }, { "epoch": 0.06811084974136246, "grad_norm": 7.03125, "learning_rate": 0.0004887691087395443, "loss": 17.774, "step": 45980 }, { "epoch": 0.06814047603529085, "grad_norm": 6.78125, "learning_rate": 0.0004887641698045367, "loss": 17.8104, "step": 46000 }, { "epoch": 0.06817010232921923, "grad_norm": 7.0, "learning_rate": 0.0004887592308695292, "loss": 17.77, "step": 46020 }, { "epoch": 0.06819972862314762, "grad_norm": 6.8125, "learning_rate": 0.0004887542919345216, "loss": 17.819, "step": 46040 }, { "epoch": 0.068229354917076, "grad_norm": 7.78125, "learning_rate": 0.000488749352999514, "loss": 17.7926, "step": 46060 }, { "epoch": 0.06825898121100439, "grad_norm": 7.03125, "learning_rate": 0.0004887444140645065, "loss": 17.7642, "step": 46080 }, { "epoch": 0.06828860750493278, "grad_norm": 8.25, "learning_rate": 0.0004887394751294989, "loss": 17.8077, "step": 46100 }, { "epoch": 0.06831823379886116, "grad_norm": 7.3125, "learning_rate": 0.0004887345361944914, "loss": 17.7471, "step": 46120 }, { "epoch": 0.06834786009278955, "grad_norm": 7.28125, "learning_rate": 0.0004887295972594838, "loss": 17.7648, "step": 46140 }, { "epoch": 0.06837748638671794, "grad_norm": 7.34375, "learning_rate": 0.0004887246583244763, "loss": 17.6991, "step": 46160 }, { "epoch": 0.06840711268064632, "grad_norm": 7.78125, "learning_rate": 0.0004887197193894687, "loss": 17.75, "step": 46180 }, { "epoch": 0.06843673897457471, "grad_norm": 7.6875, "learning_rate": 0.000488714780454461, "loss": 17.7536, "step": 46200 }, { "epoch": 0.0684663652685031, "grad_norm": 6.875, "learning_rate": 0.0004887098415194534, "loss": 17.7599, "step": 46220 }, { "epoch": 0.06849599156243148, "grad_norm": 7.96875, "learning_rate": 0.000488704902584446, "loss": 17.7974, "step": 46240 }, { "epoch": 0.06852561785635988, "grad_norm": 7.3125, "learning_rate": 0.0004886999636494383, "loss": 17.7135, "step": 46260 }, { "epoch": 0.06855524415028827, "grad_norm": 6.59375, "learning_rate": 0.0004886950247144307, "loss": 17.7284, "step": 46280 }, { "epoch": 0.06858487044421666, "grad_norm": 7.78125, "learning_rate": 0.0004886900857794232, "loss": 17.7145, "step": 46300 }, { "epoch": 0.06861449673814504, "grad_norm": 7.71875, "learning_rate": 0.0004886851468444156, "loss": 17.7295, "step": 46320 }, { "epoch": 0.06864412303207343, "grad_norm": 7.09375, "learning_rate": 0.0004886802079094081, "loss": 17.747, "step": 46340 }, { "epoch": 0.06867374932600182, "grad_norm": 7.5, "learning_rate": 0.0004886752689744005, "loss": 17.7722, "step": 46360 }, { "epoch": 0.0687033756199302, "grad_norm": 6.34375, "learning_rate": 0.0004886703300393929, "loss": 17.6833, "step": 46380 }, { "epoch": 0.06873300191385859, "grad_norm": 7.03125, "learning_rate": 0.0004886653911043854, "loss": 17.7234, "step": 46400 }, { "epoch": 0.06876262820778697, "grad_norm": 6.4375, "learning_rate": 0.0004886604521693778, "loss": 17.7592, "step": 46420 }, { "epoch": 0.06879225450171536, "grad_norm": 7.40625, "learning_rate": 0.0004886555132343702, "loss": 17.7566, "step": 46440 }, { "epoch": 0.06882188079564375, "grad_norm": 7.53125, "learning_rate": 0.0004886505742993627, "loss": 17.7591, "step": 46460 }, { "epoch": 0.06885150708957213, "grad_norm": 7.09375, "learning_rate": 0.0004886456353643551, "loss": 17.7118, "step": 46480 }, { "epoch": 0.06888113338350052, "grad_norm": 7.53125, "learning_rate": 0.0004886406964293476, "loss": 17.7433, "step": 46500 }, { "epoch": 0.0689107596774289, "grad_norm": 7.5, "learning_rate": 0.00048863575749434, "loss": 17.6708, "step": 46520 }, { "epoch": 0.0689403859713573, "grad_norm": 7.25, "learning_rate": 0.0004886308185593324, "loss": 17.6743, "step": 46540 }, { "epoch": 0.06897001226528568, "grad_norm": 6.625, "learning_rate": 0.0004886258796243249, "loss": 17.8227, "step": 46560 }, { "epoch": 0.06899963855921408, "grad_norm": 6.75, "learning_rate": 0.0004886209406893173, "loss": 17.6357, "step": 46580 }, { "epoch": 0.06902926485314247, "grad_norm": 7.59375, "learning_rate": 0.0004886160017543097, "loss": 17.7412, "step": 46600 }, { "epoch": 0.06905889114707085, "grad_norm": 7.4375, "learning_rate": 0.0004886110628193022, "loss": 17.6527, "step": 46620 }, { "epoch": 0.06908851744099924, "grad_norm": 6.5625, "learning_rate": 0.0004886061238842946, "loss": 17.7538, "step": 46640 }, { "epoch": 0.06911814373492763, "grad_norm": 6.84375, "learning_rate": 0.000488601184949287, "loss": 17.6942, "step": 46660 }, { "epoch": 0.06914777002885601, "grad_norm": 7.25, "learning_rate": 0.0004885962460142795, "loss": 17.7382, "step": 46680 }, { "epoch": 0.0691773963227844, "grad_norm": 7.0, "learning_rate": 0.0004885913070792719, "loss": 17.6836, "step": 46700 }, { "epoch": 0.06920702261671278, "grad_norm": 8.25, "learning_rate": 0.0004885863681442644, "loss": 17.7226, "step": 46720 }, { "epoch": 0.06923664891064117, "grad_norm": 7.03125, "learning_rate": 0.0004885814292092568, "loss": 17.759, "step": 46740 }, { "epoch": 0.06926627520456956, "grad_norm": 7.25, "learning_rate": 0.0004885764902742493, "loss": 17.7323, "step": 46760 }, { "epoch": 0.06929590149849794, "grad_norm": 7.28125, "learning_rate": 0.0004885715513392417, "loss": 17.618, "step": 46780 }, { "epoch": 0.06932552779242633, "grad_norm": 6.75, "learning_rate": 0.000488566612404234, "loss": 17.7459, "step": 46800 }, { "epoch": 0.06935515408635472, "grad_norm": 6.96875, "learning_rate": 0.0004885616734692264, "loss": 17.6692, "step": 46820 }, { "epoch": 0.0693847803802831, "grad_norm": 6.5, "learning_rate": 0.000488556734534219, "loss": 17.7568, "step": 46840 }, { "epoch": 0.06941440667421149, "grad_norm": 8.375, "learning_rate": 0.0004885517955992113, "loss": 17.6868, "step": 46860 }, { "epoch": 0.06944403296813988, "grad_norm": 7.375, "learning_rate": 0.0004885468566642037, "loss": 17.6687, "step": 46880 }, { "epoch": 0.06947365926206828, "grad_norm": 6.6875, "learning_rate": 0.0004885419177291962, "loss": 17.6522, "step": 46900 }, { "epoch": 0.06950328555599666, "grad_norm": 7.71875, "learning_rate": 0.0004885369787941886, "loss": 17.7001, "step": 46920 }, { "epoch": 0.06953291184992505, "grad_norm": 8.875, "learning_rate": 0.0004885320398591811, "loss": 17.6782, "step": 46940 }, { "epoch": 0.06956253814385344, "grad_norm": 7.0, "learning_rate": 0.0004885271009241735, "loss": 17.6051, "step": 46960 }, { "epoch": 0.06959216443778182, "grad_norm": 6.5, "learning_rate": 0.0004885221619891659, "loss": 17.7128, "step": 46980 }, { "epoch": 0.06962179073171021, "grad_norm": 7.21875, "learning_rate": 0.0004885172230541584, "loss": 17.6774, "step": 47000 }, { "epoch": 0.0696514170256386, "grad_norm": 6.65625, "learning_rate": 0.0004885122841191508, "loss": 17.7358, "step": 47020 }, { "epoch": 0.06968104331956698, "grad_norm": 7.40625, "learning_rate": 0.0004885073451841432, "loss": 17.6421, "step": 47040 }, { "epoch": 0.06971066961349537, "grad_norm": 6.75, "learning_rate": 0.0004885024062491357, "loss": 17.6748, "step": 47060 }, { "epoch": 0.06974029590742375, "grad_norm": 7.03125, "learning_rate": 0.0004884974673141281, "loss": 17.6399, "step": 47080 }, { "epoch": 0.06976992220135214, "grad_norm": 6.8125, "learning_rate": 0.0004884925283791206, "loss": 17.5884, "step": 47100 }, { "epoch": 0.06979954849528053, "grad_norm": 6.625, "learning_rate": 0.000488487589444113, "loss": 17.644, "step": 47120 }, { "epoch": 0.06982917478920891, "grad_norm": 7.28125, "learning_rate": 0.0004884826505091055, "loss": 17.6506, "step": 47140 }, { "epoch": 0.0698588010831373, "grad_norm": 6.59375, "learning_rate": 0.0004884777115740979, "loss": 17.6561, "step": 47160 }, { "epoch": 0.06988842737706569, "grad_norm": 6.9375, "learning_rate": 0.0004884727726390903, "loss": 17.6101, "step": 47180 }, { "epoch": 0.06991805367099409, "grad_norm": 6.53125, "learning_rate": 0.0004884678337040827, "loss": 17.6825, "step": 47200 }, { "epoch": 0.06994767996492247, "grad_norm": 7.46875, "learning_rate": 0.0004884628947690752, "loss": 17.6727, "step": 47220 }, { "epoch": 0.06997730625885086, "grad_norm": 6.96875, "learning_rate": 0.0004884579558340676, "loss": 17.7289, "step": 47240 }, { "epoch": 0.07000693255277925, "grad_norm": 7.6875, "learning_rate": 0.00048845301689906, "loss": 17.6251, "step": 47260 }, { "epoch": 0.07003655884670763, "grad_norm": 7.8125, "learning_rate": 0.0004884480779640525, "loss": 17.6003, "step": 47280 }, { "epoch": 0.07006618514063602, "grad_norm": 7.21875, "learning_rate": 0.0004884431390290449, "loss": 17.6923, "step": 47300 }, { "epoch": 0.0700958114345644, "grad_norm": 7.25, "learning_rate": 0.0004884382000940374, "loss": 17.638, "step": 47320 }, { "epoch": 0.07012543772849279, "grad_norm": 7.8125, "learning_rate": 0.0004884332611590298, "loss": 17.6467, "step": 47340 }, { "epoch": 0.07015506402242118, "grad_norm": 7.09375, "learning_rate": 0.0004884283222240223, "loss": 17.6336, "step": 47360 }, { "epoch": 0.07018469031634957, "grad_norm": 7.03125, "learning_rate": 0.0004884233832890147, "loss": 17.5945, "step": 47380 }, { "epoch": 0.07021431661027795, "grad_norm": 7.40625, "learning_rate": 0.000488418444354007, "loss": 17.5602, "step": 47400 }, { "epoch": 0.07024394290420634, "grad_norm": 7.40625, "learning_rate": 0.0004884135054189994, "loss": 17.607, "step": 47420 }, { "epoch": 0.07027356919813472, "grad_norm": 8.5, "learning_rate": 0.000488408566483992, "loss": 17.6248, "step": 47440 }, { "epoch": 0.07030319549206311, "grad_norm": 6.96875, "learning_rate": 0.0004884036275489843, "loss": 17.6394, "step": 47460 }, { "epoch": 0.0703328217859915, "grad_norm": 6.65625, "learning_rate": 0.0004883986886139768, "loss": 17.5978, "step": 47480 }, { "epoch": 0.07036244807991988, "grad_norm": 6.9375, "learning_rate": 0.0004883937496789692, "loss": 17.6058, "step": 47500 }, { "epoch": 0.07039207437384828, "grad_norm": 7.46875, "learning_rate": 0.0004883888107439617, "loss": 17.5592, "step": 47520 }, { "epoch": 0.07042170066777667, "grad_norm": 6.0625, "learning_rate": 0.0004883838718089541, "loss": 17.6782, "step": 47540 }, { "epoch": 0.07045132696170506, "grad_norm": 7.5, "learning_rate": 0.0004883789328739465, "loss": 17.572, "step": 47560 }, { "epoch": 0.07048095325563344, "grad_norm": 7.5625, "learning_rate": 0.0004883739939389389, "loss": 17.6108, "step": 47580 }, { "epoch": 0.07051057954956183, "grad_norm": 7.09375, "learning_rate": 0.0004883690550039314, "loss": 17.558, "step": 47600 }, { "epoch": 0.07054020584349022, "grad_norm": 7.90625, "learning_rate": 0.0004883641160689238, "loss": 17.5706, "step": 47620 }, { "epoch": 0.0705698321374186, "grad_norm": 7.8125, "learning_rate": 0.0004883591771339162, "loss": 17.5645, "step": 47640 }, { "epoch": 0.07059945843134699, "grad_norm": 8.125, "learning_rate": 0.0004883542381989087, "loss": 17.5799, "step": 47660 }, { "epoch": 0.07062908472527538, "grad_norm": 6.96875, "learning_rate": 0.0004883492992639011, "loss": 17.5689, "step": 47680 }, { "epoch": 0.07065871101920376, "grad_norm": 7.40625, "learning_rate": 0.0004883443603288936, "loss": 17.592, "step": 47700 }, { "epoch": 0.07068833731313215, "grad_norm": 7.28125, "learning_rate": 0.000488339421393886, "loss": 17.6168, "step": 47720 }, { "epoch": 0.07071796360706054, "grad_norm": 7.1875, "learning_rate": 0.0004883344824588785, "loss": 17.5894, "step": 47740 }, { "epoch": 0.07074758990098892, "grad_norm": 6.84375, "learning_rate": 0.0004883295435238709, "loss": 17.6241, "step": 47760 }, { "epoch": 0.07077721619491731, "grad_norm": 6.40625, "learning_rate": 0.0004883246045888633, "loss": 17.6344, "step": 47780 }, { "epoch": 0.0708068424888457, "grad_norm": 7.84375, "learning_rate": 0.0004883196656538557, "loss": 17.5855, "step": 47800 }, { "epoch": 0.07083646878277408, "grad_norm": 6.59375, "learning_rate": 0.0004883147267188482, "loss": 17.5906, "step": 47820 }, { "epoch": 0.07086609507670248, "grad_norm": 6.78125, "learning_rate": 0.0004883097877838406, "loss": 17.6254, "step": 47840 }, { "epoch": 0.07089572137063087, "grad_norm": 7.03125, "learning_rate": 0.0004883048488488331, "loss": 17.6337, "step": 47860 }, { "epoch": 0.07092534766455925, "grad_norm": 7.4375, "learning_rate": 0.0004882999099138255, "loss": 17.5921, "step": 47880 }, { "epoch": 0.07095497395848764, "grad_norm": 6.65625, "learning_rate": 0.0004882949709788179, "loss": 17.624, "step": 47900 }, { "epoch": 0.07098460025241603, "grad_norm": 7.21875, "learning_rate": 0.0004882900320438104, "loss": 17.624, "step": 47920 }, { "epoch": 0.07101422654634441, "grad_norm": 6.71875, "learning_rate": 0.0004882850931088028, "loss": 17.5674, "step": 47940 }, { "epoch": 0.0710438528402728, "grad_norm": 7.46875, "learning_rate": 0.0004882801541737952, "loss": 17.5609, "step": 47960 }, { "epoch": 0.07107347913420119, "grad_norm": 7.96875, "learning_rate": 0.00048827521523878766, "loss": 17.5589, "step": 47980 }, { "epoch": 0.07110310542812957, "grad_norm": 7.15625, "learning_rate": 0.00048827027630378005, "loss": 17.6198, "step": 48000 }, { "epoch": 0.07113273172205796, "grad_norm": 7.0, "learning_rate": 0.0004882653373687725, "loss": 17.6268, "step": 48020 }, { "epoch": 0.07116235801598635, "grad_norm": 6.875, "learning_rate": 0.00048826039843376495, "loss": 17.5528, "step": 48040 }, { "epoch": 0.07119198430991473, "grad_norm": 7.53125, "learning_rate": 0.0004882554594987574, "loss": 17.522, "step": 48060 }, { "epoch": 0.07122161060384312, "grad_norm": 6.0625, "learning_rate": 0.0004882505205637498, "loss": 17.5529, "step": 48080 }, { "epoch": 0.0712512368977715, "grad_norm": 7.0, "learning_rate": 0.00048824558162874224, "loss": 17.669, "step": 48100 }, { "epoch": 0.07128086319169989, "grad_norm": 7.4375, "learning_rate": 0.0004882406426937347, "loss": 17.5906, "step": 48120 }, { "epoch": 0.07131048948562828, "grad_norm": 6.625, "learning_rate": 0.00048823570375872713, "loss": 17.5579, "step": 48140 }, { "epoch": 0.07134011577955668, "grad_norm": 6.4375, "learning_rate": 0.0004882307648237195, "loss": 17.5355, "step": 48160 }, { "epoch": 0.07136974207348507, "grad_norm": 7.84375, "learning_rate": 0.000488225825888712, "loss": 17.6029, "step": 48180 }, { "epoch": 0.07139936836741345, "grad_norm": 7.09375, "learning_rate": 0.0004882208869537044, "loss": 17.5423, "step": 48200 }, { "epoch": 0.07142899466134184, "grad_norm": 7.3125, "learning_rate": 0.0004882159480186968, "loss": 17.61, "step": 48220 }, { "epoch": 0.07145862095527022, "grad_norm": 7.15625, "learning_rate": 0.00048821100908368926, "loss": 17.5822, "step": 48240 }, { "epoch": 0.07148824724919861, "grad_norm": 7.375, "learning_rate": 0.0004882060701486817, "loss": 17.5893, "step": 48260 }, { "epoch": 0.071517873543127, "grad_norm": 7.6875, "learning_rate": 0.00048820113121367416, "loss": 17.5874, "step": 48280 }, { "epoch": 0.07154749983705538, "grad_norm": 6.90625, "learning_rate": 0.00048819619227866655, "loss": 17.5317, "step": 48300 }, { "epoch": 0.07157712613098377, "grad_norm": 6.46875, "learning_rate": 0.000488191253343659, "loss": 17.5101, "step": 48320 }, { "epoch": 0.07160675242491216, "grad_norm": 7.53125, "learning_rate": 0.00048818631440865145, "loss": 17.5436, "step": 48340 }, { "epoch": 0.07163637871884054, "grad_norm": 6.625, "learning_rate": 0.0004881813754736439, "loss": 17.5375, "step": 48360 }, { "epoch": 0.07166600501276893, "grad_norm": 7.625, "learning_rate": 0.0004881764365386363, "loss": 17.534, "step": 48380 }, { "epoch": 0.07169563130669732, "grad_norm": 7.3125, "learning_rate": 0.00048817149760362874, "loss": 17.5277, "step": 48400 }, { "epoch": 0.0717252576006257, "grad_norm": 6.4375, "learning_rate": 0.0004881665586686212, "loss": 17.5418, "step": 48420 }, { "epoch": 0.07175488389455409, "grad_norm": 6.5, "learning_rate": 0.00048816161973361363, "loss": 17.5704, "step": 48440 }, { "epoch": 0.07178451018848248, "grad_norm": 7.5, "learning_rate": 0.00048815668079860603, "loss": 17.5242, "step": 48460 }, { "epoch": 0.07181413648241088, "grad_norm": 6.59375, "learning_rate": 0.0004881517418635985, "loss": 17.5374, "step": 48480 }, { "epoch": 0.07184376277633926, "grad_norm": 7.25, "learning_rate": 0.0004881468029285909, "loss": 17.5777, "step": 48500 }, { "epoch": 0.07187338907026765, "grad_norm": 6.25, "learning_rate": 0.00048814186399358337, "loss": 17.6099, "step": 48520 }, { "epoch": 0.07190301536419604, "grad_norm": 7.09375, "learning_rate": 0.00048813692505857576, "loss": 17.5068, "step": 48540 }, { "epoch": 0.07193264165812442, "grad_norm": 8.125, "learning_rate": 0.0004881319861235682, "loss": 17.5705, "step": 48560 }, { "epoch": 0.07196226795205281, "grad_norm": 6.875, "learning_rate": 0.00048812704718856066, "loss": 17.5854, "step": 48580 }, { "epoch": 0.0719918942459812, "grad_norm": 7.65625, "learning_rate": 0.00048812210825355305, "loss": 17.5599, "step": 48600 }, { "epoch": 0.07202152053990958, "grad_norm": 7.125, "learning_rate": 0.0004881171693185455, "loss": 17.529, "step": 48620 }, { "epoch": 0.07205114683383797, "grad_norm": 7.5625, "learning_rate": 0.00048811223038353795, "loss": 17.5414, "step": 48640 }, { "epoch": 0.07208077312776635, "grad_norm": 7.8125, "learning_rate": 0.0004881072914485304, "loss": 17.4846, "step": 48660 }, { "epoch": 0.07211039942169474, "grad_norm": 7.21875, "learning_rate": 0.0004881023525135228, "loss": 17.4992, "step": 48680 }, { "epoch": 0.07214002571562313, "grad_norm": 6.90625, "learning_rate": 0.00048809741357851524, "loss": 17.4995, "step": 48700 }, { "epoch": 0.07216965200955151, "grad_norm": 6.75, "learning_rate": 0.0004880924746435077, "loss": 17.5419, "step": 48720 }, { "epoch": 0.0721992783034799, "grad_norm": 6.34375, "learning_rate": 0.00048808753570850013, "loss": 17.4992, "step": 48740 }, { "epoch": 0.07222890459740829, "grad_norm": 6.21875, "learning_rate": 0.00048808259677349253, "loss": 17.5407, "step": 48760 }, { "epoch": 0.07225853089133667, "grad_norm": 7.5, "learning_rate": 0.000488077657838485, "loss": 17.4798, "step": 48780 }, { "epoch": 0.07228815718526507, "grad_norm": 7.375, "learning_rate": 0.0004880727189034774, "loss": 17.5251, "step": 48800 }, { "epoch": 0.07231778347919346, "grad_norm": 6.9375, "learning_rate": 0.00048806777996846987, "loss": 17.5093, "step": 48820 }, { "epoch": 0.07234740977312185, "grad_norm": 7.3125, "learning_rate": 0.00048806284103346226, "loss": 17.515, "step": 48840 }, { "epoch": 0.07237703606705023, "grad_norm": 7.375, "learning_rate": 0.00048805790209845477, "loss": 17.4882, "step": 48860 }, { "epoch": 0.07240666236097862, "grad_norm": 7.1875, "learning_rate": 0.00048805296316344716, "loss": 17.5369, "step": 48880 }, { "epoch": 0.072436288654907, "grad_norm": 6.90625, "learning_rate": 0.00048804802422843955, "loss": 17.5024, "step": 48900 }, { "epoch": 0.07246591494883539, "grad_norm": 6.78125, "learning_rate": 0.000488043085293432, "loss": 17.5327, "step": 48920 }, { "epoch": 0.07249554124276378, "grad_norm": 7.4375, "learning_rate": 0.00048803814635842445, "loss": 17.5123, "step": 48940 }, { "epoch": 0.07252516753669216, "grad_norm": 7.0625, "learning_rate": 0.0004880332074234169, "loss": 17.4921, "step": 48960 }, { "epoch": 0.07255479383062055, "grad_norm": 7.375, "learning_rate": 0.0004880282684884093, "loss": 17.5089, "step": 48980 }, { "epoch": 0.07258442012454894, "grad_norm": 7.28125, "learning_rate": 0.00048802332955340174, "loss": 17.4868, "step": 49000 }, { "epoch": 0.07261404641847732, "grad_norm": 6.8125, "learning_rate": 0.0004880183906183942, "loss": 17.4997, "step": 49020 }, { "epoch": 0.07264367271240571, "grad_norm": 6.875, "learning_rate": 0.00048801345168338663, "loss": 17.5002, "step": 49040 }, { "epoch": 0.0726732990063341, "grad_norm": 7.125, "learning_rate": 0.00048800851274837903, "loss": 17.4802, "step": 49060 }, { "epoch": 0.07270292530026248, "grad_norm": 6.6875, "learning_rate": 0.0004880035738133715, "loss": 17.4853, "step": 49080 }, { "epoch": 0.07273255159419087, "grad_norm": 6.8125, "learning_rate": 0.0004879986348783639, "loss": 17.4769, "step": 49100 }, { "epoch": 0.07276217788811927, "grad_norm": 7.4375, "learning_rate": 0.00048799369594335637, "loss": 17.4832, "step": 49120 }, { "epoch": 0.07279180418204766, "grad_norm": 7.03125, "learning_rate": 0.00048798875700834876, "loss": 17.4134, "step": 49140 }, { "epoch": 0.07282143047597604, "grad_norm": 7.1875, "learning_rate": 0.00048798381807334127, "loss": 17.4654, "step": 49160 }, { "epoch": 0.07285105676990443, "grad_norm": 7.46875, "learning_rate": 0.00048797887913833366, "loss": 17.4494, "step": 49180 }, { "epoch": 0.07288068306383282, "grad_norm": 7.15625, "learning_rate": 0.0004879739402033261, "loss": 17.4735, "step": 49200 }, { "epoch": 0.0729103093577612, "grad_norm": 6.90625, "learning_rate": 0.0004879690012683185, "loss": 17.4226, "step": 49220 }, { "epoch": 0.07293993565168959, "grad_norm": 7.0625, "learning_rate": 0.00048796406233331095, "loss": 17.4748, "step": 49240 }, { "epoch": 0.07296956194561798, "grad_norm": 6.5625, "learning_rate": 0.0004879591233983034, "loss": 17.4576, "step": 49260 }, { "epoch": 0.07299918823954636, "grad_norm": 7.1875, "learning_rate": 0.0004879541844632958, "loss": 17.4779, "step": 49280 }, { "epoch": 0.07302881453347475, "grad_norm": 6.65625, "learning_rate": 0.00048794924552828824, "loss": 17.4941, "step": 49300 }, { "epoch": 0.07305844082740313, "grad_norm": 7.25, "learning_rate": 0.0004879443065932807, "loss": 17.5034, "step": 49320 }, { "epoch": 0.07308806712133152, "grad_norm": 6.5, "learning_rate": 0.00048793936765827313, "loss": 17.4597, "step": 49340 }, { "epoch": 0.07311769341525991, "grad_norm": 7.84375, "learning_rate": 0.00048793442872326553, "loss": 17.4733, "step": 49360 }, { "epoch": 0.0731473197091883, "grad_norm": 7.375, "learning_rate": 0.000487929489788258, "loss": 17.6096, "step": 49380 }, { "epoch": 0.07317694600311668, "grad_norm": 7.65625, "learning_rate": 0.0004879245508532504, "loss": 17.5011, "step": 49400 }, { "epoch": 0.07320657229704507, "grad_norm": 7.4375, "learning_rate": 0.00048791961191824287, "loss": 17.4563, "step": 49420 }, { "epoch": 0.07323619859097347, "grad_norm": 6.84375, "learning_rate": 0.00048791467298323527, "loss": 17.513, "step": 49440 }, { "epoch": 0.07326582488490185, "grad_norm": 7.40625, "learning_rate": 0.00048790973404822777, "loss": 17.4962, "step": 49460 }, { "epoch": 0.07329545117883024, "grad_norm": 6.53125, "learning_rate": 0.00048790479511322016, "loss": 17.4876, "step": 49480 }, { "epoch": 0.07332507747275863, "grad_norm": 7.40625, "learning_rate": 0.0004878998561782126, "loss": 17.4253, "step": 49500 }, { "epoch": 0.07335470376668701, "grad_norm": 7.25, "learning_rate": 0.000487894917243205, "loss": 17.4036, "step": 49520 }, { "epoch": 0.0733843300606154, "grad_norm": 6.09375, "learning_rate": 0.0004878899783081975, "loss": 17.4823, "step": 49540 }, { "epoch": 0.07341395635454379, "grad_norm": 7.25, "learning_rate": 0.0004878850393731899, "loss": 17.4558, "step": 49560 }, { "epoch": 0.07344358264847217, "grad_norm": 7.0625, "learning_rate": 0.0004878801004381823, "loss": 17.5073, "step": 49580 }, { "epoch": 0.07347320894240056, "grad_norm": 7.09375, "learning_rate": 0.00048787516150317474, "loss": 17.4589, "step": 49600 }, { "epoch": 0.07350283523632895, "grad_norm": 6.84375, "learning_rate": 0.0004878702225681672, "loss": 17.4994, "step": 49620 }, { "epoch": 0.07353246153025733, "grad_norm": 6.125, "learning_rate": 0.00048786528363315963, "loss": 17.4702, "step": 49640 }, { "epoch": 0.07356208782418572, "grad_norm": 7.5, "learning_rate": 0.00048786034469815203, "loss": 17.4532, "step": 49660 }, { "epoch": 0.0735917141181141, "grad_norm": 7.71875, "learning_rate": 0.0004878554057631445, "loss": 17.3864, "step": 49680 }, { "epoch": 0.07362134041204249, "grad_norm": 7.09375, "learning_rate": 0.0004878504668281369, "loss": 17.4477, "step": 49700 }, { "epoch": 0.07365096670597088, "grad_norm": 7.0625, "learning_rate": 0.00048784552789312937, "loss": 17.4052, "step": 49720 }, { "epoch": 0.07368059299989926, "grad_norm": 6.59375, "learning_rate": 0.00048784058895812177, "loss": 17.4343, "step": 49740 }, { "epoch": 0.07371021929382766, "grad_norm": 7.125, "learning_rate": 0.00048783565002311427, "loss": 17.4583, "step": 49760 }, { "epoch": 0.07373984558775605, "grad_norm": 6.9375, "learning_rate": 0.00048783071108810666, "loss": 17.4553, "step": 49780 }, { "epoch": 0.07376947188168444, "grad_norm": 7.09375, "learning_rate": 0.0004878257721530991, "loss": 17.4428, "step": 49800 }, { "epoch": 0.07379909817561282, "grad_norm": 7.03125, "learning_rate": 0.0004878208332180915, "loss": 17.4838, "step": 49820 }, { "epoch": 0.07382872446954121, "grad_norm": 8.375, "learning_rate": 0.000487815894283084, "loss": 17.4149, "step": 49840 }, { "epoch": 0.0738583507634696, "grad_norm": 7.5625, "learning_rate": 0.0004878109553480764, "loss": 17.3965, "step": 49860 }, { "epoch": 0.07388797705739798, "grad_norm": 6.59375, "learning_rate": 0.00048780601641306885, "loss": 17.4492, "step": 49880 }, { "epoch": 0.07391760335132637, "grad_norm": 8.875, "learning_rate": 0.00048780107747806124, "loss": 17.446, "step": 49900 }, { "epoch": 0.07394722964525476, "grad_norm": 6.5625, "learning_rate": 0.0004877961385430537, "loss": 17.4262, "step": 49920 }, { "epoch": 0.07397685593918314, "grad_norm": 7.53125, "learning_rate": 0.00048779119960804614, "loss": 17.4405, "step": 49940 }, { "epoch": 0.07400648223311153, "grad_norm": 7.65625, "learning_rate": 0.00048778626067303853, "loss": 17.4488, "step": 49960 }, { "epoch": 0.07403610852703991, "grad_norm": 6.59375, "learning_rate": 0.000487781321738031, "loss": 17.4147, "step": 49980 }, { "epoch": 0.0740657348209683, "grad_norm": 7.0625, "learning_rate": 0.0004877763828030234, "loss": 17.4521, "step": 50000 }, { "epoch": 0.07409536111489669, "grad_norm": 8.25, "learning_rate": 0.00048777144386801587, "loss": 17.4544, "step": 50020 }, { "epoch": 0.07412498740882507, "grad_norm": 7.0625, "learning_rate": 0.00048776650493300827, "loss": 17.4944, "step": 50040 }, { "epoch": 0.07415461370275347, "grad_norm": 7.4375, "learning_rate": 0.00048776156599800077, "loss": 17.4618, "step": 50060 }, { "epoch": 0.07418423999668186, "grad_norm": 6.53125, "learning_rate": 0.00048775662706299316, "loss": 17.479, "step": 50080 }, { "epoch": 0.07421386629061025, "grad_norm": 7.84375, "learning_rate": 0.0004877516881279856, "loss": 17.4405, "step": 50100 }, { "epoch": 0.07424349258453863, "grad_norm": 7.03125, "learning_rate": 0.000487746749192978, "loss": 17.4613, "step": 50120 }, { "epoch": 0.07427311887846702, "grad_norm": 8.5625, "learning_rate": 0.0004877418102579705, "loss": 17.4064, "step": 50140 }, { "epoch": 0.0743027451723954, "grad_norm": 6.6875, "learning_rate": 0.0004877368713229629, "loss": 17.4234, "step": 50160 }, { "epoch": 0.0743323714663238, "grad_norm": 6.625, "learning_rate": 0.00048773193238795535, "loss": 17.4364, "step": 50180 }, { "epoch": 0.07436199776025218, "grad_norm": 6.65625, "learning_rate": 0.00048772699345294774, "loss": 17.4157, "step": 50200 }, { "epoch": 0.07439162405418057, "grad_norm": 6.03125, "learning_rate": 0.00048772205451794024, "loss": 17.3633, "step": 50220 }, { "epoch": 0.07442125034810895, "grad_norm": 6.90625, "learning_rate": 0.00048771711558293264, "loss": 17.4643, "step": 50240 }, { "epoch": 0.07445087664203734, "grad_norm": 7.4375, "learning_rate": 0.00048771217664792503, "loss": 17.4576, "step": 50260 }, { "epoch": 0.07448050293596573, "grad_norm": 6.375, "learning_rate": 0.0004877072377129175, "loss": 17.3961, "step": 50280 }, { "epoch": 0.07451012922989411, "grad_norm": 6.1875, "learning_rate": 0.0004877022987779099, "loss": 17.4321, "step": 50300 }, { "epoch": 0.0745397555238225, "grad_norm": 7.03125, "learning_rate": 0.00048769735984290237, "loss": 17.4214, "step": 50320 }, { "epoch": 0.07456938181775088, "grad_norm": 7.875, "learning_rate": 0.00048769242090789477, "loss": 17.4401, "step": 50340 }, { "epoch": 0.07459900811167927, "grad_norm": 7.03125, "learning_rate": 0.00048768748197288727, "loss": 17.4159, "step": 50360 }, { "epoch": 0.07462863440560767, "grad_norm": 6.875, "learning_rate": 0.00048768254303787966, "loss": 17.3766, "step": 50380 }, { "epoch": 0.07465826069953606, "grad_norm": 6.8125, "learning_rate": 0.0004876776041028721, "loss": 17.3769, "step": 50400 }, { "epoch": 0.07468788699346444, "grad_norm": 7.90625, "learning_rate": 0.0004876726651678645, "loss": 17.3957, "step": 50420 }, { "epoch": 0.07471751328739283, "grad_norm": 6.1875, "learning_rate": 0.000487667726232857, "loss": 17.3538, "step": 50440 }, { "epoch": 0.07474713958132122, "grad_norm": 7.25, "learning_rate": 0.0004876627872978494, "loss": 17.376, "step": 50460 }, { "epoch": 0.0747767658752496, "grad_norm": 6.5, "learning_rate": 0.00048765784836284185, "loss": 17.4275, "step": 50480 }, { "epoch": 0.07480639216917799, "grad_norm": 6.34375, "learning_rate": 0.00048765290942783424, "loss": 17.3695, "step": 50500 }, { "epoch": 0.07483601846310638, "grad_norm": 7.03125, "learning_rate": 0.00048764797049282674, "loss": 17.3595, "step": 50520 }, { "epoch": 0.07486564475703476, "grad_norm": 8.1875, "learning_rate": 0.00048764303155781914, "loss": 17.3446, "step": 50540 }, { "epoch": 0.07489527105096315, "grad_norm": 7.0625, "learning_rate": 0.0004876380926228116, "loss": 17.3722, "step": 50560 }, { "epoch": 0.07492489734489154, "grad_norm": 7.3125, "learning_rate": 0.000487633153687804, "loss": 17.3849, "step": 50580 }, { "epoch": 0.07495452363881992, "grad_norm": 7.4375, "learning_rate": 0.0004876282147527964, "loss": 17.4023, "step": 50600 }, { "epoch": 0.07498414993274831, "grad_norm": 7.5625, "learning_rate": 0.00048762327581778887, "loss": 17.3755, "step": 50620 }, { "epoch": 0.0750137762266767, "grad_norm": 7.1875, "learning_rate": 0.00048761833688278127, "loss": 17.3693, "step": 50640 }, { "epoch": 0.07504340252060508, "grad_norm": 6.28125, "learning_rate": 0.00048761339794777377, "loss": 17.3601, "step": 50660 }, { "epoch": 0.07507302881453347, "grad_norm": 6.65625, "learning_rate": 0.00048760845901276616, "loss": 17.3749, "step": 50680 }, { "epoch": 0.07510265510846187, "grad_norm": 7.375, "learning_rate": 0.0004876035200777586, "loss": 17.3913, "step": 50700 }, { "epoch": 0.07513228140239026, "grad_norm": 7.0625, "learning_rate": 0.000487598581142751, "loss": 17.3392, "step": 50720 }, { "epoch": 0.07516190769631864, "grad_norm": 7.25, "learning_rate": 0.0004875936422077435, "loss": 17.3609, "step": 50740 }, { "epoch": 0.07519153399024703, "grad_norm": 6.53125, "learning_rate": 0.0004875887032727359, "loss": 17.4028, "step": 50760 }, { "epoch": 0.07522116028417541, "grad_norm": 6.625, "learning_rate": 0.00048758376433772835, "loss": 17.3713, "step": 50780 }, { "epoch": 0.0752507865781038, "grad_norm": 7.15625, "learning_rate": 0.00048757882540272074, "loss": 17.3517, "step": 50800 }, { "epoch": 0.07528041287203219, "grad_norm": 7.0, "learning_rate": 0.00048757388646771324, "loss": 17.3126, "step": 50820 }, { "epoch": 0.07531003916596057, "grad_norm": 6.6875, "learning_rate": 0.00048756894753270564, "loss": 17.3462, "step": 50840 }, { "epoch": 0.07533966545988896, "grad_norm": 6.59375, "learning_rate": 0.0004875640085976981, "loss": 17.3688, "step": 50860 }, { "epoch": 0.07536929175381735, "grad_norm": 7.8125, "learning_rate": 0.0004875590696626905, "loss": 17.3618, "step": 50880 }, { "epoch": 0.07539891804774573, "grad_norm": 7.15625, "learning_rate": 0.000487554130727683, "loss": 17.4048, "step": 50900 }, { "epoch": 0.07542854434167412, "grad_norm": 6.84375, "learning_rate": 0.0004875491917926754, "loss": 17.3509, "step": 50920 }, { "epoch": 0.0754581706356025, "grad_norm": 6.25, "learning_rate": 0.00048754425285766777, "loss": 17.423, "step": 50940 }, { "epoch": 0.07548779692953089, "grad_norm": 6.84375, "learning_rate": 0.00048753931392266027, "loss": 17.3699, "step": 50960 }, { "epoch": 0.07551742322345928, "grad_norm": 7.71875, "learning_rate": 0.00048753437498765266, "loss": 17.3798, "step": 50980 }, { "epoch": 0.07554704951738767, "grad_norm": 7.59375, "learning_rate": 0.0004875294360526451, "loss": 17.377, "step": 51000 }, { "epoch": 0.07557667581131607, "grad_norm": 6.90625, "learning_rate": 0.0004875244971176375, "loss": 17.431, "step": 51020 }, { "epoch": 0.07560630210524445, "grad_norm": 7.125, "learning_rate": 0.00048751955818263, "loss": 17.3491, "step": 51040 }, { "epoch": 0.07563592839917284, "grad_norm": 7.53125, "learning_rate": 0.0004875146192476224, "loss": 17.3375, "step": 51060 }, { "epoch": 0.07566555469310123, "grad_norm": 7.75, "learning_rate": 0.00048750968031261485, "loss": 17.3697, "step": 51080 }, { "epoch": 0.07569518098702961, "grad_norm": 7.6875, "learning_rate": 0.00048750474137760724, "loss": 17.4036, "step": 51100 }, { "epoch": 0.075724807280958, "grad_norm": 6.0, "learning_rate": 0.00048749980244259974, "loss": 17.3631, "step": 51120 }, { "epoch": 0.07575443357488638, "grad_norm": 6.875, "learning_rate": 0.00048749486350759214, "loss": 17.3267, "step": 51140 }, { "epoch": 0.07578405986881477, "grad_norm": 6.78125, "learning_rate": 0.0004874899245725846, "loss": 17.353, "step": 51160 }, { "epoch": 0.07581368616274316, "grad_norm": 6.5625, "learning_rate": 0.000487484985637577, "loss": 17.3355, "step": 51180 }, { "epoch": 0.07584331245667154, "grad_norm": 6.625, "learning_rate": 0.0004874800467025695, "loss": 17.3533, "step": 51200 }, { "epoch": 0.07587293875059993, "grad_norm": 6.90625, "learning_rate": 0.0004874751077675619, "loss": 17.2867, "step": 51220 }, { "epoch": 0.07590256504452832, "grad_norm": 7.4375, "learning_rate": 0.0004874701688325543, "loss": 17.3874, "step": 51240 }, { "epoch": 0.0759321913384567, "grad_norm": 7.03125, "learning_rate": 0.00048746522989754677, "loss": 17.3779, "step": 51260 }, { "epoch": 0.07596181763238509, "grad_norm": 6.75, "learning_rate": 0.00048746029096253916, "loss": 17.2991, "step": 51280 }, { "epoch": 0.07599144392631348, "grad_norm": 6.75, "learning_rate": 0.0004874553520275316, "loss": 17.3238, "step": 51300 }, { "epoch": 0.07602107022024186, "grad_norm": 7.34375, "learning_rate": 0.000487450413092524, "loss": 17.3291, "step": 51320 }, { "epoch": 0.07605069651417026, "grad_norm": 6.53125, "learning_rate": 0.0004874454741575165, "loss": 17.3827, "step": 51340 }, { "epoch": 0.07608032280809865, "grad_norm": 7.46875, "learning_rate": 0.0004874405352225089, "loss": 17.3967, "step": 51360 }, { "epoch": 0.07610994910202704, "grad_norm": 6.375, "learning_rate": 0.00048743559628750135, "loss": 17.2831, "step": 51380 }, { "epoch": 0.07613957539595542, "grad_norm": 7.09375, "learning_rate": 0.00048743065735249374, "loss": 17.3631, "step": 51400 }, { "epoch": 0.07616920168988381, "grad_norm": 7.125, "learning_rate": 0.00048742571841748624, "loss": 17.3253, "step": 51420 }, { "epoch": 0.0761988279838122, "grad_norm": 6.65625, "learning_rate": 0.00048742077948247864, "loss": 17.3055, "step": 51440 }, { "epoch": 0.07622845427774058, "grad_norm": 6.625, "learning_rate": 0.0004874158405474711, "loss": 17.2605, "step": 51460 }, { "epoch": 0.07625808057166897, "grad_norm": 6.8125, "learning_rate": 0.0004874109016124635, "loss": 17.3248, "step": 51480 }, { "epoch": 0.07628770686559735, "grad_norm": 7.75, "learning_rate": 0.000487405962677456, "loss": 17.3097, "step": 51500 }, { "epoch": 0.07631733315952574, "grad_norm": 7.40625, "learning_rate": 0.0004874010237424484, "loss": 17.3779, "step": 51520 }, { "epoch": 0.07634695945345413, "grad_norm": 6.8125, "learning_rate": 0.0004873960848074408, "loss": 17.3135, "step": 51540 }, { "epoch": 0.07637658574738251, "grad_norm": 7.0625, "learning_rate": 0.00048739114587243327, "loss": 17.3131, "step": 51560 }, { "epoch": 0.0764062120413109, "grad_norm": 6.9375, "learning_rate": 0.0004873862069374257, "loss": 17.3324, "step": 51580 }, { "epoch": 0.07643583833523929, "grad_norm": 7.96875, "learning_rate": 0.0004873812680024181, "loss": 17.2835, "step": 51600 }, { "epoch": 0.07646546462916767, "grad_norm": 6.78125, "learning_rate": 0.0004873763290674105, "loss": 17.3365, "step": 51620 }, { "epoch": 0.07649509092309606, "grad_norm": 8.3125, "learning_rate": 0.000487371390132403, "loss": 17.3028, "step": 51640 }, { "epoch": 0.07652471721702446, "grad_norm": 6.9375, "learning_rate": 0.0004873664511973954, "loss": 17.3042, "step": 51660 }, { "epoch": 0.07655434351095285, "grad_norm": 7.09375, "learning_rate": 0.00048736151226238785, "loss": 17.2441, "step": 51680 }, { "epoch": 0.07658396980488123, "grad_norm": 7.25, "learning_rate": 0.00048735657332738024, "loss": 17.2984, "step": 51700 }, { "epoch": 0.07661359609880962, "grad_norm": 7.5, "learning_rate": 0.00048735163439237274, "loss": 17.3066, "step": 51720 }, { "epoch": 0.076643222392738, "grad_norm": 6.21875, "learning_rate": 0.00048734669545736514, "loss": 17.3059, "step": 51740 }, { "epoch": 0.07667284868666639, "grad_norm": 6.84375, "learning_rate": 0.0004873417565223576, "loss": 17.323, "step": 51760 }, { "epoch": 0.07670247498059478, "grad_norm": 6.75, "learning_rate": 0.00048733681758735, "loss": 17.3962, "step": 51780 }, { "epoch": 0.07673210127452317, "grad_norm": 7.125, "learning_rate": 0.0004873318786523425, "loss": 17.3256, "step": 51800 }, { "epoch": 0.07676172756845155, "grad_norm": 6.46875, "learning_rate": 0.0004873269397173349, "loss": 17.2537, "step": 51820 }, { "epoch": 0.07679135386237994, "grad_norm": 6.6875, "learning_rate": 0.0004873220007823273, "loss": 17.2684, "step": 51840 }, { "epoch": 0.07682098015630832, "grad_norm": 6.59375, "learning_rate": 0.00048731706184731977, "loss": 17.2797, "step": 51860 }, { "epoch": 0.07685060645023671, "grad_norm": 7.0, "learning_rate": 0.0004873121229123122, "loss": 17.3257, "step": 51880 }, { "epoch": 0.0768802327441651, "grad_norm": 6.65625, "learning_rate": 0.0004873071839773046, "loss": 17.3123, "step": 51900 }, { "epoch": 0.07690985903809348, "grad_norm": 7.125, "learning_rate": 0.00048730224504229706, "loss": 17.3099, "step": 51920 }, { "epoch": 0.07693948533202187, "grad_norm": 7.625, "learning_rate": 0.0004872973061072895, "loss": 17.3529, "step": 51940 }, { "epoch": 0.07696911162595026, "grad_norm": 7.40625, "learning_rate": 0.0004872923671722819, "loss": 17.3129, "step": 51960 }, { "epoch": 0.07699873791987866, "grad_norm": 7.0, "learning_rate": 0.00048728742823727435, "loss": 17.2937, "step": 51980 }, { "epoch": 0.07702836421380704, "grad_norm": 7.59375, "learning_rate": 0.00048728248930226674, "loss": 17.2637, "step": 52000 }, { "epoch": 0.07705799050773543, "grad_norm": 7.1875, "learning_rate": 0.00048727755036725924, "loss": 17.3102, "step": 52020 }, { "epoch": 0.07708761680166382, "grad_norm": 7.21875, "learning_rate": 0.00048727261143225164, "loss": 17.3201, "step": 52040 }, { "epoch": 0.0771172430955922, "grad_norm": 7.3125, "learning_rate": 0.0004872676724972441, "loss": 17.2724, "step": 52060 }, { "epoch": 0.07714686938952059, "grad_norm": 7.03125, "learning_rate": 0.0004872627335622365, "loss": 17.3148, "step": 52080 }, { "epoch": 0.07717649568344898, "grad_norm": 7.125, "learning_rate": 0.000487257794627229, "loss": 17.3009, "step": 52100 }, { "epoch": 0.07720612197737736, "grad_norm": 6.65625, "learning_rate": 0.0004872528556922214, "loss": 17.3, "step": 52120 }, { "epoch": 0.07723574827130575, "grad_norm": 7.8125, "learning_rate": 0.0004872479167572138, "loss": 17.2864, "step": 52140 }, { "epoch": 0.07726537456523414, "grad_norm": 6.53125, "learning_rate": 0.00048724297782220627, "loss": 17.2851, "step": 52160 }, { "epoch": 0.07729500085916252, "grad_norm": 7.3125, "learning_rate": 0.0004872380388871987, "loss": 17.3464, "step": 52180 }, { "epoch": 0.07732462715309091, "grad_norm": 7.40625, "learning_rate": 0.0004872330999521911, "loss": 17.2766, "step": 52200 }, { "epoch": 0.0773542534470193, "grad_norm": 7.09375, "learning_rate": 0.00048722816101718356, "loss": 17.3107, "step": 52220 }, { "epoch": 0.07738387974094768, "grad_norm": 7.15625, "learning_rate": 0.000487223222082176, "loss": 17.3594, "step": 52240 }, { "epoch": 0.07741350603487607, "grad_norm": 6.875, "learning_rate": 0.00048721828314716845, "loss": 17.2498, "step": 52260 }, { "epoch": 0.07744313232880445, "grad_norm": 6.8125, "learning_rate": 0.00048721334421216085, "loss": 17.2867, "step": 52280 }, { "epoch": 0.07747275862273285, "grad_norm": 7.34375, "learning_rate": 0.00048720840527715324, "loss": 17.2789, "step": 52300 }, { "epoch": 0.07750238491666124, "grad_norm": 7.25, "learning_rate": 0.00048720346634214574, "loss": 17.3081, "step": 52320 }, { "epoch": 0.07753201121058963, "grad_norm": 6.65625, "learning_rate": 0.00048719852740713814, "loss": 17.2848, "step": 52340 }, { "epoch": 0.07756163750451801, "grad_norm": 7.34375, "learning_rate": 0.0004871935884721306, "loss": 17.3466, "step": 52360 }, { "epoch": 0.0775912637984464, "grad_norm": 7.15625, "learning_rate": 0.000487188649537123, "loss": 17.3124, "step": 52380 }, { "epoch": 0.07762089009237479, "grad_norm": 6.84375, "learning_rate": 0.0004871837106021155, "loss": 17.3499, "step": 52400 }, { "epoch": 0.07765051638630317, "grad_norm": 7.21875, "learning_rate": 0.0004871787716671079, "loss": 17.2475, "step": 52420 }, { "epoch": 0.07768014268023156, "grad_norm": 7.375, "learning_rate": 0.0004871738327321003, "loss": 17.2463, "step": 52440 }, { "epoch": 0.07770976897415995, "grad_norm": 6.84375, "learning_rate": 0.00048716889379709277, "loss": 17.3136, "step": 52460 }, { "epoch": 0.07773939526808833, "grad_norm": 6.5, "learning_rate": 0.0004871639548620852, "loss": 17.225, "step": 52480 }, { "epoch": 0.07776902156201672, "grad_norm": 6.90625, "learning_rate": 0.0004871590159270776, "loss": 17.2099, "step": 52500 }, { "epoch": 0.0777986478559451, "grad_norm": 7.0625, "learning_rate": 0.00048715407699207006, "loss": 17.3425, "step": 52520 }, { "epoch": 0.07782827414987349, "grad_norm": 7.6875, "learning_rate": 0.0004871491380570625, "loss": 17.283, "step": 52540 }, { "epoch": 0.07785790044380188, "grad_norm": 6.75, "learning_rate": 0.00048714419912205496, "loss": 17.267, "step": 52560 }, { "epoch": 0.07788752673773026, "grad_norm": 7.125, "learning_rate": 0.00048713926018704735, "loss": 17.2784, "step": 52580 }, { "epoch": 0.07791715303165865, "grad_norm": 6.96875, "learning_rate": 0.00048713432125203974, "loss": 17.2776, "step": 52600 }, { "epoch": 0.07794677932558705, "grad_norm": 6.71875, "learning_rate": 0.00048712938231703224, "loss": 17.2216, "step": 52620 }, { "epoch": 0.07797640561951544, "grad_norm": 7.65625, "learning_rate": 0.00048712444338202464, "loss": 17.2506, "step": 52640 }, { "epoch": 0.07800603191344382, "grad_norm": 6.84375, "learning_rate": 0.0004871195044470171, "loss": 17.2506, "step": 52660 }, { "epoch": 0.07803565820737221, "grad_norm": 7.46875, "learning_rate": 0.0004871145655120095, "loss": 17.2251, "step": 52680 }, { "epoch": 0.0780652845013006, "grad_norm": 7.25, "learning_rate": 0.000487109626577002, "loss": 17.2623, "step": 52700 }, { "epoch": 0.07809491079522898, "grad_norm": 6.875, "learning_rate": 0.0004871046876419944, "loss": 17.2404, "step": 52720 }, { "epoch": 0.07812453708915737, "grad_norm": 6.8125, "learning_rate": 0.0004870997487069868, "loss": 17.3015, "step": 52740 }, { "epoch": 0.07815416338308576, "grad_norm": 7.28125, "learning_rate": 0.00048709480977197927, "loss": 17.1941, "step": 52760 }, { "epoch": 0.07818378967701414, "grad_norm": 7.25, "learning_rate": 0.0004870898708369717, "loss": 17.2816, "step": 52780 }, { "epoch": 0.07821341597094253, "grad_norm": 7.09375, "learning_rate": 0.0004870849319019641, "loss": 17.1902, "step": 52800 }, { "epoch": 0.07824304226487092, "grad_norm": 7.40625, "learning_rate": 0.00048707999296695656, "loss": 17.233, "step": 52820 }, { "epoch": 0.0782726685587993, "grad_norm": 7.03125, "learning_rate": 0.000487075054031949, "loss": 17.2097, "step": 52840 }, { "epoch": 0.07830229485272769, "grad_norm": 6.6875, "learning_rate": 0.00048707011509694146, "loss": 17.2797, "step": 52860 }, { "epoch": 0.07833192114665607, "grad_norm": 7.0625, "learning_rate": 0.00048706517616193385, "loss": 17.235, "step": 52880 }, { "epoch": 0.07836154744058446, "grad_norm": 6.5625, "learning_rate": 0.0004870602372269263, "loss": 17.2324, "step": 52900 }, { "epoch": 0.07839117373451286, "grad_norm": 7.0625, "learning_rate": 0.00048705529829191874, "loss": 17.2295, "step": 52920 }, { "epoch": 0.07842080002844125, "grad_norm": 7.125, "learning_rate": 0.0004870503593569112, "loss": 17.1948, "step": 52940 }, { "epoch": 0.07845042632236963, "grad_norm": 7.46875, "learning_rate": 0.0004870454204219036, "loss": 17.2916, "step": 52960 }, { "epoch": 0.07848005261629802, "grad_norm": 7.375, "learning_rate": 0.000487040481486896, "loss": 17.222, "step": 52980 }, { "epoch": 0.07850967891022641, "grad_norm": 7.5, "learning_rate": 0.0004870355425518885, "loss": 17.2454, "step": 53000 }, { "epoch": 0.0785393052041548, "grad_norm": 6.59375, "learning_rate": 0.0004870306036168809, "loss": 17.2013, "step": 53020 }, { "epoch": 0.07856893149808318, "grad_norm": 6.53125, "learning_rate": 0.0004870256646818733, "loss": 17.2211, "step": 53040 }, { "epoch": 0.07859855779201157, "grad_norm": 7.0625, "learning_rate": 0.00048702072574686577, "loss": 17.1865, "step": 53060 }, { "epoch": 0.07862818408593995, "grad_norm": 7.75, "learning_rate": 0.0004870157868118582, "loss": 17.2641, "step": 53080 }, { "epoch": 0.07865781037986834, "grad_norm": 6.34375, "learning_rate": 0.0004870108478768506, "loss": 17.2733, "step": 53100 }, { "epoch": 0.07868743667379673, "grad_norm": 7.21875, "learning_rate": 0.00048700590894184306, "loss": 17.2437, "step": 53120 }, { "epoch": 0.07871706296772511, "grad_norm": 7.71875, "learning_rate": 0.0004870009700068355, "loss": 17.2441, "step": 53140 }, { "epoch": 0.0787466892616535, "grad_norm": 7.1875, "learning_rate": 0.00048699603107182796, "loss": 17.2594, "step": 53160 }, { "epoch": 0.07877631555558189, "grad_norm": 6.40625, "learning_rate": 0.00048699109213682035, "loss": 17.2052, "step": 53180 }, { "epoch": 0.07880594184951027, "grad_norm": 6.4375, "learning_rate": 0.0004869861532018128, "loss": 17.1867, "step": 53200 }, { "epoch": 0.07883556814343866, "grad_norm": 6.875, "learning_rate": 0.00048698121426680524, "loss": 17.2648, "step": 53220 }, { "epoch": 0.07886519443736706, "grad_norm": 7.65625, "learning_rate": 0.0004869762753317977, "loss": 17.2487, "step": 53240 }, { "epoch": 0.07889482073129545, "grad_norm": 7.25, "learning_rate": 0.0004869713363967901, "loss": 17.2441, "step": 53260 }, { "epoch": 0.07892444702522383, "grad_norm": 7.40625, "learning_rate": 0.0004869663974617825, "loss": 17.2138, "step": 53280 }, { "epoch": 0.07895407331915222, "grad_norm": 7.8125, "learning_rate": 0.000486961458526775, "loss": 17.2126, "step": 53300 }, { "epoch": 0.0789836996130806, "grad_norm": 6.875, "learning_rate": 0.0004869565195917674, "loss": 17.2211, "step": 53320 }, { "epoch": 0.07901332590700899, "grad_norm": 6.75, "learning_rate": 0.0004869515806567598, "loss": 17.2264, "step": 53340 }, { "epoch": 0.07904295220093738, "grad_norm": 6.65625, "learning_rate": 0.00048694664172175227, "loss": 17.1761, "step": 53360 }, { "epoch": 0.07907257849486576, "grad_norm": 7.125, "learning_rate": 0.0004869417027867447, "loss": 17.2379, "step": 53380 }, { "epoch": 0.07910220478879415, "grad_norm": 7.46875, "learning_rate": 0.0004869367638517371, "loss": 17.2008, "step": 53400 }, { "epoch": 0.07913183108272254, "grad_norm": 6.21875, "learning_rate": 0.00048693182491672956, "loss": 17.2113, "step": 53420 }, { "epoch": 0.07916145737665092, "grad_norm": 7.15625, "learning_rate": 0.000486926885981722, "loss": 17.2386, "step": 53440 }, { "epoch": 0.07919108367057931, "grad_norm": 6.46875, "learning_rate": 0.00048692194704671446, "loss": 17.2564, "step": 53460 }, { "epoch": 0.0792207099645077, "grad_norm": 6.53125, "learning_rate": 0.00048691700811170685, "loss": 17.2423, "step": 53480 }, { "epoch": 0.07925033625843608, "grad_norm": 9.3125, "learning_rate": 0.0004869120691766993, "loss": 17.208, "step": 53500 }, { "epoch": 0.07927996255236447, "grad_norm": 7.03125, "learning_rate": 0.00048690713024169175, "loss": 17.2321, "step": 53520 }, { "epoch": 0.07930958884629286, "grad_norm": 6.625, "learning_rate": 0.0004869021913066842, "loss": 17.2312, "step": 53540 }, { "epoch": 0.07933921514022126, "grad_norm": 6.75, "learning_rate": 0.0004868972523716766, "loss": 17.1516, "step": 53560 }, { "epoch": 0.07936884143414964, "grad_norm": 6.90625, "learning_rate": 0.00048689231343666903, "loss": 17.1932, "step": 53580 }, { "epoch": 0.07939846772807803, "grad_norm": 7.28125, "learning_rate": 0.0004868873745016615, "loss": 17.2248, "step": 53600 }, { "epoch": 0.07942809402200642, "grad_norm": 7.90625, "learning_rate": 0.0004868824355666539, "loss": 17.2469, "step": 53620 }, { "epoch": 0.0794577203159348, "grad_norm": 7.3125, "learning_rate": 0.0004868774966316463, "loss": 17.1983, "step": 53640 }, { "epoch": 0.07948734660986319, "grad_norm": 6.96875, "learning_rate": 0.00048687255769663877, "loss": 17.2376, "step": 53660 }, { "epoch": 0.07951697290379157, "grad_norm": 6.84375, "learning_rate": 0.0004868676187616312, "loss": 17.1938, "step": 53680 }, { "epoch": 0.07954659919771996, "grad_norm": 6.84375, "learning_rate": 0.0004868626798266236, "loss": 17.192, "step": 53700 }, { "epoch": 0.07957622549164835, "grad_norm": 6.59375, "learning_rate": 0.00048685774089161606, "loss": 17.1424, "step": 53720 }, { "epoch": 0.07960585178557673, "grad_norm": 7.4375, "learning_rate": 0.0004868528019566085, "loss": 17.1893, "step": 53740 }, { "epoch": 0.07963547807950512, "grad_norm": 7.03125, "learning_rate": 0.00048684786302160096, "loss": 17.1995, "step": 53760 }, { "epoch": 0.0796651043734335, "grad_norm": 6.75, "learning_rate": 0.00048684292408659335, "loss": 17.1073, "step": 53780 }, { "epoch": 0.0796947306673619, "grad_norm": 6.9375, "learning_rate": 0.0004868379851515858, "loss": 17.2079, "step": 53800 }, { "epoch": 0.07972435696129028, "grad_norm": 6.65625, "learning_rate": 0.00048683304621657825, "loss": 17.2591, "step": 53820 }, { "epoch": 0.07975398325521867, "grad_norm": 7.28125, "learning_rate": 0.0004868281072815707, "loss": 17.1709, "step": 53840 }, { "epoch": 0.07978360954914705, "grad_norm": 6.6875, "learning_rate": 0.0004868231683465631, "loss": 17.1648, "step": 53860 }, { "epoch": 0.07981323584307545, "grad_norm": 7.03125, "learning_rate": 0.00048681822941155553, "loss": 17.2134, "step": 53880 }, { "epoch": 0.07984286213700384, "grad_norm": 7.78125, "learning_rate": 0.000486813290476548, "loss": 17.171, "step": 53900 }, { "epoch": 0.07987248843093223, "grad_norm": 7.59375, "learning_rate": 0.00048680835154154043, "loss": 17.1823, "step": 53920 }, { "epoch": 0.07990211472486061, "grad_norm": 7.8125, "learning_rate": 0.0004868034126065328, "loss": 17.1077, "step": 53940 }, { "epoch": 0.079931741018789, "grad_norm": 6.96875, "learning_rate": 0.0004867984736715253, "loss": 17.1986, "step": 53960 }, { "epoch": 0.07996136731271739, "grad_norm": 6.84375, "learning_rate": 0.0004867935347365177, "loss": 17.2026, "step": 53980 }, { "epoch": 0.07999099360664577, "grad_norm": 7.84375, "learning_rate": 0.0004867885958015101, "loss": 17.1701, "step": 54000 }, { "epoch": 0.08002061990057416, "grad_norm": 6.5625, "learning_rate": 0.00048678365686650256, "loss": 17.0925, "step": 54020 }, { "epoch": 0.08005024619450254, "grad_norm": 6.5625, "learning_rate": 0.000486778717931495, "loss": 17.1561, "step": 54040 }, { "epoch": 0.08007987248843093, "grad_norm": 7.09375, "learning_rate": 0.00048677377899648746, "loss": 17.1681, "step": 54060 }, { "epoch": 0.08010949878235932, "grad_norm": 6.9375, "learning_rate": 0.00048676884006147985, "loss": 17.1963, "step": 54080 }, { "epoch": 0.0801391250762877, "grad_norm": 7.1875, "learning_rate": 0.0004867639011264723, "loss": 17.1735, "step": 54100 }, { "epoch": 0.08016875137021609, "grad_norm": 6.6875, "learning_rate": 0.00048675896219146475, "loss": 17.2106, "step": 54120 }, { "epoch": 0.08019837766414448, "grad_norm": 7.03125, "learning_rate": 0.0004867540232564572, "loss": 17.1526, "step": 54140 }, { "epoch": 0.08022800395807286, "grad_norm": 6.59375, "learning_rate": 0.0004867490843214496, "loss": 17.225, "step": 54160 }, { "epoch": 0.08025763025200125, "grad_norm": 7.625, "learning_rate": 0.00048674414538644204, "loss": 17.152, "step": 54180 }, { "epoch": 0.08028725654592965, "grad_norm": 7.25, "learning_rate": 0.0004867392064514345, "loss": 17.1413, "step": 54200 }, { "epoch": 0.08031688283985804, "grad_norm": 6.5625, "learning_rate": 0.00048673426751642693, "loss": 17.1871, "step": 54220 }, { "epoch": 0.08034650913378642, "grad_norm": 6.53125, "learning_rate": 0.0004867293285814193, "loss": 17.1902, "step": 54240 }, { "epoch": 0.08037613542771481, "grad_norm": 6.84375, "learning_rate": 0.0004867243896464118, "loss": 17.1454, "step": 54260 }, { "epoch": 0.0804057617216432, "grad_norm": 7.3125, "learning_rate": 0.0004867194507114042, "loss": 17.1514, "step": 54280 }, { "epoch": 0.08043538801557158, "grad_norm": 8.0, "learning_rate": 0.0004867145117763966, "loss": 17.1142, "step": 54300 }, { "epoch": 0.08046501430949997, "grad_norm": 6.59375, "learning_rate": 0.00048670957284138906, "loss": 17.1669, "step": 54320 }, { "epoch": 0.08049464060342836, "grad_norm": 7.15625, "learning_rate": 0.0004867046339063815, "loss": 17.2066, "step": 54340 }, { "epoch": 0.08052426689735674, "grad_norm": 8.5625, "learning_rate": 0.00048669969497137396, "loss": 17.1445, "step": 54360 }, { "epoch": 0.08055389319128513, "grad_norm": 7.3125, "learning_rate": 0.00048669475603636635, "loss": 17.1322, "step": 54380 }, { "epoch": 0.08058351948521351, "grad_norm": 7.875, "learning_rate": 0.0004866898171013588, "loss": 17.1718, "step": 54400 }, { "epoch": 0.0806131457791419, "grad_norm": 7.6875, "learning_rate": 0.00048668487816635125, "loss": 17.1944, "step": 54420 }, { "epoch": 0.08064277207307029, "grad_norm": 7.25, "learning_rate": 0.0004866799392313437, "loss": 17.1168, "step": 54440 }, { "epoch": 0.08067239836699867, "grad_norm": 6.4375, "learning_rate": 0.0004866750002963361, "loss": 17.1513, "step": 54460 }, { "epoch": 0.08070202466092706, "grad_norm": 8.0625, "learning_rate": 0.00048667006136132854, "loss": 17.1629, "step": 54480 }, { "epoch": 0.08073165095485545, "grad_norm": 7.125, "learning_rate": 0.000486665122426321, "loss": 17.184, "step": 54500 }, { "epoch": 0.08076127724878385, "grad_norm": 7.25, "learning_rate": 0.00048666018349131343, "loss": 17.1786, "step": 54520 }, { "epoch": 0.08079090354271223, "grad_norm": 6.59375, "learning_rate": 0.0004866552445563058, "loss": 17.165, "step": 54540 }, { "epoch": 0.08082052983664062, "grad_norm": 6.78125, "learning_rate": 0.0004866503056212983, "loss": 17.1977, "step": 54560 }, { "epoch": 0.080850156130569, "grad_norm": 6.59375, "learning_rate": 0.0004866453666862907, "loss": 17.2171, "step": 54580 }, { "epoch": 0.08087978242449739, "grad_norm": 7.53125, "learning_rate": 0.00048664042775128317, "loss": 17.1809, "step": 54600 }, { "epoch": 0.08090940871842578, "grad_norm": 8.5, "learning_rate": 0.00048663548881627556, "loss": 17.1462, "step": 54620 }, { "epoch": 0.08093903501235417, "grad_norm": 6.6875, "learning_rate": 0.000486630549881268, "loss": 17.1718, "step": 54640 }, { "epoch": 0.08096866130628255, "grad_norm": 6.6875, "learning_rate": 0.00048662561094626046, "loss": 17.1368, "step": 54660 }, { "epoch": 0.08099828760021094, "grad_norm": 6.78125, "learning_rate": 0.00048662067201125285, "loss": 17.1821, "step": 54680 }, { "epoch": 0.08102791389413933, "grad_norm": 7.0, "learning_rate": 0.0004866157330762453, "loss": 17.133, "step": 54700 }, { "epoch": 0.08105754018806771, "grad_norm": 6.46875, "learning_rate": 0.00048661079414123775, "loss": 17.1526, "step": 54720 }, { "epoch": 0.0810871664819961, "grad_norm": 6.375, "learning_rate": 0.0004866058552062302, "loss": 17.1467, "step": 54740 }, { "epoch": 0.08111679277592448, "grad_norm": 7.09375, "learning_rate": 0.0004866009162712226, "loss": 17.1284, "step": 54760 }, { "epoch": 0.08114641906985287, "grad_norm": 7.28125, "learning_rate": 0.00048659597733621504, "loss": 17.1651, "step": 54780 }, { "epoch": 0.08117604536378126, "grad_norm": 6.28125, "learning_rate": 0.0004865910384012075, "loss": 17.1654, "step": 54800 }, { "epoch": 0.08120567165770964, "grad_norm": 7.21875, "learning_rate": 0.00048658609946619993, "loss": 17.123, "step": 54820 }, { "epoch": 0.08123529795163804, "grad_norm": 7.09375, "learning_rate": 0.0004865811605311923, "loss": 17.1313, "step": 54840 }, { "epoch": 0.08126492424556643, "grad_norm": 6.71875, "learning_rate": 0.0004865762215961848, "loss": 17.1357, "step": 54860 }, { "epoch": 0.08129455053949482, "grad_norm": 6.125, "learning_rate": 0.0004865712826611772, "loss": 17.1314, "step": 54880 }, { "epoch": 0.0813241768334232, "grad_norm": 6.4375, "learning_rate": 0.00048656634372616967, "loss": 17.2081, "step": 54900 }, { "epoch": 0.08135380312735159, "grad_norm": 7.71875, "learning_rate": 0.00048656140479116206, "loss": 17.1288, "step": 54920 }, { "epoch": 0.08138342942127998, "grad_norm": 7.03125, "learning_rate": 0.00048655646585615456, "loss": 17.1525, "step": 54940 }, { "epoch": 0.08141305571520836, "grad_norm": 7.21875, "learning_rate": 0.00048655152692114696, "loss": 17.1445, "step": 54960 }, { "epoch": 0.08144268200913675, "grad_norm": 6.625, "learning_rate": 0.00048654658798613935, "loss": 17.115, "step": 54980 }, { "epoch": 0.08147230830306514, "grad_norm": 6.34375, "learning_rate": 0.0004865416490511318, "loss": 17.1327, "step": 55000 }, { "epoch": 0.08150193459699352, "grad_norm": 6.9375, "learning_rate": 0.00048653671011612425, "loss": 17.1478, "step": 55020 }, { "epoch": 0.08153156089092191, "grad_norm": 7.28125, "learning_rate": 0.0004865317711811167, "loss": 17.1158, "step": 55040 }, { "epoch": 0.0815611871848503, "grad_norm": 6.96875, "learning_rate": 0.0004865268322461091, "loss": 17.1082, "step": 55060 }, { "epoch": 0.08159081347877868, "grad_norm": 7.34375, "learning_rate": 0.00048652189331110154, "loss": 17.1178, "step": 55080 }, { "epoch": 0.08162043977270707, "grad_norm": 7.125, "learning_rate": 0.000486516954376094, "loss": 17.0626, "step": 55100 }, { "epoch": 0.08165006606663545, "grad_norm": 7.40625, "learning_rate": 0.00048651201544108643, "loss": 17.1654, "step": 55120 }, { "epoch": 0.08167969236056384, "grad_norm": 6.375, "learning_rate": 0.0004865070765060788, "loss": 17.1095, "step": 55140 }, { "epoch": 0.08170931865449224, "grad_norm": 7.21875, "learning_rate": 0.0004865021375710713, "loss": 17.1073, "step": 55160 }, { "epoch": 0.08173894494842063, "grad_norm": 7.15625, "learning_rate": 0.0004864971986360637, "loss": 17.1573, "step": 55180 }, { "epoch": 0.08176857124234901, "grad_norm": 7.15625, "learning_rate": 0.00048649225970105617, "loss": 17.1434, "step": 55200 }, { "epoch": 0.0817981975362774, "grad_norm": 6.59375, "learning_rate": 0.00048648732076604856, "loss": 17.0566, "step": 55220 }, { "epoch": 0.08182782383020579, "grad_norm": 7.71875, "learning_rate": 0.00048648238183104106, "loss": 17.1302, "step": 55240 }, { "epoch": 0.08185745012413417, "grad_norm": 6.9375, "learning_rate": 0.00048647744289603346, "loss": 17.1324, "step": 55260 }, { "epoch": 0.08188707641806256, "grad_norm": 12.125, "learning_rate": 0.0004864725039610259, "loss": 17.0792, "step": 55280 }, { "epoch": 0.08191670271199095, "grad_norm": 7.90625, "learning_rate": 0.0004864675650260183, "loss": 17.1382, "step": 55300 }, { "epoch": 0.08194632900591933, "grad_norm": 6.78125, "learning_rate": 0.00048646262609101075, "loss": 17.1347, "step": 55320 }, { "epoch": 0.08197595529984772, "grad_norm": 7.25, "learning_rate": 0.0004864576871560032, "loss": 17.0667, "step": 55340 }, { "epoch": 0.0820055815937761, "grad_norm": 7.0, "learning_rate": 0.0004864527482209956, "loss": 17.0842, "step": 55360 }, { "epoch": 0.08203520788770449, "grad_norm": 7.71875, "learning_rate": 0.00048644780928598804, "loss": 17.1114, "step": 55380 }, { "epoch": 0.08206483418163288, "grad_norm": 7.34375, "learning_rate": 0.0004864428703509805, "loss": 17.089, "step": 55400 }, { "epoch": 0.08209446047556127, "grad_norm": 7.875, "learning_rate": 0.00048643793141597293, "loss": 17.1292, "step": 55420 }, { "epoch": 0.08212408676948965, "grad_norm": 6.96875, "learning_rate": 0.0004864329924809653, "loss": 17.1258, "step": 55440 }, { "epoch": 0.08215371306341804, "grad_norm": 6.28125, "learning_rate": 0.0004864280535459578, "loss": 17.1261, "step": 55460 }, { "epoch": 0.08218333935734644, "grad_norm": 6.6875, "learning_rate": 0.0004864231146109502, "loss": 17.0684, "step": 55480 }, { "epoch": 0.08221296565127482, "grad_norm": 7.625, "learning_rate": 0.00048641817567594267, "loss": 17.0862, "step": 55500 }, { "epoch": 0.08224259194520321, "grad_norm": 6.53125, "learning_rate": 0.00048641323674093506, "loss": 17.1307, "step": 55520 }, { "epoch": 0.0822722182391316, "grad_norm": 6.46875, "learning_rate": 0.00048640829780592756, "loss": 17.0831, "step": 55540 }, { "epoch": 0.08230184453305998, "grad_norm": 7.0, "learning_rate": 0.00048640335887091996, "loss": 17.0549, "step": 55560 }, { "epoch": 0.08233147082698837, "grad_norm": 6.625, "learning_rate": 0.0004863984199359124, "loss": 17.1213, "step": 55580 }, { "epoch": 0.08236109712091676, "grad_norm": 7.09375, "learning_rate": 0.0004863934810009048, "loss": 17.1498, "step": 55600 }, { "epoch": 0.08239072341484514, "grad_norm": 6.25, "learning_rate": 0.0004863885420658973, "loss": 17.0782, "step": 55620 }, { "epoch": 0.08242034970877353, "grad_norm": 6.4375, "learning_rate": 0.0004863836031308897, "loss": 17.1037, "step": 55640 }, { "epoch": 0.08244997600270192, "grad_norm": 7.15625, "learning_rate": 0.0004863786641958821, "loss": 17.1475, "step": 55660 }, { "epoch": 0.0824796022966303, "grad_norm": 6.90625, "learning_rate": 0.00048637372526087454, "loss": 17.1295, "step": 55680 }, { "epoch": 0.08250922859055869, "grad_norm": 6.21875, "learning_rate": 0.000486368786325867, "loss": 17.0528, "step": 55700 }, { "epoch": 0.08253885488448708, "grad_norm": 6.21875, "learning_rate": 0.00048636384739085943, "loss": 17.0928, "step": 55720 }, { "epoch": 0.08256848117841546, "grad_norm": 7.09375, "learning_rate": 0.0004863589084558518, "loss": 17.0391, "step": 55740 }, { "epoch": 0.08259810747234385, "grad_norm": 7.21875, "learning_rate": 0.0004863539695208443, "loss": 17.1124, "step": 55760 }, { "epoch": 0.08262773376627225, "grad_norm": 6.65625, "learning_rate": 0.0004863490305858367, "loss": 17.1316, "step": 55780 }, { "epoch": 0.08265736006020064, "grad_norm": 6.8125, "learning_rate": 0.00048634409165082917, "loss": 17.081, "step": 55800 }, { "epoch": 0.08268698635412902, "grad_norm": 7.625, "learning_rate": 0.00048633915271582156, "loss": 17.064, "step": 55820 }, { "epoch": 0.08271661264805741, "grad_norm": 5.96875, "learning_rate": 0.00048633421378081406, "loss": 17.0937, "step": 55840 }, { "epoch": 0.0827462389419858, "grad_norm": 7.75, "learning_rate": 0.00048632927484580646, "loss": 17.0401, "step": 55860 }, { "epoch": 0.08277586523591418, "grad_norm": 6.84375, "learning_rate": 0.0004863243359107989, "loss": 17.0646, "step": 55880 }, { "epoch": 0.08280549152984257, "grad_norm": 6.3125, "learning_rate": 0.0004863193969757913, "loss": 17.0864, "step": 55900 }, { "epoch": 0.08283511782377095, "grad_norm": 7.90625, "learning_rate": 0.0004863144580407838, "loss": 17.0813, "step": 55920 }, { "epoch": 0.08286474411769934, "grad_norm": 8.0625, "learning_rate": 0.0004863095191057762, "loss": 17.0242, "step": 55940 }, { "epoch": 0.08289437041162773, "grad_norm": 6.78125, "learning_rate": 0.00048630458017076864, "loss": 17.1233, "step": 55960 }, { "epoch": 0.08292399670555611, "grad_norm": 6.3125, "learning_rate": 0.00048629964123576104, "loss": 17.0682, "step": 55980 }, { "epoch": 0.0829536229994845, "grad_norm": 7.375, "learning_rate": 0.0004862947023007535, "loss": 17.0628, "step": 56000 }, { "epoch": 0.08298324929341289, "grad_norm": 6.6875, "learning_rate": 0.00048628976336574593, "loss": 17.0877, "step": 56020 }, { "epoch": 0.08301287558734127, "grad_norm": 6.78125, "learning_rate": 0.0004862848244307383, "loss": 17.0776, "step": 56040 }, { "epoch": 0.08304250188126966, "grad_norm": 8.25, "learning_rate": 0.0004862798854957308, "loss": 17.0322, "step": 56060 }, { "epoch": 0.08307212817519805, "grad_norm": 6.5, "learning_rate": 0.0004862749465607232, "loss": 17.0593, "step": 56080 }, { "epoch": 0.08310175446912645, "grad_norm": 7.0, "learning_rate": 0.00048627000762571567, "loss": 17.0977, "step": 56100 }, { "epoch": 0.08313138076305483, "grad_norm": 6.21875, "learning_rate": 0.00048626506869070806, "loss": 17.0193, "step": 56120 }, { "epoch": 0.08316100705698322, "grad_norm": 7.09375, "learning_rate": 0.00048626012975570057, "loss": 17.0925, "step": 56140 }, { "epoch": 0.0831906333509116, "grad_norm": 7.5, "learning_rate": 0.00048625519082069296, "loss": 17.04, "step": 56160 }, { "epoch": 0.08322025964483999, "grad_norm": 7.40625, "learning_rate": 0.0004862502518856854, "loss": 17.1382, "step": 56180 }, { "epoch": 0.08324988593876838, "grad_norm": 7.34375, "learning_rate": 0.0004862453129506778, "loss": 17.0622, "step": 56200 }, { "epoch": 0.08327951223269676, "grad_norm": 8.125, "learning_rate": 0.0004862403740156703, "loss": 17.0424, "step": 56220 }, { "epoch": 0.08330913852662515, "grad_norm": 6.96875, "learning_rate": 0.0004862354350806627, "loss": 17.0722, "step": 56240 }, { "epoch": 0.08333876482055354, "grad_norm": 6.9375, "learning_rate": 0.00048623049614565514, "loss": 17.036, "step": 56260 }, { "epoch": 0.08336839111448192, "grad_norm": 6.78125, "learning_rate": 0.00048622555721064754, "loss": 17.0758, "step": 56280 }, { "epoch": 0.08339801740841031, "grad_norm": 6.6875, "learning_rate": 0.00048622061827564004, "loss": 17.1354, "step": 56300 }, { "epoch": 0.0834276437023387, "grad_norm": 6.9375, "learning_rate": 0.00048621567934063243, "loss": 17.1577, "step": 56320 }, { "epoch": 0.08345726999626708, "grad_norm": 6.46875, "learning_rate": 0.0004862107404056248, "loss": 17.0566, "step": 56340 }, { "epoch": 0.08348689629019547, "grad_norm": 7.40625, "learning_rate": 0.0004862058014706173, "loss": 17.0633, "step": 56360 }, { "epoch": 0.08351652258412386, "grad_norm": 7.0, "learning_rate": 0.0004862008625356097, "loss": 17.0427, "step": 56380 }, { "epoch": 0.08354614887805224, "grad_norm": 6.875, "learning_rate": 0.00048619592360060217, "loss": 17.068, "step": 56400 }, { "epoch": 0.08357577517198064, "grad_norm": 7.71875, "learning_rate": 0.00048619098466559456, "loss": 17.0289, "step": 56420 }, { "epoch": 0.08360540146590903, "grad_norm": 7.21875, "learning_rate": 0.00048618604573058707, "loss": 17.0595, "step": 56440 }, { "epoch": 0.08363502775983742, "grad_norm": 6.84375, "learning_rate": 0.00048618110679557946, "loss": 17.0464, "step": 56460 }, { "epoch": 0.0836646540537658, "grad_norm": 6.875, "learning_rate": 0.0004861761678605719, "loss": 17.0377, "step": 56480 }, { "epoch": 0.08369428034769419, "grad_norm": 6.90625, "learning_rate": 0.0004861712289255643, "loss": 17.1083, "step": 56500 }, { "epoch": 0.08372390664162258, "grad_norm": 6.4375, "learning_rate": 0.0004861662899905568, "loss": 17.0156, "step": 56520 }, { "epoch": 0.08375353293555096, "grad_norm": 7.21875, "learning_rate": 0.0004861613510555492, "loss": 17.1009, "step": 56540 }, { "epoch": 0.08378315922947935, "grad_norm": 7.09375, "learning_rate": 0.00048615641212054164, "loss": 17.0541, "step": 56560 }, { "epoch": 0.08381278552340773, "grad_norm": 6.875, "learning_rate": 0.00048615147318553404, "loss": 17.0052, "step": 56580 }, { "epoch": 0.08384241181733612, "grad_norm": 6.78125, "learning_rate": 0.00048614653425052654, "loss": 17.0689, "step": 56600 }, { "epoch": 0.08387203811126451, "grad_norm": 6.65625, "learning_rate": 0.00048614159531551893, "loss": 17.0825, "step": 56620 }, { "epoch": 0.0839016644051929, "grad_norm": 6.90625, "learning_rate": 0.0004861366563805114, "loss": 17.0745, "step": 56640 }, { "epoch": 0.08393129069912128, "grad_norm": 6.375, "learning_rate": 0.0004861317174455038, "loss": 17.0943, "step": 56660 }, { "epoch": 0.08396091699304967, "grad_norm": 7.28125, "learning_rate": 0.0004861267785104962, "loss": 16.9957, "step": 56680 }, { "epoch": 0.08399054328697805, "grad_norm": 7.15625, "learning_rate": 0.00048612183957548867, "loss": 17.0411, "step": 56700 }, { "epoch": 0.08402016958090644, "grad_norm": 7.25, "learning_rate": 0.00048611690064048106, "loss": 17.0705, "step": 56720 }, { "epoch": 0.08404979587483484, "grad_norm": 7.15625, "learning_rate": 0.00048611196170547357, "loss": 17.0302, "step": 56740 }, { "epoch": 0.08407942216876323, "grad_norm": 7.25, "learning_rate": 0.00048610702277046596, "loss": 17.0697, "step": 56760 }, { "epoch": 0.08410904846269161, "grad_norm": 7.0625, "learning_rate": 0.0004861020838354584, "loss": 16.9995, "step": 56780 }, { "epoch": 0.08413867475662, "grad_norm": 6.625, "learning_rate": 0.0004860971449004508, "loss": 17.0243, "step": 56800 }, { "epoch": 0.08416830105054839, "grad_norm": 7.1875, "learning_rate": 0.0004860922059654433, "loss": 17.0269, "step": 56820 }, { "epoch": 0.08419792734447677, "grad_norm": 7.1875, "learning_rate": 0.0004860872670304357, "loss": 17.095, "step": 56840 }, { "epoch": 0.08422755363840516, "grad_norm": 6.375, "learning_rate": 0.00048608232809542814, "loss": 16.9837, "step": 56860 }, { "epoch": 0.08425717993233355, "grad_norm": 6.40625, "learning_rate": 0.00048607738916042054, "loss": 17.0325, "step": 56880 }, { "epoch": 0.08428680622626193, "grad_norm": 6.5625, "learning_rate": 0.00048607245022541304, "loss": 17.0513, "step": 56900 }, { "epoch": 0.08431643252019032, "grad_norm": 6.71875, "learning_rate": 0.00048606751129040543, "loss": 17.0624, "step": 56920 }, { "epoch": 0.0843460588141187, "grad_norm": 6.8125, "learning_rate": 0.0004860625723553979, "loss": 17.0052, "step": 56940 }, { "epoch": 0.08437568510804709, "grad_norm": 7.09375, "learning_rate": 0.0004860576334203903, "loss": 17.0705, "step": 56960 }, { "epoch": 0.08440531140197548, "grad_norm": 7.0625, "learning_rate": 0.0004860526944853828, "loss": 16.9932, "step": 56980 }, { "epoch": 0.08443493769590386, "grad_norm": 6.9375, "learning_rate": 0.00048604775555037517, "loss": 16.9712, "step": 57000 }, { "epoch": 0.08446456398983225, "grad_norm": 7.34375, "learning_rate": 0.00048604281661536756, "loss": 17.056, "step": 57020 }, { "epoch": 0.08449419028376064, "grad_norm": 7.4375, "learning_rate": 0.00048603787768036007, "loss": 17.0091, "step": 57040 }, { "epoch": 0.08452381657768904, "grad_norm": 6.78125, "learning_rate": 0.00048603293874535246, "loss": 16.9849, "step": 57060 }, { "epoch": 0.08455344287161742, "grad_norm": 7.40625, "learning_rate": 0.0004860279998103449, "loss": 17.0407, "step": 57080 }, { "epoch": 0.08458306916554581, "grad_norm": 7.21875, "learning_rate": 0.0004860230608753373, "loss": 17.0114, "step": 57100 }, { "epoch": 0.0846126954594742, "grad_norm": 7.0, "learning_rate": 0.0004860181219403298, "loss": 17.0502, "step": 57120 }, { "epoch": 0.08464232175340258, "grad_norm": 7.4375, "learning_rate": 0.0004860131830053222, "loss": 17.044, "step": 57140 }, { "epoch": 0.08467194804733097, "grad_norm": 6.5625, "learning_rate": 0.00048600824407031464, "loss": 17.0582, "step": 57160 }, { "epoch": 0.08470157434125936, "grad_norm": 7.59375, "learning_rate": 0.00048600330513530704, "loss": 17.0316, "step": 57180 }, { "epoch": 0.08473120063518774, "grad_norm": 7.25, "learning_rate": 0.00048599836620029954, "loss": 17.0576, "step": 57200 }, { "epoch": 0.08476082692911613, "grad_norm": 7.6875, "learning_rate": 0.00048599342726529193, "loss": 17.0589, "step": 57220 }, { "epoch": 0.08479045322304452, "grad_norm": 6.46875, "learning_rate": 0.0004859884883302844, "loss": 16.9831, "step": 57240 }, { "epoch": 0.0848200795169729, "grad_norm": 6.125, "learning_rate": 0.0004859835493952768, "loss": 16.9896, "step": 57260 }, { "epoch": 0.08484970581090129, "grad_norm": 6.6875, "learning_rate": 0.0004859786104602693, "loss": 16.9824, "step": 57280 }, { "epoch": 0.08487933210482967, "grad_norm": 8.25, "learning_rate": 0.00048597367152526167, "loss": 16.9623, "step": 57300 }, { "epoch": 0.08490895839875806, "grad_norm": 7.03125, "learning_rate": 0.0004859687325902541, "loss": 16.9834, "step": 57320 }, { "epoch": 0.08493858469268645, "grad_norm": 6.53125, "learning_rate": 0.00048596379365524657, "loss": 16.9729, "step": 57340 }, { "epoch": 0.08496821098661483, "grad_norm": 6.21875, "learning_rate": 0.00048595885472023896, "loss": 17.0528, "step": 57360 }, { "epoch": 0.08499783728054323, "grad_norm": 7.15625, "learning_rate": 0.0004859539157852314, "loss": 17.0257, "step": 57380 }, { "epoch": 0.08502746357447162, "grad_norm": 7.09375, "learning_rate": 0.0004859489768502238, "loss": 16.8887, "step": 57400 }, { "epoch": 0.08505708986840001, "grad_norm": 7.4375, "learning_rate": 0.0004859440379152163, "loss": 17.0345, "step": 57420 }, { "epoch": 0.0850867161623284, "grad_norm": 6.84375, "learning_rate": 0.0004859390989802087, "loss": 16.9734, "step": 57440 }, { "epoch": 0.08511634245625678, "grad_norm": 6.875, "learning_rate": 0.00048593416004520114, "loss": 17.0113, "step": 57460 }, { "epoch": 0.08514596875018517, "grad_norm": 7.21875, "learning_rate": 0.00048592922111019354, "loss": 16.9691, "step": 57480 }, { "epoch": 0.08517559504411355, "grad_norm": 7.0625, "learning_rate": 0.00048592428217518604, "loss": 17.0288, "step": 57500 }, { "epoch": 0.08520522133804194, "grad_norm": 6.78125, "learning_rate": 0.00048591934324017843, "loss": 17.0317, "step": 57520 }, { "epoch": 0.08523484763197033, "grad_norm": 7.0625, "learning_rate": 0.0004859144043051709, "loss": 17.0653, "step": 57540 }, { "epoch": 0.08526447392589871, "grad_norm": 6.375, "learning_rate": 0.0004859094653701633, "loss": 17.0473, "step": 57560 }, { "epoch": 0.0852941002198271, "grad_norm": 7.375, "learning_rate": 0.0004859045264351558, "loss": 16.9975, "step": 57580 }, { "epoch": 0.08532372651375549, "grad_norm": 7.15625, "learning_rate": 0.00048589958750014817, "loss": 16.9854, "step": 57600 }, { "epoch": 0.08535335280768387, "grad_norm": 6.4375, "learning_rate": 0.0004858946485651406, "loss": 16.9975, "step": 57620 }, { "epoch": 0.08538297910161226, "grad_norm": 6.625, "learning_rate": 0.00048588970963013307, "loss": 17.0178, "step": 57640 }, { "epoch": 0.08541260539554064, "grad_norm": 7.3125, "learning_rate": 0.0004858847706951255, "loss": 16.9128, "step": 57660 }, { "epoch": 0.08544223168946903, "grad_norm": 7.5, "learning_rate": 0.0004858798317601179, "loss": 17.0329, "step": 57680 }, { "epoch": 0.08547185798339743, "grad_norm": 7.15625, "learning_rate": 0.0004858748928251103, "loss": 17.0416, "step": 57700 }, { "epoch": 0.08550148427732582, "grad_norm": 7.34375, "learning_rate": 0.0004858699538901028, "loss": 17.0423, "step": 57720 }, { "epoch": 0.0855311105712542, "grad_norm": 6.21875, "learning_rate": 0.0004858650149550952, "loss": 16.9665, "step": 57740 }, { "epoch": 0.08556073686518259, "grad_norm": 6.9375, "learning_rate": 0.00048586007602008765, "loss": 16.9616, "step": 57760 }, { "epoch": 0.08559036315911098, "grad_norm": 7.3125, "learning_rate": 0.00048585513708508004, "loss": 16.9745, "step": 57780 }, { "epoch": 0.08561998945303936, "grad_norm": 6.90625, "learning_rate": 0.00048585019815007254, "loss": 16.9302, "step": 57800 }, { "epoch": 0.08564961574696775, "grad_norm": 7.125, "learning_rate": 0.00048584525921506493, "loss": 16.9713, "step": 57820 }, { "epoch": 0.08567924204089614, "grad_norm": 6.59375, "learning_rate": 0.0004858403202800574, "loss": 16.9939, "step": 57840 }, { "epoch": 0.08570886833482452, "grad_norm": 6.78125, "learning_rate": 0.0004858353813450498, "loss": 16.9307, "step": 57860 }, { "epoch": 0.08573849462875291, "grad_norm": 7.125, "learning_rate": 0.0004858304424100423, "loss": 17.044, "step": 57880 }, { "epoch": 0.0857681209226813, "grad_norm": 7.25, "learning_rate": 0.00048582550347503467, "loss": 17.0308, "step": 57900 }, { "epoch": 0.08579774721660968, "grad_norm": 6.96875, "learning_rate": 0.0004858205645400271, "loss": 17.0287, "step": 57920 }, { "epoch": 0.08582737351053807, "grad_norm": 6.9375, "learning_rate": 0.00048581562560501957, "loss": 17.018, "step": 57940 }, { "epoch": 0.08585699980446646, "grad_norm": 7.28125, "learning_rate": 0.000485810686670012, "loss": 16.9714, "step": 57960 }, { "epoch": 0.08588662609839484, "grad_norm": 6.875, "learning_rate": 0.0004858057477350044, "loss": 16.9836, "step": 57980 }, { "epoch": 0.08591625239232323, "grad_norm": 8.125, "learning_rate": 0.00048580080879999686, "loss": 16.9677, "step": 58000 }, { "epoch": 0.08594587868625163, "grad_norm": 6.71875, "learning_rate": 0.0004857958698649893, "loss": 16.9968, "step": 58020 }, { "epoch": 0.08597550498018001, "grad_norm": 6.53125, "learning_rate": 0.0004857909309299817, "loss": 16.9598, "step": 58040 }, { "epoch": 0.0860051312741084, "grad_norm": 7.0, "learning_rate": 0.00048578599199497415, "loss": 16.9644, "step": 58060 }, { "epoch": 0.08603475756803679, "grad_norm": 6.46875, "learning_rate": 0.00048578105305996654, "loss": 16.9437, "step": 58080 }, { "epoch": 0.08606438386196517, "grad_norm": 6.75, "learning_rate": 0.00048577611412495904, "loss": 16.9639, "step": 58100 }, { "epoch": 0.08609401015589356, "grad_norm": 7.5625, "learning_rate": 0.00048577117518995143, "loss": 17.0207, "step": 58120 }, { "epoch": 0.08612363644982195, "grad_norm": 6.15625, "learning_rate": 0.0004857662362549439, "loss": 16.9994, "step": 58140 }, { "epoch": 0.08615326274375033, "grad_norm": 6.5, "learning_rate": 0.0004857612973199363, "loss": 16.9783, "step": 58160 }, { "epoch": 0.08618288903767872, "grad_norm": 7.4375, "learning_rate": 0.0004857563583849288, "loss": 16.9334, "step": 58180 }, { "epoch": 0.0862125153316071, "grad_norm": 6.75, "learning_rate": 0.00048575141944992117, "loss": 16.9536, "step": 58200 }, { "epoch": 0.08624214162553549, "grad_norm": 6.84375, "learning_rate": 0.0004857464805149136, "loss": 16.925, "step": 58220 }, { "epoch": 0.08627176791946388, "grad_norm": 6.96875, "learning_rate": 0.00048574154157990607, "loss": 16.9183, "step": 58240 }, { "epoch": 0.08630139421339227, "grad_norm": 6.90625, "learning_rate": 0.0004857366026448985, "loss": 17.0481, "step": 58260 }, { "epoch": 0.08633102050732065, "grad_norm": 7.125, "learning_rate": 0.0004857316637098909, "loss": 16.9385, "step": 58280 }, { "epoch": 0.08636064680124904, "grad_norm": 6.40625, "learning_rate": 0.00048572672477488336, "loss": 16.9946, "step": 58300 }, { "epoch": 0.08639027309517743, "grad_norm": 6.8125, "learning_rate": 0.0004857217858398758, "loss": 16.9443, "step": 58320 }, { "epoch": 0.08641989938910583, "grad_norm": 9.1875, "learning_rate": 0.00048571684690486825, "loss": 16.9844, "step": 58340 }, { "epoch": 0.08644952568303421, "grad_norm": 6.375, "learning_rate": 0.00048571190796986065, "loss": 16.9951, "step": 58360 }, { "epoch": 0.0864791519769626, "grad_norm": 6.4375, "learning_rate": 0.00048570696903485304, "loss": 16.9982, "step": 58380 }, { "epoch": 0.08650877827089098, "grad_norm": 6.59375, "learning_rate": 0.00048570203009984554, "loss": 16.9921, "step": 58400 }, { "epoch": 0.08653840456481937, "grad_norm": 6.5625, "learning_rate": 0.00048569709116483794, "loss": 16.9524, "step": 58420 }, { "epoch": 0.08656803085874776, "grad_norm": 6.5625, "learning_rate": 0.0004856921522298304, "loss": 16.9598, "step": 58440 }, { "epoch": 0.08659765715267614, "grad_norm": 7.09375, "learning_rate": 0.0004856872132948228, "loss": 16.98, "step": 58460 }, { "epoch": 0.08662728344660453, "grad_norm": 7.125, "learning_rate": 0.0004856822743598153, "loss": 16.9484, "step": 58480 }, { "epoch": 0.08665690974053292, "grad_norm": 6.53125, "learning_rate": 0.00048567733542480767, "loss": 16.9898, "step": 58500 }, { "epoch": 0.0866865360344613, "grad_norm": 6.75, "learning_rate": 0.0004856723964898001, "loss": 16.965, "step": 58520 }, { "epoch": 0.08671616232838969, "grad_norm": 7.125, "learning_rate": 0.00048566745755479257, "loss": 16.9641, "step": 58540 }, { "epoch": 0.08674578862231808, "grad_norm": 6.09375, "learning_rate": 0.000485662518619785, "loss": 16.9958, "step": 58560 }, { "epoch": 0.08677541491624646, "grad_norm": 6.75, "learning_rate": 0.0004856575796847774, "loss": 16.9278, "step": 58580 }, { "epoch": 0.08680504121017485, "grad_norm": 7.3125, "learning_rate": 0.00048565264074976986, "loss": 16.9816, "step": 58600 }, { "epoch": 0.08683466750410324, "grad_norm": 8.1875, "learning_rate": 0.0004856477018147623, "loss": 16.9842, "step": 58620 }, { "epoch": 0.08686429379803162, "grad_norm": 6.59375, "learning_rate": 0.00048564276287975475, "loss": 16.9433, "step": 58640 }, { "epoch": 0.08689392009196002, "grad_norm": 6.59375, "learning_rate": 0.00048563782394474715, "loss": 16.9635, "step": 58660 }, { "epoch": 0.08692354638588841, "grad_norm": 6.6875, "learning_rate": 0.00048563288500973954, "loss": 16.9406, "step": 58680 }, { "epoch": 0.0869531726798168, "grad_norm": 6.90625, "learning_rate": 0.00048562794607473204, "loss": 16.9745, "step": 58700 }, { "epoch": 0.08698279897374518, "grad_norm": 6.4375, "learning_rate": 0.00048562300713972444, "loss": 16.9556, "step": 58720 }, { "epoch": 0.08701242526767357, "grad_norm": 7.96875, "learning_rate": 0.0004856180682047169, "loss": 16.9668, "step": 58740 }, { "epoch": 0.08704205156160195, "grad_norm": 6.90625, "learning_rate": 0.0004856131292697093, "loss": 16.9706, "step": 58760 }, { "epoch": 0.08707167785553034, "grad_norm": 6.78125, "learning_rate": 0.0004856081903347018, "loss": 16.9702, "step": 58780 }, { "epoch": 0.08710130414945873, "grad_norm": 6.90625, "learning_rate": 0.00048560325139969417, "loss": 16.945, "step": 58800 }, { "epoch": 0.08713093044338711, "grad_norm": 6.40625, "learning_rate": 0.0004855983124646866, "loss": 16.9245, "step": 58820 }, { "epoch": 0.0871605567373155, "grad_norm": 6.03125, "learning_rate": 0.00048559337352967907, "loss": 16.9062, "step": 58840 }, { "epoch": 0.08719018303124389, "grad_norm": 6.6875, "learning_rate": 0.0004855884345946715, "loss": 16.9474, "step": 58860 }, { "epoch": 0.08721980932517227, "grad_norm": 7.9375, "learning_rate": 0.0004855834956596639, "loss": 16.9664, "step": 58880 }, { "epoch": 0.08724943561910066, "grad_norm": 6.96875, "learning_rate": 0.00048557855672465636, "loss": 16.9762, "step": 58900 }, { "epoch": 0.08727906191302905, "grad_norm": 6.78125, "learning_rate": 0.0004855736177896488, "loss": 16.9371, "step": 58920 }, { "epoch": 0.08730868820695743, "grad_norm": 6.8125, "learning_rate": 0.00048556867885464125, "loss": 16.9388, "step": 58940 }, { "epoch": 0.08733831450088583, "grad_norm": 7.375, "learning_rate": 0.00048556373991963365, "loss": 16.921, "step": 58960 }, { "epoch": 0.08736794079481422, "grad_norm": 6.9375, "learning_rate": 0.0004855588009846261, "loss": 16.9074, "step": 58980 }, { "epoch": 0.0873975670887426, "grad_norm": 6.71875, "learning_rate": 0.00048555386204961854, "loss": 17.0075, "step": 59000 }, { "epoch": 0.08742719338267099, "grad_norm": 6.84375, "learning_rate": 0.000485548923114611, "loss": 16.9763, "step": 59020 }, { "epoch": 0.08745681967659938, "grad_norm": 6.9375, "learning_rate": 0.0004855439841796034, "loss": 16.9323, "step": 59040 }, { "epoch": 0.08748644597052777, "grad_norm": 6.75, "learning_rate": 0.0004855390452445958, "loss": 16.8948, "step": 59060 }, { "epoch": 0.08751607226445615, "grad_norm": 6.75, "learning_rate": 0.0004855341063095883, "loss": 16.9331, "step": 59080 }, { "epoch": 0.08754569855838454, "grad_norm": 7.8125, "learning_rate": 0.00048552916737458067, "loss": 16.933, "step": 59100 }, { "epoch": 0.08757532485231292, "grad_norm": 6.1875, "learning_rate": 0.0004855242284395731, "loss": 16.9698, "step": 59120 }, { "epoch": 0.08760495114624131, "grad_norm": 6.8125, "learning_rate": 0.00048551928950456557, "loss": 16.9211, "step": 59140 }, { "epoch": 0.0876345774401697, "grad_norm": 7.40625, "learning_rate": 0.000485514350569558, "loss": 16.9048, "step": 59160 }, { "epoch": 0.08766420373409808, "grad_norm": 6.15625, "learning_rate": 0.0004855094116345504, "loss": 16.9459, "step": 59180 }, { "epoch": 0.08769383002802647, "grad_norm": 8.1875, "learning_rate": 0.00048550447269954286, "loss": 16.9266, "step": 59200 }, { "epoch": 0.08772345632195486, "grad_norm": 6.84375, "learning_rate": 0.0004854995337645353, "loss": 16.9213, "step": 59220 }, { "epoch": 0.08775308261588324, "grad_norm": 6.90625, "learning_rate": 0.00048549459482952775, "loss": 16.9731, "step": 59240 }, { "epoch": 0.08778270890981163, "grad_norm": 6.6875, "learning_rate": 0.00048548965589452015, "loss": 16.94, "step": 59260 }, { "epoch": 0.08781233520374003, "grad_norm": 5.71875, "learning_rate": 0.0004854847169595126, "loss": 16.8507, "step": 59280 }, { "epoch": 0.08784196149766842, "grad_norm": 6.5, "learning_rate": 0.00048547977802450504, "loss": 16.9175, "step": 59300 }, { "epoch": 0.0878715877915968, "grad_norm": 6.78125, "learning_rate": 0.0004854748390894975, "loss": 16.9454, "step": 59320 }, { "epoch": 0.08790121408552519, "grad_norm": 6.84375, "learning_rate": 0.0004854699001544899, "loss": 16.9354, "step": 59340 }, { "epoch": 0.08793084037945358, "grad_norm": 7.375, "learning_rate": 0.0004854649612194823, "loss": 16.8951, "step": 59360 }, { "epoch": 0.08796046667338196, "grad_norm": 7.625, "learning_rate": 0.0004854600222844748, "loss": 16.9231, "step": 59380 }, { "epoch": 0.08799009296731035, "grad_norm": 6.5, "learning_rate": 0.0004854550833494672, "loss": 16.8769, "step": 59400 }, { "epoch": 0.08801971926123874, "grad_norm": 7.71875, "learning_rate": 0.0004854501444144596, "loss": 16.9411, "step": 59420 }, { "epoch": 0.08804934555516712, "grad_norm": 6.96875, "learning_rate": 0.00048544520547945207, "loss": 16.943, "step": 59440 }, { "epoch": 0.08807897184909551, "grad_norm": 6.75, "learning_rate": 0.0004854402665444445, "loss": 16.9327, "step": 59460 }, { "epoch": 0.0881085981430239, "grad_norm": 6.8125, "learning_rate": 0.0004854353276094369, "loss": 16.9585, "step": 59480 }, { "epoch": 0.08813822443695228, "grad_norm": 7.5, "learning_rate": 0.00048543038867442936, "loss": 16.9142, "step": 59500 }, { "epoch": 0.08816785073088067, "grad_norm": 6.84375, "learning_rate": 0.0004854254497394218, "loss": 16.8786, "step": 59520 }, { "epoch": 0.08819747702480905, "grad_norm": 7.1875, "learning_rate": 0.00048542051080441425, "loss": 16.9315, "step": 59540 }, { "epoch": 0.08822710331873744, "grad_norm": 6.9375, "learning_rate": 0.00048541557186940665, "loss": 16.9105, "step": 59560 }, { "epoch": 0.08825672961266583, "grad_norm": 6.1875, "learning_rate": 0.0004854106329343991, "loss": 16.9476, "step": 59580 }, { "epoch": 0.08828635590659423, "grad_norm": 7.5, "learning_rate": 0.00048540569399939154, "loss": 16.9046, "step": 59600 }, { "epoch": 0.08831598220052261, "grad_norm": 7.4375, "learning_rate": 0.000485400755064384, "loss": 16.9677, "step": 59620 }, { "epoch": 0.088345608494451, "grad_norm": 7.25, "learning_rate": 0.0004853958161293764, "loss": 16.8664, "step": 59640 }, { "epoch": 0.08837523478837939, "grad_norm": 6.6875, "learning_rate": 0.00048539087719436883, "loss": 16.9334, "step": 59660 }, { "epoch": 0.08840486108230777, "grad_norm": 7.375, "learning_rate": 0.0004853859382593613, "loss": 16.8932, "step": 59680 }, { "epoch": 0.08843448737623616, "grad_norm": 6.96875, "learning_rate": 0.0004853809993243537, "loss": 16.9439, "step": 59700 }, { "epoch": 0.08846411367016455, "grad_norm": 6.625, "learning_rate": 0.0004853760603893461, "loss": 16.9137, "step": 59720 }, { "epoch": 0.08849373996409293, "grad_norm": 6.1875, "learning_rate": 0.00048537112145433857, "loss": 16.9407, "step": 59740 }, { "epoch": 0.08852336625802132, "grad_norm": 7.28125, "learning_rate": 0.000485366182519331, "loss": 16.9089, "step": 59760 }, { "epoch": 0.0885529925519497, "grad_norm": 6.8125, "learning_rate": 0.0004853612435843234, "loss": 16.9287, "step": 59780 }, { "epoch": 0.08858261884587809, "grad_norm": 6.875, "learning_rate": 0.00048535630464931586, "loss": 16.8928, "step": 59800 }, { "epoch": 0.08861224513980648, "grad_norm": 7.1875, "learning_rate": 0.0004853513657143083, "loss": 16.8844, "step": 59820 }, { "epoch": 0.08864187143373486, "grad_norm": 6.375, "learning_rate": 0.00048534642677930075, "loss": 16.8664, "step": 59840 }, { "epoch": 0.08867149772766325, "grad_norm": 6.5, "learning_rate": 0.00048534148784429315, "loss": 16.936, "step": 59860 }, { "epoch": 0.08870112402159164, "grad_norm": 6.84375, "learning_rate": 0.0004853365489092856, "loss": 16.9358, "step": 59880 }, { "epoch": 0.08873075031552002, "grad_norm": 7.03125, "learning_rate": 0.00048533160997427804, "loss": 16.9482, "step": 59900 }, { "epoch": 0.08876037660944842, "grad_norm": 7.46875, "learning_rate": 0.0004853266710392705, "loss": 16.8417, "step": 59920 }, { "epoch": 0.08879000290337681, "grad_norm": 7.3125, "learning_rate": 0.0004853217321042629, "loss": 16.869, "step": 59940 }, { "epoch": 0.0888196291973052, "grad_norm": 5.65625, "learning_rate": 0.00048531679316925533, "loss": 16.8701, "step": 59960 }, { "epoch": 0.08884925549123358, "grad_norm": 6.46875, "learning_rate": 0.0004853118542342478, "loss": 16.9125, "step": 59980 }, { "epoch": 0.08887888178516197, "grad_norm": 7.625, "learning_rate": 0.00048530691529924023, "loss": 16.8648, "step": 60000 }, { "epoch": 0.08890850807909036, "grad_norm": 7.125, "learning_rate": 0.0004853019763642326, "loss": 16.9234, "step": 60020 }, { "epoch": 0.08893813437301874, "grad_norm": 6.96875, "learning_rate": 0.0004852970374292251, "loss": 16.8423, "step": 60040 }, { "epoch": 0.08896776066694713, "grad_norm": 6.34375, "learning_rate": 0.0004852920984942175, "loss": 16.9306, "step": 60060 }, { "epoch": 0.08899738696087552, "grad_norm": 7.03125, "learning_rate": 0.0004852871595592099, "loss": 16.8989, "step": 60080 }, { "epoch": 0.0890270132548039, "grad_norm": 7.0625, "learning_rate": 0.00048528222062420236, "loss": 16.9109, "step": 60100 }, { "epoch": 0.08905663954873229, "grad_norm": 6.96875, "learning_rate": 0.0004852772816891948, "loss": 16.8353, "step": 60120 }, { "epoch": 0.08908626584266068, "grad_norm": 6.875, "learning_rate": 0.00048527234275418725, "loss": 16.8815, "step": 60140 }, { "epoch": 0.08911589213658906, "grad_norm": 6.78125, "learning_rate": 0.00048526740381917965, "loss": 16.849, "step": 60160 }, { "epoch": 0.08914551843051745, "grad_norm": 7.90625, "learning_rate": 0.0004852624648841721, "loss": 16.9424, "step": 60180 }, { "epoch": 0.08917514472444583, "grad_norm": 6.53125, "learning_rate": 0.00048525752594916454, "loss": 16.8972, "step": 60200 }, { "epoch": 0.08920477101837422, "grad_norm": 6.375, "learning_rate": 0.000485252587014157, "loss": 16.8699, "step": 60220 }, { "epoch": 0.08923439731230262, "grad_norm": 6.71875, "learning_rate": 0.0004852476480791494, "loss": 16.8951, "step": 60240 }, { "epoch": 0.08926402360623101, "grad_norm": 6.59375, "learning_rate": 0.00048524270914414183, "loss": 16.8896, "step": 60260 }, { "epoch": 0.0892936499001594, "grad_norm": 6.8125, "learning_rate": 0.0004852377702091343, "loss": 16.8979, "step": 60280 }, { "epoch": 0.08932327619408778, "grad_norm": 7.90625, "learning_rate": 0.00048523283127412673, "loss": 16.9208, "step": 60300 }, { "epoch": 0.08935290248801617, "grad_norm": 8.0, "learning_rate": 0.0004852278923391191, "loss": 16.9476, "step": 60320 }, { "epoch": 0.08938252878194455, "grad_norm": 6.4375, "learning_rate": 0.0004852229534041116, "loss": 16.83, "step": 60340 }, { "epoch": 0.08941215507587294, "grad_norm": 6.71875, "learning_rate": 0.000485218014469104, "loss": 16.8334, "step": 60360 }, { "epoch": 0.08944178136980133, "grad_norm": 7.09375, "learning_rate": 0.0004852130755340964, "loss": 16.8982, "step": 60380 }, { "epoch": 0.08947140766372971, "grad_norm": 7.03125, "learning_rate": 0.00048520813659908886, "loss": 16.8715, "step": 60400 }, { "epoch": 0.0895010339576581, "grad_norm": 7.40625, "learning_rate": 0.0004852031976640813, "loss": 16.9139, "step": 60420 }, { "epoch": 0.08953066025158649, "grad_norm": 6.5625, "learning_rate": 0.00048519825872907375, "loss": 16.8323, "step": 60440 }, { "epoch": 0.08956028654551487, "grad_norm": 7.28125, "learning_rate": 0.00048519331979406615, "loss": 16.8942, "step": 60460 }, { "epoch": 0.08958991283944326, "grad_norm": 7.46875, "learning_rate": 0.0004851883808590586, "loss": 16.858, "step": 60480 }, { "epoch": 0.08961953913337165, "grad_norm": 6.875, "learning_rate": 0.00048518344192405104, "loss": 16.9273, "step": 60500 }, { "epoch": 0.08964916542730003, "grad_norm": 7.0, "learning_rate": 0.0004851785029890435, "loss": 16.9133, "step": 60520 }, { "epoch": 0.08967879172122842, "grad_norm": 6.625, "learning_rate": 0.0004851735640540359, "loss": 16.8469, "step": 60540 }, { "epoch": 0.08970841801515682, "grad_norm": 6.46875, "learning_rate": 0.00048516862511902833, "loss": 16.8442, "step": 60560 }, { "epoch": 0.0897380443090852, "grad_norm": 6.4375, "learning_rate": 0.0004851636861840208, "loss": 16.8199, "step": 60580 }, { "epoch": 0.08976767060301359, "grad_norm": 7.03125, "learning_rate": 0.00048515874724901323, "loss": 16.8291, "step": 60600 }, { "epoch": 0.08979729689694198, "grad_norm": 6.9375, "learning_rate": 0.0004851538083140056, "loss": 16.8837, "step": 60620 }, { "epoch": 0.08982692319087036, "grad_norm": 6.34375, "learning_rate": 0.0004851488693789981, "loss": 16.8889, "step": 60640 }, { "epoch": 0.08985654948479875, "grad_norm": 6.46875, "learning_rate": 0.0004851439304439905, "loss": 16.8414, "step": 60660 }, { "epoch": 0.08988617577872714, "grad_norm": 7.0, "learning_rate": 0.00048513899150898297, "loss": 16.8226, "step": 60680 }, { "epoch": 0.08991580207265552, "grad_norm": 6.75, "learning_rate": 0.00048513405257397536, "loss": 16.9003, "step": 60700 }, { "epoch": 0.08994542836658391, "grad_norm": 6.84375, "learning_rate": 0.0004851291136389678, "loss": 16.7763, "step": 60720 }, { "epoch": 0.0899750546605123, "grad_norm": 6.8125, "learning_rate": 0.00048512417470396025, "loss": 16.8099, "step": 60740 }, { "epoch": 0.09000468095444068, "grad_norm": 6.5, "learning_rate": 0.00048511923576895265, "loss": 16.8429, "step": 60760 }, { "epoch": 0.09003430724836907, "grad_norm": 6.4375, "learning_rate": 0.0004851142968339451, "loss": 16.8121, "step": 60780 }, { "epoch": 0.09006393354229746, "grad_norm": 6.78125, "learning_rate": 0.00048510935789893754, "loss": 16.816, "step": 60800 }, { "epoch": 0.09009355983622584, "grad_norm": 7.65625, "learning_rate": 0.00048510441896393, "loss": 16.8427, "step": 60820 }, { "epoch": 0.09012318613015423, "grad_norm": 6.5625, "learning_rate": 0.0004850994800289224, "loss": 16.7964, "step": 60840 }, { "epoch": 0.09015281242408262, "grad_norm": 7.34375, "learning_rate": 0.00048509454109391483, "loss": 16.9274, "step": 60860 }, { "epoch": 0.09018243871801102, "grad_norm": 6.65625, "learning_rate": 0.0004850896021589073, "loss": 16.8846, "step": 60880 }, { "epoch": 0.0902120650119394, "grad_norm": 7.75, "learning_rate": 0.00048508466322389973, "loss": 16.8355, "step": 60900 }, { "epoch": 0.09024169130586779, "grad_norm": 6.78125, "learning_rate": 0.0004850797242888921, "loss": 16.8287, "step": 60920 }, { "epoch": 0.09027131759979617, "grad_norm": 6.625, "learning_rate": 0.0004850747853538846, "loss": 16.7957, "step": 60940 }, { "epoch": 0.09030094389372456, "grad_norm": 6.96875, "learning_rate": 0.000485069846418877, "loss": 16.9066, "step": 60960 }, { "epoch": 0.09033057018765295, "grad_norm": 7.84375, "learning_rate": 0.00048506490748386947, "loss": 16.8342, "step": 60980 }, { "epoch": 0.09036019648158133, "grad_norm": 6.8125, "learning_rate": 0.00048505996854886186, "loss": 16.8766, "step": 61000 }, { "epoch": 0.09038982277550972, "grad_norm": 6.28125, "learning_rate": 0.00048505502961385436, "loss": 16.8707, "step": 61020 }, { "epoch": 0.09041944906943811, "grad_norm": 7.15625, "learning_rate": 0.00048505009067884676, "loss": 16.8145, "step": 61040 }, { "epoch": 0.0904490753633665, "grad_norm": 7.34375, "learning_rate": 0.00048504515174383915, "loss": 16.8387, "step": 61060 }, { "epoch": 0.09047870165729488, "grad_norm": 7.75, "learning_rate": 0.0004850402128088316, "loss": 16.8164, "step": 61080 }, { "epoch": 0.09050832795122327, "grad_norm": 7.125, "learning_rate": 0.00048503527387382404, "loss": 16.867, "step": 61100 }, { "epoch": 0.09053795424515165, "grad_norm": 5.8125, "learning_rate": 0.0004850303349388165, "loss": 16.857, "step": 61120 }, { "epoch": 0.09056758053908004, "grad_norm": 7.59375, "learning_rate": 0.0004850253960038089, "loss": 16.8391, "step": 61140 }, { "epoch": 0.09059720683300843, "grad_norm": 7.15625, "learning_rate": 0.00048502045706880133, "loss": 16.9228, "step": 61160 }, { "epoch": 0.09062683312693681, "grad_norm": 7.09375, "learning_rate": 0.0004850155181337938, "loss": 16.8162, "step": 61180 }, { "epoch": 0.09065645942086521, "grad_norm": 8.1875, "learning_rate": 0.00048501057919878623, "loss": 16.9047, "step": 61200 }, { "epoch": 0.0906860857147936, "grad_norm": 7.125, "learning_rate": 0.0004850056402637786, "loss": 16.8695, "step": 61220 }, { "epoch": 0.09071571200872199, "grad_norm": 6.28125, "learning_rate": 0.0004850007013287711, "loss": 16.8322, "step": 61240 }, { "epoch": 0.09074533830265037, "grad_norm": 6.6875, "learning_rate": 0.0004849957623937635, "loss": 16.8633, "step": 61260 }, { "epoch": 0.09077496459657876, "grad_norm": 7.0625, "learning_rate": 0.00048499082345875597, "loss": 16.7942, "step": 61280 }, { "epoch": 0.09080459089050714, "grad_norm": 6.96875, "learning_rate": 0.00048498588452374836, "loss": 16.8137, "step": 61300 }, { "epoch": 0.09083421718443553, "grad_norm": 6.5, "learning_rate": 0.00048498094558874086, "loss": 16.8784, "step": 61320 }, { "epoch": 0.09086384347836392, "grad_norm": 6.46875, "learning_rate": 0.00048497600665373326, "loss": 16.8997, "step": 61340 }, { "epoch": 0.0908934697722923, "grad_norm": 6.84375, "learning_rate": 0.0004849710677187257, "loss": 16.789, "step": 61360 }, { "epoch": 0.09092309606622069, "grad_norm": 5.90625, "learning_rate": 0.0004849661287837181, "loss": 16.8069, "step": 61380 }, { "epoch": 0.09095272236014908, "grad_norm": 6.375, "learning_rate": 0.00048496118984871054, "loss": 16.8303, "step": 61400 }, { "epoch": 0.09098234865407746, "grad_norm": 7.3125, "learning_rate": 0.000484956250913703, "loss": 16.8745, "step": 61420 }, { "epoch": 0.09101197494800585, "grad_norm": 6.96875, "learning_rate": 0.0004849513119786954, "loss": 16.8351, "step": 61440 }, { "epoch": 0.09104160124193424, "grad_norm": 6.875, "learning_rate": 0.00048494637304368783, "loss": 16.7959, "step": 61460 }, { "epoch": 0.09107122753586262, "grad_norm": 7.46875, "learning_rate": 0.0004849414341086803, "loss": 16.8403, "step": 61480 }, { "epoch": 0.09110085382979101, "grad_norm": 7.0, "learning_rate": 0.00048493649517367273, "loss": 16.8478, "step": 61500 }, { "epoch": 0.09113048012371941, "grad_norm": 6.875, "learning_rate": 0.0004849315562386651, "loss": 16.8452, "step": 61520 }, { "epoch": 0.0911601064176478, "grad_norm": 6.9375, "learning_rate": 0.0004849266173036576, "loss": 16.7965, "step": 61540 }, { "epoch": 0.09118973271157618, "grad_norm": 7.1875, "learning_rate": 0.00048492167836865, "loss": 16.7977, "step": 61560 }, { "epoch": 0.09121935900550457, "grad_norm": 6.75, "learning_rate": 0.00048491673943364247, "loss": 16.7875, "step": 61580 }, { "epoch": 0.09124898529943296, "grad_norm": 7.6875, "learning_rate": 0.00048491180049863486, "loss": 16.873, "step": 61600 }, { "epoch": 0.09127861159336134, "grad_norm": 6.78125, "learning_rate": 0.00048490686156362736, "loss": 16.8589, "step": 61620 }, { "epoch": 0.09130823788728973, "grad_norm": 7.90625, "learning_rate": 0.00048490192262861976, "loss": 16.793, "step": 61640 }, { "epoch": 0.09133786418121811, "grad_norm": 7.4375, "learning_rate": 0.0004848969836936122, "loss": 16.8028, "step": 61660 }, { "epoch": 0.0913674904751465, "grad_norm": 6.3125, "learning_rate": 0.0004848920447586046, "loss": 16.805, "step": 61680 }, { "epoch": 0.09139711676907489, "grad_norm": 7.0, "learning_rate": 0.0004848871058235971, "loss": 16.7904, "step": 61700 }, { "epoch": 0.09142674306300327, "grad_norm": 7.125, "learning_rate": 0.0004848821668885895, "loss": 16.7873, "step": 61720 }, { "epoch": 0.09145636935693166, "grad_norm": 7.125, "learning_rate": 0.0004848772279535819, "loss": 16.8407, "step": 61740 }, { "epoch": 0.09148599565086005, "grad_norm": 6.96875, "learning_rate": 0.00048487228901857433, "loss": 16.7869, "step": 61760 }, { "epoch": 0.09151562194478843, "grad_norm": 6.65625, "learning_rate": 0.0004848673500835668, "loss": 16.8435, "step": 61780 }, { "epoch": 0.09154524823871682, "grad_norm": 7.90625, "learning_rate": 0.00048486241114855923, "loss": 16.8419, "step": 61800 }, { "epoch": 0.09157487453264522, "grad_norm": 7.21875, "learning_rate": 0.0004848574722135516, "loss": 16.7863, "step": 61820 }, { "epoch": 0.0916045008265736, "grad_norm": 6.53125, "learning_rate": 0.0004848525332785441, "loss": 16.8984, "step": 61840 }, { "epoch": 0.09163412712050199, "grad_norm": 7.40625, "learning_rate": 0.0004848475943435365, "loss": 16.8329, "step": 61860 }, { "epoch": 0.09166375341443038, "grad_norm": 6.9375, "learning_rate": 0.00048484265540852897, "loss": 16.8384, "step": 61880 }, { "epoch": 0.09169337970835877, "grad_norm": 6.84375, "learning_rate": 0.00048483771647352136, "loss": 16.8325, "step": 61900 }, { "epoch": 0.09172300600228715, "grad_norm": 6.53125, "learning_rate": 0.00048483277753851386, "loss": 16.83, "step": 61920 }, { "epoch": 0.09175263229621554, "grad_norm": 6.875, "learning_rate": 0.00048482783860350626, "loss": 16.7585, "step": 61940 }, { "epoch": 0.09178225859014393, "grad_norm": 6.6875, "learning_rate": 0.0004848228996684987, "loss": 16.7828, "step": 61960 }, { "epoch": 0.09181188488407231, "grad_norm": 7.5, "learning_rate": 0.0004848179607334911, "loss": 16.7676, "step": 61980 }, { "epoch": 0.0918415111780007, "grad_norm": 7.375, "learning_rate": 0.0004848130217984836, "loss": 16.7898, "step": 62000 }, { "epoch": 0.09187113747192908, "grad_norm": 6.625, "learning_rate": 0.000484808082863476, "loss": 16.8082, "step": 62020 }, { "epoch": 0.09190076376585747, "grad_norm": 7.40625, "learning_rate": 0.00048480314392846844, "loss": 16.8028, "step": 62040 }, { "epoch": 0.09193039005978586, "grad_norm": 6.78125, "learning_rate": 0.00048479820499346083, "loss": 16.8189, "step": 62060 }, { "epoch": 0.09196001635371424, "grad_norm": 7.15625, "learning_rate": 0.0004847932660584533, "loss": 16.8504, "step": 62080 }, { "epoch": 0.09198964264764263, "grad_norm": 6.6875, "learning_rate": 0.00048478832712344573, "loss": 16.7952, "step": 62100 }, { "epoch": 0.09201926894157102, "grad_norm": 7.65625, "learning_rate": 0.0004847833881884381, "loss": 16.7928, "step": 62120 }, { "epoch": 0.09204889523549942, "grad_norm": 7.65625, "learning_rate": 0.0004847784492534306, "loss": 16.7917, "step": 62140 }, { "epoch": 0.0920785215294278, "grad_norm": 7.125, "learning_rate": 0.000484773510318423, "loss": 16.7933, "step": 62160 }, { "epoch": 0.09210814782335619, "grad_norm": 7.53125, "learning_rate": 0.00048476857138341547, "loss": 16.8126, "step": 62180 }, { "epoch": 0.09213777411728458, "grad_norm": 6.65625, "learning_rate": 0.00048476363244840786, "loss": 16.7553, "step": 62200 }, { "epoch": 0.09216740041121296, "grad_norm": 6.78125, "learning_rate": 0.00048475869351340036, "loss": 16.7727, "step": 62220 }, { "epoch": 0.09219702670514135, "grad_norm": 6.6875, "learning_rate": 0.00048475375457839276, "loss": 16.879, "step": 62240 }, { "epoch": 0.09222665299906974, "grad_norm": 6.78125, "learning_rate": 0.0004847488156433852, "loss": 16.7929, "step": 62260 }, { "epoch": 0.09225627929299812, "grad_norm": 7.25, "learning_rate": 0.0004847438767083776, "loss": 16.7963, "step": 62280 }, { "epoch": 0.09228590558692651, "grad_norm": 6.4375, "learning_rate": 0.0004847389377733701, "loss": 16.7686, "step": 62300 }, { "epoch": 0.0923155318808549, "grad_norm": 6.96875, "learning_rate": 0.0004847339988383625, "loss": 16.8297, "step": 62320 }, { "epoch": 0.09234515817478328, "grad_norm": 6.875, "learning_rate": 0.00048472905990335494, "loss": 16.8149, "step": 62340 }, { "epoch": 0.09237478446871167, "grad_norm": 7.3125, "learning_rate": 0.00048472412096834733, "loss": 16.8835, "step": 62360 }, { "epoch": 0.09240441076264005, "grad_norm": 7.03125, "learning_rate": 0.00048471918203333984, "loss": 16.8404, "step": 62380 }, { "epoch": 0.09243403705656844, "grad_norm": 6.75, "learning_rate": 0.00048471424309833223, "loss": 16.8179, "step": 62400 }, { "epoch": 0.09246366335049683, "grad_norm": 6.3125, "learning_rate": 0.0004847093041633246, "loss": 16.7695, "step": 62420 }, { "epoch": 0.09249328964442521, "grad_norm": 6.25, "learning_rate": 0.0004847043652283171, "loss": 16.8396, "step": 62440 }, { "epoch": 0.09252291593835361, "grad_norm": 7.09375, "learning_rate": 0.0004846994262933095, "loss": 16.8195, "step": 62460 }, { "epoch": 0.092552542232282, "grad_norm": 7.375, "learning_rate": 0.00048469448735830197, "loss": 16.7936, "step": 62480 }, { "epoch": 0.09258216852621039, "grad_norm": 6.5625, "learning_rate": 0.00048468954842329436, "loss": 16.846, "step": 62500 }, { "epoch": 0.09261179482013877, "grad_norm": 7.15625, "learning_rate": 0.00048468460948828686, "loss": 16.7826, "step": 62520 }, { "epoch": 0.09264142111406716, "grad_norm": 6.90625, "learning_rate": 0.00048467967055327926, "loss": 16.8026, "step": 62540 }, { "epoch": 0.09267104740799555, "grad_norm": 7.3125, "learning_rate": 0.0004846747316182717, "loss": 16.7603, "step": 62560 }, { "epoch": 0.09270067370192393, "grad_norm": 7.84375, "learning_rate": 0.0004846697926832641, "loss": 16.8185, "step": 62580 }, { "epoch": 0.09273029999585232, "grad_norm": 6.625, "learning_rate": 0.0004846648537482566, "loss": 16.7457, "step": 62600 }, { "epoch": 0.0927599262897807, "grad_norm": 7.3125, "learning_rate": 0.000484659914813249, "loss": 16.7269, "step": 62620 }, { "epoch": 0.09278955258370909, "grad_norm": 6.21875, "learning_rate": 0.00048465497587824144, "loss": 16.7724, "step": 62640 }, { "epoch": 0.09281917887763748, "grad_norm": 7.4375, "learning_rate": 0.00048465003694323383, "loss": 16.7531, "step": 62660 }, { "epoch": 0.09284880517156587, "grad_norm": 6.53125, "learning_rate": 0.00048464509800822634, "loss": 16.7827, "step": 62680 }, { "epoch": 0.09287843146549425, "grad_norm": 6.375, "learning_rate": 0.00048464015907321873, "loss": 16.7857, "step": 62700 }, { "epoch": 0.09290805775942264, "grad_norm": 6.25, "learning_rate": 0.0004846352201382112, "loss": 16.7486, "step": 62720 }, { "epoch": 0.09293768405335102, "grad_norm": 6.3125, "learning_rate": 0.0004846302812032036, "loss": 16.7855, "step": 62740 }, { "epoch": 0.09296731034727941, "grad_norm": 5.84375, "learning_rate": 0.000484625342268196, "loss": 16.745, "step": 62760 }, { "epoch": 0.09299693664120781, "grad_norm": 25.875, "learning_rate": 0.00048462040333318847, "loss": 16.8632, "step": 62780 }, { "epoch": 0.0930265629351362, "grad_norm": 7.03125, "learning_rate": 0.00048461546439818086, "loss": 16.7818, "step": 62800 }, { "epoch": 0.09305618922906458, "grad_norm": 7.6875, "learning_rate": 0.00048461052546317336, "loss": 16.7971, "step": 62820 }, { "epoch": 0.09308581552299297, "grad_norm": 6.46875, "learning_rate": 0.00048460558652816576, "loss": 16.7621, "step": 62840 }, { "epoch": 0.09311544181692136, "grad_norm": 6.78125, "learning_rate": 0.0004846006475931582, "loss": 16.7443, "step": 62860 }, { "epoch": 0.09314506811084974, "grad_norm": 6.875, "learning_rate": 0.0004845957086581506, "loss": 16.8043, "step": 62880 }, { "epoch": 0.09317469440477813, "grad_norm": 7.3125, "learning_rate": 0.0004845907697231431, "loss": 16.797, "step": 62900 }, { "epoch": 0.09320432069870652, "grad_norm": 6.3125, "learning_rate": 0.0004845858307881355, "loss": 16.7884, "step": 62920 }, { "epoch": 0.0932339469926349, "grad_norm": 6.5625, "learning_rate": 0.00048458089185312794, "loss": 16.7869, "step": 62940 }, { "epoch": 0.09326357328656329, "grad_norm": 6.21875, "learning_rate": 0.00048457595291812034, "loss": 16.7761, "step": 62960 }, { "epoch": 0.09329319958049168, "grad_norm": 6.875, "learning_rate": 0.00048457101398311284, "loss": 16.7793, "step": 62980 }, { "epoch": 0.09332282587442006, "grad_norm": 6.40625, "learning_rate": 0.00048456607504810523, "loss": 16.7568, "step": 63000 }, { "epoch": 0.09335245216834845, "grad_norm": 6.9375, "learning_rate": 0.0004845611361130977, "loss": 16.8394, "step": 63020 }, { "epoch": 0.09338207846227684, "grad_norm": 6.53125, "learning_rate": 0.0004845561971780901, "loss": 16.8243, "step": 63040 }, { "epoch": 0.09341170475620522, "grad_norm": 6.9375, "learning_rate": 0.0004845512582430826, "loss": 16.7747, "step": 63060 }, { "epoch": 0.09344133105013361, "grad_norm": 7.65625, "learning_rate": 0.00048454631930807497, "loss": 16.7301, "step": 63080 }, { "epoch": 0.09347095734406201, "grad_norm": 6.875, "learning_rate": 0.00048454138037306736, "loss": 16.7784, "step": 63100 }, { "epoch": 0.0935005836379904, "grad_norm": 7.46875, "learning_rate": 0.00048453644143805986, "loss": 16.7329, "step": 63120 }, { "epoch": 0.09353020993191878, "grad_norm": 6.65625, "learning_rate": 0.00048453150250305226, "loss": 16.7494, "step": 63140 }, { "epoch": 0.09355983622584717, "grad_norm": 7.0625, "learning_rate": 0.0004845265635680447, "loss": 16.7277, "step": 63160 }, { "epoch": 0.09358946251977555, "grad_norm": 6.90625, "learning_rate": 0.0004845216246330371, "loss": 16.723, "step": 63180 }, { "epoch": 0.09361908881370394, "grad_norm": 6.40625, "learning_rate": 0.0004845166856980296, "loss": 16.7447, "step": 63200 }, { "epoch": 0.09364871510763233, "grad_norm": 5.8125, "learning_rate": 0.000484511746763022, "loss": 16.7586, "step": 63220 }, { "epoch": 0.09367834140156071, "grad_norm": 6.125, "learning_rate": 0.00048450680782801444, "loss": 16.7316, "step": 63240 }, { "epoch": 0.0937079676954891, "grad_norm": 6.125, "learning_rate": 0.00048450186889300684, "loss": 16.6967, "step": 63260 }, { "epoch": 0.09373759398941749, "grad_norm": 6.53125, "learning_rate": 0.00048449692995799934, "loss": 16.7287, "step": 63280 }, { "epoch": 0.09376722028334587, "grad_norm": 6.53125, "learning_rate": 0.00048449199102299173, "loss": 16.78, "step": 63300 }, { "epoch": 0.09379684657727426, "grad_norm": 6.625, "learning_rate": 0.0004844870520879842, "loss": 16.7051, "step": 63320 }, { "epoch": 0.09382647287120265, "grad_norm": 7.28125, "learning_rate": 0.0004844821131529766, "loss": 16.7584, "step": 63340 }, { "epoch": 0.09385609916513103, "grad_norm": 7.84375, "learning_rate": 0.0004844771742179691, "loss": 16.7307, "step": 63360 }, { "epoch": 0.09388572545905942, "grad_norm": 6.78125, "learning_rate": 0.00048447223528296147, "loss": 16.7168, "step": 63380 }, { "epoch": 0.0939153517529878, "grad_norm": 6.53125, "learning_rate": 0.0004844672963479539, "loss": 16.6948, "step": 63400 }, { "epoch": 0.0939449780469162, "grad_norm": 6.59375, "learning_rate": 0.00048446235741294636, "loss": 16.7118, "step": 63420 }, { "epoch": 0.09397460434084459, "grad_norm": 6.90625, "learning_rate": 0.00048445741847793876, "loss": 16.7566, "step": 63440 }, { "epoch": 0.09400423063477298, "grad_norm": 6.53125, "learning_rate": 0.0004844524795429312, "loss": 16.7323, "step": 63460 }, { "epoch": 0.09403385692870136, "grad_norm": 6.46875, "learning_rate": 0.0004844475406079236, "loss": 16.7964, "step": 63480 }, { "epoch": 0.09406348322262975, "grad_norm": 6.65625, "learning_rate": 0.0004844426016729161, "loss": 16.7028, "step": 63500 }, { "epoch": 0.09409310951655814, "grad_norm": 6.84375, "learning_rate": 0.0004844376627379085, "loss": 16.7663, "step": 63520 }, { "epoch": 0.09412273581048652, "grad_norm": 7.09375, "learning_rate": 0.00048443272380290094, "loss": 16.7777, "step": 63540 }, { "epoch": 0.09415236210441491, "grad_norm": 7.28125, "learning_rate": 0.00048442778486789334, "loss": 16.7348, "step": 63560 }, { "epoch": 0.0941819883983433, "grad_norm": 6.5, "learning_rate": 0.00048442284593288584, "loss": 16.7293, "step": 63580 }, { "epoch": 0.09421161469227168, "grad_norm": 6.28125, "learning_rate": 0.00048441790699787823, "loss": 16.7371, "step": 63600 }, { "epoch": 0.09424124098620007, "grad_norm": 6.84375, "learning_rate": 0.0004844129680628707, "loss": 16.7628, "step": 63620 }, { "epoch": 0.09427086728012846, "grad_norm": 6.9375, "learning_rate": 0.00048440802912786313, "loss": 16.743, "step": 63640 }, { "epoch": 0.09430049357405684, "grad_norm": 6.46875, "learning_rate": 0.0004844030901928556, "loss": 16.759, "step": 63660 }, { "epoch": 0.09433011986798523, "grad_norm": 6.78125, "learning_rate": 0.00048439815125784797, "loss": 16.781, "step": 63680 }, { "epoch": 0.09435974616191362, "grad_norm": 6.28125, "learning_rate": 0.0004843932123228404, "loss": 16.7374, "step": 63700 }, { "epoch": 0.094389372455842, "grad_norm": 7.125, "learning_rate": 0.00048438827338783286, "loss": 16.767, "step": 63720 }, { "epoch": 0.0944189987497704, "grad_norm": 7.34375, "learning_rate": 0.0004843833344528253, "loss": 16.6797, "step": 63740 }, { "epoch": 0.09444862504369879, "grad_norm": 7.15625, "learning_rate": 0.0004843783955178177, "loss": 16.7178, "step": 63760 }, { "epoch": 0.09447825133762718, "grad_norm": 7.03125, "learning_rate": 0.0004843734565828101, "loss": 16.7747, "step": 63780 }, { "epoch": 0.09450787763155556, "grad_norm": 6.15625, "learning_rate": 0.0004843685176478026, "loss": 16.722, "step": 63800 }, { "epoch": 0.09453750392548395, "grad_norm": 6.46875, "learning_rate": 0.000484363578712795, "loss": 16.7015, "step": 63820 }, { "epoch": 0.09456713021941233, "grad_norm": 9.25, "learning_rate": 0.00048435863977778744, "loss": 16.7375, "step": 63840 }, { "epoch": 0.09459675651334072, "grad_norm": 6.90625, "learning_rate": 0.00048435370084277984, "loss": 16.7442, "step": 63860 }, { "epoch": 0.09462638280726911, "grad_norm": 6.78125, "learning_rate": 0.00048434876190777234, "loss": 16.7361, "step": 63880 }, { "epoch": 0.0946560091011975, "grad_norm": 6.46875, "learning_rate": 0.00048434382297276473, "loss": 16.7216, "step": 63900 }, { "epoch": 0.09468563539512588, "grad_norm": 6.84375, "learning_rate": 0.0004843388840377572, "loss": 16.7706, "step": 63920 }, { "epoch": 0.09471526168905427, "grad_norm": 6.6875, "learning_rate": 0.00048433394510274963, "loss": 16.6918, "step": 63940 }, { "epoch": 0.09474488798298265, "grad_norm": 6.78125, "learning_rate": 0.0004843290061677421, "loss": 16.6463, "step": 63960 }, { "epoch": 0.09477451427691104, "grad_norm": 7.21875, "learning_rate": 0.00048432406723273447, "loss": 16.7036, "step": 63980 }, { "epoch": 0.09480414057083943, "grad_norm": 6.09375, "learning_rate": 0.0004843191282977269, "loss": 16.7843, "step": 64000 }, { "epoch": 0.09483376686476781, "grad_norm": 6.6875, "learning_rate": 0.00048431418936271936, "loss": 16.719, "step": 64020 }, { "epoch": 0.0948633931586962, "grad_norm": 7.375, "learning_rate": 0.0004843092504277118, "loss": 16.7347, "step": 64040 }, { "epoch": 0.0948930194526246, "grad_norm": 6.21875, "learning_rate": 0.0004843043114927042, "loss": 16.8096, "step": 64060 }, { "epoch": 0.09492264574655299, "grad_norm": 7.84375, "learning_rate": 0.00048429937255769665, "loss": 16.7218, "step": 64080 }, { "epoch": 0.09495227204048137, "grad_norm": 6.46875, "learning_rate": 0.0004842944336226891, "loss": 16.755, "step": 64100 }, { "epoch": 0.09498189833440976, "grad_norm": 7.21875, "learning_rate": 0.0004842894946876815, "loss": 16.7069, "step": 64120 }, { "epoch": 0.09501152462833815, "grad_norm": 6.78125, "learning_rate": 0.00048428455575267394, "loss": 16.7824, "step": 64140 }, { "epoch": 0.09504115092226653, "grad_norm": 7.03125, "learning_rate": 0.00048427961681766634, "loss": 16.6922, "step": 64160 }, { "epoch": 0.09507077721619492, "grad_norm": 6.8125, "learning_rate": 0.00048427467788265884, "loss": 16.6847, "step": 64180 }, { "epoch": 0.0951004035101233, "grad_norm": 7.09375, "learning_rate": 0.00048426973894765123, "loss": 16.7158, "step": 64200 }, { "epoch": 0.09513002980405169, "grad_norm": 6.96875, "learning_rate": 0.0004842648000126437, "loss": 16.6815, "step": 64220 }, { "epoch": 0.09515965609798008, "grad_norm": 7.0625, "learning_rate": 0.00048425986107763613, "loss": 16.7583, "step": 64240 }, { "epoch": 0.09518928239190846, "grad_norm": 6.59375, "learning_rate": 0.0004842549221426286, "loss": 16.7004, "step": 64260 }, { "epoch": 0.09521890868583685, "grad_norm": 7.25, "learning_rate": 0.00048424998320762097, "loss": 16.7253, "step": 64280 }, { "epoch": 0.09524853497976524, "grad_norm": 6.375, "learning_rate": 0.0004842450442726134, "loss": 16.7563, "step": 64300 }, { "epoch": 0.09527816127369362, "grad_norm": 6.53125, "learning_rate": 0.00048424010533760586, "loss": 16.8224, "step": 64320 }, { "epoch": 0.09530778756762201, "grad_norm": 7.375, "learning_rate": 0.0004842351664025983, "loss": 16.72, "step": 64340 }, { "epoch": 0.0953374138615504, "grad_norm": 6.90625, "learning_rate": 0.0004842302274675907, "loss": 16.6931, "step": 64360 }, { "epoch": 0.0953670401554788, "grad_norm": 6.84375, "learning_rate": 0.00048422528853258315, "loss": 16.7532, "step": 64380 }, { "epoch": 0.09539666644940718, "grad_norm": 7.34375, "learning_rate": 0.0004842203495975756, "loss": 16.6859, "step": 64400 }, { "epoch": 0.09542629274333557, "grad_norm": 7.40625, "learning_rate": 0.00048421541066256805, "loss": 16.7375, "step": 64420 }, { "epoch": 0.09545591903726396, "grad_norm": 6.9375, "learning_rate": 0.00048421047172756044, "loss": 16.7586, "step": 64440 }, { "epoch": 0.09548554533119234, "grad_norm": 6.3125, "learning_rate": 0.00048420553279255284, "loss": 16.7103, "step": 64460 }, { "epoch": 0.09551517162512073, "grad_norm": 6.0, "learning_rate": 0.00048420059385754534, "loss": 16.7307, "step": 64480 }, { "epoch": 0.09554479791904912, "grad_norm": 6.40625, "learning_rate": 0.00048419565492253773, "loss": 16.6492, "step": 64500 }, { "epoch": 0.0955744242129775, "grad_norm": 7.46875, "learning_rate": 0.0004841907159875302, "loss": 16.7124, "step": 64520 }, { "epoch": 0.09560405050690589, "grad_norm": 7.125, "learning_rate": 0.00048418577705252263, "loss": 16.6885, "step": 64540 }, { "epoch": 0.09563367680083427, "grad_norm": 6.875, "learning_rate": 0.0004841808381175151, "loss": 16.6734, "step": 64560 }, { "epoch": 0.09566330309476266, "grad_norm": 7.3125, "learning_rate": 0.00048417589918250747, "loss": 16.7048, "step": 64580 }, { "epoch": 0.09569292938869105, "grad_norm": 7.53125, "learning_rate": 0.0004841709602474999, "loss": 16.741, "step": 64600 }, { "epoch": 0.09572255568261943, "grad_norm": 5.9375, "learning_rate": 0.00048416602131249237, "loss": 16.7261, "step": 64620 }, { "epoch": 0.09575218197654782, "grad_norm": 6.375, "learning_rate": 0.0004841610823774848, "loss": 16.6945, "step": 64640 }, { "epoch": 0.0957818082704762, "grad_norm": 6.6875, "learning_rate": 0.0004841561434424772, "loss": 16.6719, "step": 64660 }, { "epoch": 0.09581143456440461, "grad_norm": 6.625, "learning_rate": 0.00048415120450746965, "loss": 16.7877, "step": 64680 }, { "epoch": 0.095841060858333, "grad_norm": 7.84375, "learning_rate": 0.0004841462655724621, "loss": 16.7272, "step": 64700 }, { "epoch": 0.09587068715226138, "grad_norm": 7.0, "learning_rate": 0.00048414132663745455, "loss": 16.7008, "step": 64720 }, { "epoch": 0.09590031344618977, "grad_norm": 7.1875, "learning_rate": 0.00048413638770244694, "loss": 16.7269, "step": 64740 }, { "epoch": 0.09592993974011815, "grad_norm": 8.375, "learning_rate": 0.00048413144876743934, "loss": 16.6705, "step": 64760 }, { "epoch": 0.09595956603404654, "grad_norm": 7.125, "learning_rate": 0.00048412650983243184, "loss": 16.6745, "step": 64780 }, { "epoch": 0.09598919232797493, "grad_norm": 7.5, "learning_rate": 0.00048412157089742423, "loss": 16.6829, "step": 64800 }, { "epoch": 0.09601881862190331, "grad_norm": 6.1875, "learning_rate": 0.0004841166319624167, "loss": 16.6921, "step": 64820 }, { "epoch": 0.0960484449158317, "grad_norm": 6.59375, "learning_rate": 0.00048411169302740913, "loss": 16.6877, "step": 64840 }, { "epoch": 0.09607807120976009, "grad_norm": 8.1875, "learning_rate": 0.0004841067540924016, "loss": 16.7151, "step": 64860 }, { "epoch": 0.09610769750368847, "grad_norm": 6.875, "learning_rate": 0.00048410181515739397, "loss": 16.7201, "step": 64880 }, { "epoch": 0.09613732379761686, "grad_norm": 6.9375, "learning_rate": 0.0004840968762223864, "loss": 16.6966, "step": 64900 }, { "epoch": 0.09616695009154524, "grad_norm": 6.4375, "learning_rate": 0.00048409193728737887, "loss": 16.6942, "step": 64920 }, { "epoch": 0.09619657638547363, "grad_norm": 6.34375, "learning_rate": 0.0004840869983523713, "loss": 16.725, "step": 64940 }, { "epoch": 0.09622620267940202, "grad_norm": 7.0625, "learning_rate": 0.0004840820594173637, "loss": 16.726, "step": 64960 }, { "epoch": 0.0962558289733304, "grad_norm": 6.0, "learning_rate": 0.00048407712048235615, "loss": 16.6779, "step": 64980 }, { "epoch": 0.0962854552672588, "grad_norm": 6.5, "learning_rate": 0.0004840721815473486, "loss": 16.7394, "step": 65000 }, { "epoch": 0.09631508156118719, "grad_norm": 6.9375, "learning_rate": 0.00048406724261234105, "loss": 16.7388, "step": 65020 }, { "epoch": 0.09634470785511558, "grad_norm": 6.96875, "learning_rate": 0.00048406230367733344, "loss": 16.7261, "step": 65040 }, { "epoch": 0.09637433414904396, "grad_norm": 6.59375, "learning_rate": 0.0004840573647423259, "loss": 16.7156, "step": 65060 }, { "epoch": 0.09640396044297235, "grad_norm": 7.71875, "learning_rate": 0.00048405242580731834, "loss": 16.6965, "step": 65080 }, { "epoch": 0.09643358673690074, "grad_norm": 6.9375, "learning_rate": 0.0004840474868723108, "loss": 16.6861, "step": 65100 }, { "epoch": 0.09646321303082912, "grad_norm": 6.90625, "learning_rate": 0.0004840425479373032, "loss": 16.7005, "step": 65120 }, { "epoch": 0.09649283932475751, "grad_norm": 6.0625, "learning_rate": 0.00048403760900229563, "loss": 16.7591, "step": 65140 }, { "epoch": 0.0965224656186859, "grad_norm": 6.15625, "learning_rate": 0.0004840326700672881, "loss": 16.6792, "step": 65160 }, { "epoch": 0.09655209191261428, "grad_norm": 6.96875, "learning_rate": 0.00048402773113228047, "loss": 16.6438, "step": 65180 }, { "epoch": 0.09658171820654267, "grad_norm": 6.78125, "learning_rate": 0.0004840227921972729, "loss": 16.6596, "step": 65200 }, { "epoch": 0.09661134450047106, "grad_norm": 6.9375, "learning_rate": 0.00048401785326226537, "loss": 16.7024, "step": 65220 }, { "epoch": 0.09664097079439944, "grad_norm": 6.25, "learning_rate": 0.0004840129143272578, "loss": 16.7537, "step": 65240 }, { "epoch": 0.09667059708832783, "grad_norm": 6.5, "learning_rate": 0.0004840079753922502, "loss": 16.6684, "step": 65260 }, { "epoch": 0.09670022338225621, "grad_norm": 6.84375, "learning_rate": 0.00048400303645724265, "loss": 16.6689, "step": 65280 }, { "epoch": 0.0967298496761846, "grad_norm": 7.71875, "learning_rate": 0.0004839980975222351, "loss": 16.6964, "step": 65300 }, { "epoch": 0.096759475970113, "grad_norm": 7.34375, "learning_rate": 0.00048399315858722755, "loss": 16.7334, "step": 65320 }, { "epoch": 0.09678910226404139, "grad_norm": 6.25, "learning_rate": 0.00048398821965221994, "loss": 16.6653, "step": 65340 }, { "epoch": 0.09681872855796977, "grad_norm": 6.59375, "learning_rate": 0.0004839832807172124, "loss": 16.6492, "step": 65360 }, { "epoch": 0.09684835485189816, "grad_norm": 7.4375, "learning_rate": 0.00048397834178220484, "loss": 16.593, "step": 65380 }, { "epoch": 0.09687798114582655, "grad_norm": 6.28125, "learning_rate": 0.0004839734028471973, "loss": 16.687, "step": 65400 }, { "epoch": 0.09690760743975493, "grad_norm": 7.59375, "learning_rate": 0.0004839684639121897, "loss": 16.6924, "step": 65420 }, { "epoch": 0.09693723373368332, "grad_norm": 6.125, "learning_rate": 0.0004839635249771822, "loss": 16.7129, "step": 65440 }, { "epoch": 0.0969668600276117, "grad_norm": 6.21875, "learning_rate": 0.0004839585860421746, "loss": 16.7551, "step": 65460 }, { "epoch": 0.09699648632154009, "grad_norm": 6.6875, "learning_rate": 0.00048395364710716697, "loss": 16.7323, "step": 65480 }, { "epoch": 0.09702611261546848, "grad_norm": 6.6875, "learning_rate": 0.0004839487081721594, "loss": 16.7262, "step": 65500 }, { "epoch": 0.09705573890939687, "grad_norm": 6.75, "learning_rate": 0.00048394376923715187, "loss": 16.712, "step": 65520 }, { "epoch": 0.09708536520332525, "grad_norm": 7.96875, "learning_rate": 0.0004839388303021443, "loss": 16.7063, "step": 65540 }, { "epoch": 0.09711499149725364, "grad_norm": 6.5, "learning_rate": 0.0004839338913671367, "loss": 16.6636, "step": 65560 }, { "epoch": 0.09714461779118203, "grad_norm": 6.5625, "learning_rate": 0.00048392895243212916, "loss": 16.6222, "step": 65580 }, { "epoch": 0.09717424408511041, "grad_norm": 6.90625, "learning_rate": 0.0004839240134971216, "loss": 16.6467, "step": 65600 }, { "epoch": 0.0972038703790388, "grad_norm": 6.34375, "learning_rate": 0.00048391907456211405, "loss": 16.6927, "step": 65620 }, { "epoch": 0.0972334966729672, "grad_norm": 7.0625, "learning_rate": 0.00048391413562710644, "loss": 16.6531, "step": 65640 }, { "epoch": 0.09726312296689558, "grad_norm": 6.46875, "learning_rate": 0.0004839091966920989, "loss": 16.7031, "step": 65660 }, { "epoch": 0.09729274926082397, "grad_norm": 6.78125, "learning_rate": 0.00048390425775709134, "loss": 16.6487, "step": 65680 }, { "epoch": 0.09732237555475236, "grad_norm": 7.625, "learning_rate": 0.0004838993188220838, "loss": 16.6103, "step": 65700 }, { "epoch": 0.09735200184868074, "grad_norm": 6.4375, "learning_rate": 0.0004838943798870762, "loss": 16.6815, "step": 65720 }, { "epoch": 0.09738162814260913, "grad_norm": 8.375, "learning_rate": 0.0004838894409520687, "loss": 16.7185, "step": 65740 }, { "epoch": 0.09741125443653752, "grad_norm": 7.15625, "learning_rate": 0.0004838845020170611, "loss": 16.7408, "step": 65760 }, { "epoch": 0.0974408807304659, "grad_norm": 7.34375, "learning_rate": 0.00048387956308205347, "loss": 16.6787, "step": 65780 }, { "epoch": 0.09747050702439429, "grad_norm": 7.5, "learning_rate": 0.0004838746241470459, "loss": 16.6534, "step": 65800 }, { "epoch": 0.09750013331832268, "grad_norm": 7.0, "learning_rate": 0.00048386968521203837, "loss": 16.6736, "step": 65820 }, { "epoch": 0.09752975961225106, "grad_norm": 7.125, "learning_rate": 0.0004838647462770308, "loss": 16.6966, "step": 65840 }, { "epoch": 0.09755938590617945, "grad_norm": 6.6875, "learning_rate": 0.0004838598073420232, "loss": 16.6469, "step": 65860 }, { "epoch": 0.09758901220010784, "grad_norm": 6.75, "learning_rate": 0.00048385486840701566, "loss": 16.6895, "step": 65880 }, { "epoch": 0.09761863849403622, "grad_norm": 6.53125, "learning_rate": 0.0004838499294720081, "loss": 16.6792, "step": 65900 }, { "epoch": 0.09764826478796461, "grad_norm": 6.15625, "learning_rate": 0.00048384499053700055, "loss": 16.7067, "step": 65920 }, { "epoch": 0.097677891081893, "grad_norm": 5.8125, "learning_rate": 0.00048384005160199294, "loss": 16.6676, "step": 65940 }, { "epoch": 0.0977075173758214, "grad_norm": 6.96875, "learning_rate": 0.0004838351126669854, "loss": 16.717, "step": 65960 }, { "epoch": 0.09773714366974978, "grad_norm": 7.46875, "learning_rate": 0.00048383017373197784, "loss": 16.6675, "step": 65980 }, { "epoch": 0.09776676996367817, "grad_norm": 6.3125, "learning_rate": 0.0004838252347969703, "loss": 16.6672, "step": 66000 }, { "epoch": 0.09779639625760655, "grad_norm": 7.1875, "learning_rate": 0.0004838202958619627, "loss": 16.7281, "step": 66020 }, { "epoch": 0.09782602255153494, "grad_norm": 7.5, "learning_rate": 0.0004838153569269552, "loss": 16.6424, "step": 66040 }, { "epoch": 0.09785564884546333, "grad_norm": 6.46875, "learning_rate": 0.0004838104179919476, "loss": 16.6993, "step": 66060 }, { "epoch": 0.09788527513939171, "grad_norm": 6.78125, "learning_rate": 0.00048380547905694, "loss": 16.7206, "step": 66080 }, { "epoch": 0.0979149014333201, "grad_norm": 6.5625, "learning_rate": 0.0004838005401219324, "loss": 16.69, "step": 66100 }, { "epoch": 0.09794452772724849, "grad_norm": 6.9375, "learning_rate": 0.0004837956011869249, "loss": 16.6846, "step": 66120 }, { "epoch": 0.09797415402117687, "grad_norm": 7.15625, "learning_rate": 0.0004837906622519173, "loss": 16.6381, "step": 66140 }, { "epoch": 0.09800378031510526, "grad_norm": 6.84375, "learning_rate": 0.0004837857233169097, "loss": 16.6673, "step": 66160 }, { "epoch": 0.09803340660903365, "grad_norm": 6.84375, "learning_rate": 0.00048378078438190216, "loss": 16.6789, "step": 66180 }, { "epoch": 0.09806303290296203, "grad_norm": 7.125, "learning_rate": 0.0004837758454468946, "loss": 16.7082, "step": 66200 }, { "epoch": 0.09809265919689042, "grad_norm": 6.75, "learning_rate": 0.00048377090651188705, "loss": 16.6822, "step": 66220 }, { "epoch": 0.0981222854908188, "grad_norm": 6.90625, "learning_rate": 0.00048376596757687945, "loss": 16.6494, "step": 66240 }, { "epoch": 0.09815191178474719, "grad_norm": 6.5625, "learning_rate": 0.0004837610286418719, "loss": 16.6508, "step": 66260 }, { "epoch": 0.09818153807867559, "grad_norm": 6.5625, "learning_rate": 0.00048375608970686434, "loss": 16.6827, "step": 66280 }, { "epoch": 0.09821116437260398, "grad_norm": 5.625, "learning_rate": 0.0004837511507718568, "loss": 16.6265, "step": 66300 }, { "epoch": 0.09824079066653237, "grad_norm": 6.71875, "learning_rate": 0.0004837462118368492, "loss": 16.6498, "step": 66320 }, { "epoch": 0.09827041696046075, "grad_norm": 7.9375, "learning_rate": 0.0004837412729018417, "loss": 16.6315, "step": 66340 }, { "epoch": 0.09830004325438914, "grad_norm": 6.375, "learning_rate": 0.0004837363339668341, "loss": 16.6039, "step": 66360 }, { "epoch": 0.09832966954831752, "grad_norm": 6.53125, "learning_rate": 0.0004837313950318265, "loss": 16.6509, "step": 66380 }, { "epoch": 0.09835929584224591, "grad_norm": 5.84375, "learning_rate": 0.0004837264560968189, "loss": 16.601, "step": 66400 }, { "epoch": 0.0983889221361743, "grad_norm": 6.46875, "learning_rate": 0.0004837215171618114, "loss": 16.6926, "step": 66420 }, { "epoch": 0.09841854843010268, "grad_norm": 6.625, "learning_rate": 0.0004837165782268038, "loss": 16.7138, "step": 66440 }, { "epoch": 0.09844817472403107, "grad_norm": 6.71875, "learning_rate": 0.0004837116392917962, "loss": 16.5876, "step": 66460 }, { "epoch": 0.09847780101795946, "grad_norm": 6.4375, "learning_rate": 0.00048370670035678866, "loss": 16.6884, "step": 66480 }, { "epoch": 0.09850742731188784, "grad_norm": 6.75, "learning_rate": 0.0004837017614217811, "loss": 16.6649, "step": 66500 }, { "epoch": 0.09853705360581623, "grad_norm": 7.25, "learning_rate": 0.00048369682248677355, "loss": 16.6092, "step": 66520 }, { "epoch": 0.09856667989974462, "grad_norm": 6.03125, "learning_rate": 0.00048369188355176595, "loss": 16.627, "step": 66540 }, { "epoch": 0.098596306193673, "grad_norm": 6.78125, "learning_rate": 0.0004836869446167584, "loss": 16.6375, "step": 66560 }, { "epoch": 0.09862593248760139, "grad_norm": 7.4375, "learning_rate": 0.00048368200568175084, "loss": 16.6451, "step": 66580 }, { "epoch": 0.09865555878152979, "grad_norm": 7.09375, "learning_rate": 0.0004836770667467433, "loss": 16.6191, "step": 66600 }, { "epoch": 0.09868518507545818, "grad_norm": 7.03125, "learning_rate": 0.0004836721278117357, "loss": 16.6238, "step": 66620 }, { "epoch": 0.09871481136938656, "grad_norm": 7.25, "learning_rate": 0.0004836671888767282, "loss": 16.6124, "step": 66640 }, { "epoch": 0.09874443766331495, "grad_norm": 6.28125, "learning_rate": 0.0004836622499417206, "loss": 16.6843, "step": 66660 }, { "epoch": 0.09877406395724334, "grad_norm": 6.59375, "learning_rate": 0.000483657311006713, "loss": 16.6849, "step": 66680 }, { "epoch": 0.09880369025117172, "grad_norm": 7.90625, "learning_rate": 0.0004836523720717054, "loss": 16.6554, "step": 66700 }, { "epoch": 0.09883331654510011, "grad_norm": 7.125, "learning_rate": 0.0004836474331366979, "loss": 16.6205, "step": 66720 }, { "epoch": 0.0988629428390285, "grad_norm": 7.03125, "learning_rate": 0.0004836424942016903, "loss": 16.6324, "step": 66740 }, { "epoch": 0.09889256913295688, "grad_norm": 6.90625, "learning_rate": 0.00048363755526668276, "loss": 16.6846, "step": 66760 }, { "epoch": 0.09892219542688527, "grad_norm": 7.15625, "learning_rate": 0.00048363261633167516, "loss": 16.6027, "step": 66780 }, { "epoch": 0.09895182172081365, "grad_norm": 6.71875, "learning_rate": 0.0004836276773966676, "loss": 16.6125, "step": 66800 }, { "epoch": 0.09898144801474204, "grad_norm": 6.0625, "learning_rate": 0.00048362273846166005, "loss": 16.6692, "step": 66820 }, { "epoch": 0.09901107430867043, "grad_norm": 6.46875, "learning_rate": 0.00048361779952665245, "loss": 16.6261, "step": 66840 }, { "epoch": 0.09904070060259881, "grad_norm": 7.03125, "learning_rate": 0.0004836128605916449, "loss": 16.5866, "step": 66860 }, { "epoch": 0.0990703268965272, "grad_norm": 6.375, "learning_rate": 0.00048360792165663734, "loss": 16.6372, "step": 66880 }, { "epoch": 0.09909995319045559, "grad_norm": 6.65625, "learning_rate": 0.0004836029827216298, "loss": 16.5989, "step": 66900 }, { "epoch": 0.09912957948438399, "grad_norm": 7.375, "learning_rate": 0.0004835980437866222, "loss": 16.6772, "step": 66920 }, { "epoch": 0.09915920577831237, "grad_norm": 6.90625, "learning_rate": 0.0004835931048516147, "loss": 16.6382, "step": 66940 }, { "epoch": 0.09918883207224076, "grad_norm": 7.53125, "learning_rate": 0.0004835881659166071, "loss": 16.6731, "step": 66960 }, { "epoch": 0.09921845836616915, "grad_norm": 6.90625, "learning_rate": 0.0004835832269815995, "loss": 16.6457, "step": 66980 }, { "epoch": 0.09924808466009753, "grad_norm": 6.40625, "learning_rate": 0.0004835782880465919, "loss": 16.6314, "step": 67000 }, { "epoch": 0.09927771095402592, "grad_norm": 6.46875, "learning_rate": 0.0004835733491115844, "loss": 16.6278, "step": 67020 }, { "epoch": 0.0993073372479543, "grad_norm": 6.78125, "learning_rate": 0.0004835684101765768, "loss": 16.5956, "step": 67040 }, { "epoch": 0.09933696354188269, "grad_norm": 6.75, "learning_rate": 0.00048356347124156926, "loss": 16.6496, "step": 67060 }, { "epoch": 0.09936658983581108, "grad_norm": 6.65625, "learning_rate": 0.00048355853230656166, "loss": 16.6581, "step": 67080 }, { "epoch": 0.09939621612973946, "grad_norm": 6.4375, "learning_rate": 0.00048355359337155416, "loss": 16.6221, "step": 67100 }, { "epoch": 0.09942584242366785, "grad_norm": 6.71875, "learning_rate": 0.00048354865443654655, "loss": 16.6772, "step": 67120 }, { "epoch": 0.09945546871759624, "grad_norm": 7.25, "learning_rate": 0.00048354371550153895, "loss": 16.6104, "step": 67140 }, { "epoch": 0.09948509501152462, "grad_norm": 6.75, "learning_rate": 0.0004835387765665314, "loss": 16.6313, "step": 67160 }, { "epoch": 0.09951472130545301, "grad_norm": 6.53125, "learning_rate": 0.00048353383763152384, "loss": 16.6284, "step": 67180 }, { "epoch": 0.0995443475993814, "grad_norm": 7.6875, "learning_rate": 0.0004835288986965163, "loss": 16.6672, "step": 67200 }, { "epoch": 0.09957397389330978, "grad_norm": 6.75, "learning_rate": 0.0004835239597615087, "loss": 16.6011, "step": 67220 }, { "epoch": 0.09960360018723818, "grad_norm": 6.4375, "learning_rate": 0.0004835190208265012, "loss": 16.6609, "step": 67240 }, { "epoch": 0.09963322648116657, "grad_norm": 7.40625, "learning_rate": 0.0004835140818914936, "loss": 16.6259, "step": 67260 }, { "epoch": 0.09966285277509496, "grad_norm": 6.8125, "learning_rate": 0.000483509142956486, "loss": 16.6092, "step": 67280 }, { "epoch": 0.09969247906902334, "grad_norm": 6.3125, "learning_rate": 0.0004835042040214784, "loss": 16.6204, "step": 67300 }, { "epoch": 0.09972210536295173, "grad_norm": 6.65625, "learning_rate": 0.0004834992650864709, "loss": 16.6558, "step": 67320 }, { "epoch": 0.09975173165688012, "grad_norm": 6.875, "learning_rate": 0.0004834943261514633, "loss": 16.6027, "step": 67340 }, { "epoch": 0.0997813579508085, "grad_norm": 6.53125, "learning_rate": 0.00048348938721645576, "loss": 16.5951, "step": 67360 }, { "epoch": 0.09981098424473689, "grad_norm": 7.1875, "learning_rate": 0.00048348444828144816, "loss": 16.6149, "step": 67380 }, { "epoch": 0.09984061053866528, "grad_norm": 6.78125, "learning_rate": 0.00048347950934644066, "loss": 16.5772, "step": 67400 }, { "epoch": 0.09987023683259366, "grad_norm": 6.8125, "learning_rate": 0.00048347457041143305, "loss": 16.6317, "step": 67420 }, { "epoch": 0.09989986312652205, "grad_norm": 7.0625, "learning_rate": 0.0004834696314764255, "loss": 16.5701, "step": 67440 }, { "epoch": 0.09992948942045043, "grad_norm": 6.65625, "learning_rate": 0.0004834646925414179, "loss": 16.6727, "step": 67460 }, { "epoch": 0.09995911571437882, "grad_norm": 6.84375, "learning_rate": 0.00048345975360641034, "loss": 16.6282, "step": 67480 }, { "epoch": 0.09998874200830721, "grad_norm": 7.59375, "learning_rate": 0.0004834548146714028, "loss": 16.58, "step": 67500 }, { "epoch": 0.1000183683022356, "grad_norm": 8.375, "learning_rate": 0.0004834498757363952, "loss": 16.657, "step": 67520 }, { "epoch": 0.100047994596164, "grad_norm": 6.4375, "learning_rate": 0.0004834449368013877, "loss": 16.6031, "step": 67540 }, { "epoch": 0.10007762089009238, "grad_norm": 6.46875, "learning_rate": 0.0004834399978663801, "loss": 16.5908, "step": 67560 }, { "epoch": 0.10010724718402077, "grad_norm": 6.875, "learning_rate": 0.0004834350589313725, "loss": 16.5555, "step": 67580 }, { "epoch": 0.10013687347794915, "grad_norm": 8.375, "learning_rate": 0.0004834301199963649, "loss": 16.6778, "step": 67600 }, { "epoch": 0.10016649977187754, "grad_norm": 6.90625, "learning_rate": 0.0004834251810613574, "loss": 16.6049, "step": 67620 }, { "epoch": 0.10019612606580593, "grad_norm": 6.96875, "learning_rate": 0.0004834202421263498, "loss": 16.6337, "step": 67640 }, { "epoch": 0.10022575235973431, "grad_norm": 7.09375, "learning_rate": 0.00048341530319134226, "loss": 16.6476, "step": 67660 }, { "epoch": 0.1002553786536627, "grad_norm": 6.03125, "learning_rate": 0.00048341036425633466, "loss": 16.6637, "step": 67680 }, { "epoch": 0.10028500494759109, "grad_norm": 6.25, "learning_rate": 0.00048340542532132716, "loss": 16.6126, "step": 67700 }, { "epoch": 0.10031463124151947, "grad_norm": 7.0, "learning_rate": 0.00048340048638631955, "loss": 16.6287, "step": 67720 }, { "epoch": 0.10034425753544786, "grad_norm": 7.71875, "learning_rate": 0.000483395547451312, "loss": 16.6074, "step": 67740 }, { "epoch": 0.10037388382937625, "grad_norm": 6.46875, "learning_rate": 0.0004833906085163044, "loss": 16.5914, "step": 67760 }, { "epoch": 0.10040351012330463, "grad_norm": 6.71875, "learning_rate": 0.0004833856695812969, "loss": 16.6092, "step": 67780 }, { "epoch": 0.10043313641723302, "grad_norm": 6.5625, "learning_rate": 0.0004833807306462893, "loss": 16.6073, "step": 67800 }, { "epoch": 0.1004627627111614, "grad_norm": 6.40625, "learning_rate": 0.0004833757917112817, "loss": 16.5553, "step": 67820 }, { "epoch": 0.10049238900508979, "grad_norm": 6.65625, "learning_rate": 0.0004833708527762742, "loss": 16.6536, "step": 67840 }, { "epoch": 0.10052201529901819, "grad_norm": 6.5625, "learning_rate": 0.0004833659138412666, "loss": 16.6156, "step": 67860 }, { "epoch": 0.10055164159294658, "grad_norm": 6.8125, "learning_rate": 0.00048336097490625903, "loss": 16.527, "step": 67880 }, { "epoch": 0.10058126788687496, "grad_norm": 6.8125, "learning_rate": 0.0004833560359712514, "loss": 16.5762, "step": 67900 }, { "epoch": 0.10061089418080335, "grad_norm": 6.96875, "learning_rate": 0.0004833510970362439, "loss": 16.5745, "step": 67920 }, { "epoch": 0.10064052047473174, "grad_norm": 6.84375, "learning_rate": 0.0004833461581012363, "loss": 16.6078, "step": 67940 }, { "epoch": 0.10067014676866012, "grad_norm": 6.4375, "learning_rate": 0.00048334121916622876, "loss": 16.581, "step": 67960 }, { "epoch": 0.10069977306258851, "grad_norm": 6.53125, "learning_rate": 0.00048333628023122116, "loss": 16.6142, "step": 67980 }, { "epoch": 0.1007293993565169, "grad_norm": 6.40625, "learning_rate": 0.00048333134129621366, "loss": 16.5557, "step": 68000 }, { "epoch": 0.10075902565044528, "grad_norm": 7.46875, "learning_rate": 0.00048332640236120605, "loss": 16.6099, "step": 68020 }, { "epoch": 0.10078865194437367, "grad_norm": 6.84375, "learning_rate": 0.0004833214634261985, "loss": 16.6517, "step": 68040 }, { "epoch": 0.10081827823830206, "grad_norm": 7.4375, "learning_rate": 0.0004833165244911909, "loss": 16.5766, "step": 68060 }, { "epoch": 0.10084790453223044, "grad_norm": 6.71875, "learning_rate": 0.0004833115855561834, "loss": 16.606, "step": 68080 }, { "epoch": 0.10087753082615883, "grad_norm": 6.59375, "learning_rate": 0.0004833066466211758, "loss": 16.5779, "step": 68100 }, { "epoch": 0.10090715712008722, "grad_norm": 6.8125, "learning_rate": 0.00048330170768616824, "loss": 16.6219, "step": 68120 }, { "epoch": 0.1009367834140156, "grad_norm": 7.15625, "learning_rate": 0.0004832967687511607, "loss": 16.5954, "step": 68140 }, { "epoch": 0.10096640970794399, "grad_norm": 7.0, "learning_rate": 0.0004832918298161531, "loss": 16.5884, "step": 68160 }, { "epoch": 0.10099603600187239, "grad_norm": 6.78125, "learning_rate": 0.00048328689088114553, "loss": 16.5813, "step": 68180 }, { "epoch": 0.10102566229580077, "grad_norm": 6.46875, "learning_rate": 0.0004832819519461379, "loss": 16.6294, "step": 68200 }, { "epoch": 0.10105528858972916, "grad_norm": 7.34375, "learning_rate": 0.0004832770130111304, "loss": 16.627, "step": 68220 }, { "epoch": 0.10108491488365755, "grad_norm": 7.0, "learning_rate": 0.0004832720740761228, "loss": 16.5516, "step": 68240 }, { "epoch": 0.10111454117758593, "grad_norm": 7.40625, "learning_rate": 0.00048326713514111526, "loss": 16.6234, "step": 68260 }, { "epoch": 0.10114416747151432, "grad_norm": 7.1875, "learning_rate": 0.00048326219620610766, "loss": 16.5566, "step": 68280 }, { "epoch": 0.10117379376544271, "grad_norm": 7.1875, "learning_rate": 0.00048325725727110016, "loss": 16.6184, "step": 68300 }, { "epoch": 0.1012034200593711, "grad_norm": 7.46875, "learning_rate": 0.00048325231833609255, "loss": 16.6634, "step": 68320 }, { "epoch": 0.10123304635329948, "grad_norm": 7.75, "learning_rate": 0.000483247379401085, "loss": 16.5374, "step": 68340 }, { "epoch": 0.10126267264722787, "grad_norm": 7.40625, "learning_rate": 0.0004832424404660774, "loss": 16.6118, "step": 68360 }, { "epoch": 0.10129229894115625, "grad_norm": 7.03125, "learning_rate": 0.0004832375015310699, "loss": 16.5952, "step": 68380 }, { "epoch": 0.10132192523508464, "grad_norm": 7.0, "learning_rate": 0.0004832325625960623, "loss": 16.5866, "step": 68400 }, { "epoch": 0.10135155152901303, "grad_norm": 7.09375, "learning_rate": 0.00048322762366105474, "loss": 16.5999, "step": 68420 }, { "epoch": 0.10138117782294141, "grad_norm": 6.9375, "learning_rate": 0.0004832226847260472, "loss": 16.553, "step": 68440 }, { "epoch": 0.1014108041168698, "grad_norm": 6.4375, "learning_rate": 0.00048321774579103963, "loss": 16.6224, "step": 68460 }, { "epoch": 0.10144043041079819, "grad_norm": 7.09375, "learning_rate": 0.00048321280685603203, "loss": 16.5789, "step": 68480 }, { "epoch": 0.10147005670472659, "grad_norm": 6.96875, "learning_rate": 0.0004832078679210244, "loss": 16.6379, "step": 68500 }, { "epoch": 0.10149968299865497, "grad_norm": 6.0625, "learning_rate": 0.0004832029289860169, "loss": 16.5628, "step": 68520 }, { "epoch": 0.10152930929258336, "grad_norm": 6.875, "learning_rate": 0.0004831979900510093, "loss": 16.5675, "step": 68540 }, { "epoch": 0.10155893558651174, "grad_norm": 6.96875, "learning_rate": 0.00048319305111600176, "loss": 16.591, "step": 68560 }, { "epoch": 0.10158856188044013, "grad_norm": 6.8125, "learning_rate": 0.00048318811218099416, "loss": 16.5641, "step": 68580 }, { "epoch": 0.10161818817436852, "grad_norm": 6.5625, "learning_rate": 0.00048318317324598666, "loss": 16.5867, "step": 68600 }, { "epoch": 0.1016478144682969, "grad_norm": 6.4375, "learning_rate": 0.00048317823431097905, "loss": 16.5886, "step": 68620 }, { "epoch": 0.10167744076222529, "grad_norm": 7.625, "learning_rate": 0.0004831732953759715, "loss": 16.5648, "step": 68640 }, { "epoch": 0.10170706705615368, "grad_norm": 6.46875, "learning_rate": 0.0004831683564409639, "loss": 16.5727, "step": 68660 }, { "epoch": 0.10173669335008206, "grad_norm": 6.75, "learning_rate": 0.0004831634175059564, "loss": 16.5931, "step": 68680 }, { "epoch": 0.10176631964401045, "grad_norm": 6.6875, "learning_rate": 0.0004831584785709488, "loss": 16.6137, "step": 68700 }, { "epoch": 0.10179594593793884, "grad_norm": 7.15625, "learning_rate": 0.00048315353963594124, "loss": 16.5733, "step": 68720 }, { "epoch": 0.10182557223186722, "grad_norm": 7.15625, "learning_rate": 0.0004831486007009337, "loss": 16.5379, "step": 68740 }, { "epoch": 0.10185519852579561, "grad_norm": 5.59375, "learning_rate": 0.00048314366176592613, "loss": 16.6031, "step": 68760 }, { "epoch": 0.101884824819724, "grad_norm": 6.6875, "learning_rate": 0.00048313872283091853, "loss": 16.5457, "step": 68780 }, { "epoch": 0.10191445111365238, "grad_norm": 6.125, "learning_rate": 0.000483133783895911, "loss": 16.5053, "step": 68800 }, { "epoch": 0.10194407740758078, "grad_norm": 7.1875, "learning_rate": 0.0004831288449609034, "loss": 16.576, "step": 68820 }, { "epoch": 0.10197370370150917, "grad_norm": 6.875, "learning_rate": 0.0004831239060258958, "loss": 16.6092, "step": 68840 }, { "epoch": 0.10200332999543756, "grad_norm": 6.5, "learning_rate": 0.00048311896709088827, "loss": 16.598, "step": 68860 }, { "epoch": 0.10203295628936594, "grad_norm": 6.3125, "learning_rate": 0.00048311402815588066, "loss": 16.5953, "step": 68880 }, { "epoch": 0.10206258258329433, "grad_norm": 6.34375, "learning_rate": 0.00048310908922087316, "loss": 16.5686, "step": 68900 }, { "epoch": 0.10209220887722271, "grad_norm": 7.3125, "learning_rate": 0.00048310415028586555, "loss": 16.5649, "step": 68920 }, { "epoch": 0.1021218351711511, "grad_norm": 7.15625, "learning_rate": 0.000483099211350858, "loss": 16.5833, "step": 68940 }, { "epoch": 0.10215146146507949, "grad_norm": 7.0, "learning_rate": 0.0004830942724158504, "loss": 16.5813, "step": 68960 }, { "epoch": 0.10218108775900787, "grad_norm": 7.125, "learning_rate": 0.0004830893334808429, "loss": 16.6435, "step": 68980 }, { "epoch": 0.10221071405293626, "grad_norm": 6.6875, "learning_rate": 0.0004830843945458353, "loss": 16.6299, "step": 69000 }, { "epoch": 0.10224034034686465, "grad_norm": 7.875, "learning_rate": 0.00048307945561082774, "loss": 16.5241, "step": 69020 }, { "epoch": 0.10226996664079303, "grad_norm": 6.40625, "learning_rate": 0.0004830745166758202, "loss": 16.5397, "step": 69040 }, { "epoch": 0.10229959293472142, "grad_norm": 6.8125, "learning_rate": 0.00048306957774081263, "loss": 16.5522, "step": 69060 }, { "epoch": 0.1023292192286498, "grad_norm": 6.6875, "learning_rate": 0.00048306463880580503, "loss": 16.5518, "step": 69080 }, { "epoch": 0.10235884552257819, "grad_norm": 6.28125, "learning_rate": 0.0004830596998707975, "loss": 16.54, "step": 69100 }, { "epoch": 0.10238847181650658, "grad_norm": 8.0, "learning_rate": 0.0004830547609357899, "loss": 16.6088, "step": 69120 }, { "epoch": 0.10241809811043498, "grad_norm": 6.53125, "learning_rate": 0.00048304982200078237, "loss": 16.6152, "step": 69140 }, { "epoch": 0.10244772440436337, "grad_norm": 7.75, "learning_rate": 0.00048304488306577477, "loss": 16.633, "step": 69160 }, { "epoch": 0.10247735069829175, "grad_norm": 7.46875, "learning_rate": 0.00048303994413076716, "loss": 16.5787, "step": 69180 }, { "epoch": 0.10250697699222014, "grad_norm": 6.4375, "learning_rate": 0.00048303500519575966, "loss": 16.5533, "step": 69200 }, { "epoch": 0.10253660328614853, "grad_norm": 6.40625, "learning_rate": 0.00048303006626075205, "loss": 16.5498, "step": 69220 }, { "epoch": 0.10256622958007691, "grad_norm": 7.15625, "learning_rate": 0.0004830251273257445, "loss": 16.5712, "step": 69240 }, { "epoch": 0.1025958558740053, "grad_norm": 6.625, "learning_rate": 0.0004830201883907369, "loss": 16.5545, "step": 69260 }, { "epoch": 0.10262548216793368, "grad_norm": 7.65625, "learning_rate": 0.0004830152494557294, "loss": 16.5233, "step": 69280 }, { "epoch": 0.10265510846186207, "grad_norm": 6.40625, "learning_rate": 0.0004830103105207218, "loss": 16.5822, "step": 69300 }, { "epoch": 0.10268473475579046, "grad_norm": 6.9375, "learning_rate": 0.00048300537158571424, "loss": 16.5851, "step": 69320 }, { "epoch": 0.10271436104971884, "grad_norm": 6.53125, "learning_rate": 0.0004830004326507067, "loss": 16.5013, "step": 69340 }, { "epoch": 0.10274398734364723, "grad_norm": 6.5, "learning_rate": 0.00048299549371569914, "loss": 16.5316, "step": 69360 }, { "epoch": 0.10277361363757562, "grad_norm": 7.34375, "learning_rate": 0.00048299055478069153, "loss": 16.4967, "step": 69380 }, { "epoch": 0.102803239931504, "grad_norm": 6.46875, "learning_rate": 0.000482985615845684, "loss": 16.6028, "step": 69400 }, { "epoch": 0.10283286622543239, "grad_norm": 6.78125, "learning_rate": 0.0004829806769106764, "loss": 16.5567, "step": 69420 }, { "epoch": 0.10286249251936078, "grad_norm": 6.5625, "learning_rate": 0.00048297573797566887, "loss": 16.5561, "step": 69440 }, { "epoch": 0.10289211881328918, "grad_norm": 6.34375, "learning_rate": 0.00048297079904066127, "loss": 16.6269, "step": 69460 }, { "epoch": 0.10292174510721756, "grad_norm": 8.8125, "learning_rate": 0.0004829658601056537, "loss": 16.5778, "step": 69480 }, { "epoch": 0.10295137140114595, "grad_norm": 6.9375, "learning_rate": 0.00048296092117064616, "loss": 16.5705, "step": 69500 }, { "epoch": 0.10298099769507434, "grad_norm": 7.375, "learning_rate": 0.00048295598223563855, "loss": 16.538, "step": 69520 }, { "epoch": 0.10301062398900272, "grad_norm": 6.65625, "learning_rate": 0.000482951043300631, "loss": 16.5615, "step": 69540 }, { "epoch": 0.10304025028293111, "grad_norm": 7.375, "learning_rate": 0.0004829461043656234, "loss": 16.5769, "step": 69560 }, { "epoch": 0.1030698765768595, "grad_norm": 6.65625, "learning_rate": 0.0004829411654306159, "loss": 16.5347, "step": 69580 }, { "epoch": 0.10309950287078788, "grad_norm": 6.625, "learning_rate": 0.0004829362264956083, "loss": 16.5224, "step": 69600 }, { "epoch": 0.10312912916471627, "grad_norm": 7.71875, "learning_rate": 0.00048293128756060074, "loss": 16.4941, "step": 69620 }, { "epoch": 0.10315875545864465, "grad_norm": 6.59375, "learning_rate": 0.0004829263486255932, "loss": 16.567, "step": 69640 }, { "epoch": 0.10318838175257304, "grad_norm": 7.0625, "learning_rate": 0.00048292140969058564, "loss": 16.5851, "step": 69660 }, { "epoch": 0.10321800804650143, "grad_norm": 6.625, "learning_rate": 0.00048291647075557803, "loss": 16.5477, "step": 69680 }, { "epoch": 0.10324763434042981, "grad_norm": 6.875, "learning_rate": 0.0004829115318205705, "loss": 16.5258, "step": 69700 }, { "epoch": 0.1032772606343582, "grad_norm": 6.21875, "learning_rate": 0.0004829065928855629, "loss": 16.5651, "step": 69720 }, { "epoch": 0.10330688692828659, "grad_norm": 6.125, "learning_rate": 0.00048290165395055537, "loss": 16.5892, "step": 69740 }, { "epoch": 0.10333651322221497, "grad_norm": 6.65625, "learning_rate": 0.00048289671501554777, "loss": 16.5085, "step": 69760 }, { "epoch": 0.10336613951614337, "grad_norm": 7.25, "learning_rate": 0.0004828917760805402, "loss": 16.6262, "step": 69780 }, { "epoch": 0.10339576581007176, "grad_norm": 6.1875, "learning_rate": 0.00048288683714553266, "loss": 16.52, "step": 69800 }, { "epoch": 0.10342539210400015, "grad_norm": 6.28125, "learning_rate": 0.0004828818982105251, "loss": 16.5513, "step": 69820 }, { "epoch": 0.10345501839792853, "grad_norm": 6.71875, "learning_rate": 0.0004828769592755175, "loss": 16.5175, "step": 69840 }, { "epoch": 0.10348464469185692, "grad_norm": 7.375, "learning_rate": 0.0004828720203405099, "loss": 16.5315, "step": 69860 }, { "epoch": 0.1035142709857853, "grad_norm": 6.375, "learning_rate": 0.0004828670814055024, "loss": 16.542, "step": 69880 }, { "epoch": 0.10354389727971369, "grad_norm": 7.25, "learning_rate": 0.0004828621424704948, "loss": 16.5067, "step": 69900 }, { "epoch": 0.10357352357364208, "grad_norm": 6.1875, "learning_rate": 0.00048285720353548724, "loss": 16.5152, "step": 69920 }, { "epoch": 0.10360314986757047, "grad_norm": 6.46875, "learning_rate": 0.0004828522646004797, "loss": 16.5035, "step": 69940 }, { "epoch": 0.10363277616149885, "grad_norm": 6.9375, "learning_rate": 0.00048284732566547214, "loss": 16.5808, "step": 69960 }, { "epoch": 0.10366240245542724, "grad_norm": 6.65625, "learning_rate": 0.00048284238673046453, "loss": 16.5044, "step": 69980 }, { "epoch": 0.10369202874935562, "grad_norm": 6.125, "learning_rate": 0.000482837447795457, "loss": 16.578, "step": 70000 }, { "epoch": 0.10372165504328401, "grad_norm": 7.25, "learning_rate": 0.0004828325088604494, "loss": 16.562, "step": 70020 }, { "epoch": 0.1037512813372124, "grad_norm": 6.625, "learning_rate": 0.00048282756992544187, "loss": 16.5095, "step": 70040 }, { "epoch": 0.10378090763114078, "grad_norm": 6.09375, "learning_rate": 0.00048282263099043427, "loss": 16.5207, "step": 70060 }, { "epoch": 0.10381053392506917, "grad_norm": 6.625, "learning_rate": 0.0004828176920554267, "loss": 16.5471, "step": 70080 }, { "epoch": 0.10384016021899757, "grad_norm": 6.6875, "learning_rate": 0.00048281275312041916, "loss": 16.524, "step": 70100 }, { "epoch": 0.10386978651292596, "grad_norm": 6.1875, "learning_rate": 0.0004828078141854116, "loss": 16.4794, "step": 70120 }, { "epoch": 0.10389941280685434, "grad_norm": 6.75, "learning_rate": 0.000482802875250404, "loss": 16.5229, "step": 70140 }, { "epoch": 0.10392903910078273, "grad_norm": 6.125, "learning_rate": 0.00048279793631539645, "loss": 16.5296, "step": 70160 }, { "epoch": 0.10395866539471112, "grad_norm": 6.59375, "learning_rate": 0.0004827929973803889, "loss": 16.5783, "step": 70180 }, { "epoch": 0.1039882916886395, "grad_norm": 6.8125, "learning_rate": 0.0004827880584453813, "loss": 16.5537, "step": 70200 }, { "epoch": 0.10401791798256789, "grad_norm": 6.75, "learning_rate": 0.00048278311951037374, "loss": 16.5815, "step": 70220 }, { "epoch": 0.10404754427649628, "grad_norm": 8.6875, "learning_rate": 0.0004827781805753662, "loss": 16.5732, "step": 70240 }, { "epoch": 0.10407717057042466, "grad_norm": 6.5, "learning_rate": 0.00048277324164035864, "loss": 16.5605, "step": 70260 }, { "epoch": 0.10410679686435305, "grad_norm": 6.65625, "learning_rate": 0.00048276830270535103, "loss": 16.5224, "step": 70280 }, { "epoch": 0.10413642315828144, "grad_norm": 7.0625, "learning_rate": 0.0004827633637703435, "loss": 16.5452, "step": 70300 }, { "epoch": 0.10416604945220982, "grad_norm": 6.71875, "learning_rate": 0.0004827584248353359, "loss": 16.5098, "step": 70320 }, { "epoch": 0.10419567574613821, "grad_norm": 6.25, "learning_rate": 0.0004827534859003284, "loss": 16.5676, "step": 70340 }, { "epoch": 0.1042253020400666, "grad_norm": 6.875, "learning_rate": 0.00048274854696532077, "loss": 16.4514, "step": 70360 }, { "epoch": 0.10425492833399498, "grad_norm": 6.40625, "learning_rate": 0.0004827436080303132, "loss": 16.5532, "step": 70380 }, { "epoch": 0.10428455462792338, "grad_norm": 6.65625, "learning_rate": 0.00048273866909530566, "loss": 16.5367, "step": 70400 }, { "epoch": 0.10431418092185177, "grad_norm": 6.15625, "learning_rate": 0.0004827337301602981, "loss": 16.5784, "step": 70420 }, { "epoch": 0.10434380721578015, "grad_norm": 6.59375, "learning_rate": 0.0004827287912252905, "loss": 16.5085, "step": 70440 }, { "epoch": 0.10437343350970854, "grad_norm": 6.53125, "learning_rate": 0.00048272385229028295, "loss": 16.5486, "step": 70460 }, { "epoch": 0.10440305980363693, "grad_norm": 5.75, "learning_rate": 0.0004827189133552754, "loss": 16.4874, "step": 70480 }, { "epoch": 0.10443268609756531, "grad_norm": 6.84375, "learning_rate": 0.00048271397442026785, "loss": 16.4957, "step": 70500 }, { "epoch": 0.1044623123914937, "grad_norm": 6.75, "learning_rate": 0.00048270903548526024, "loss": 16.5765, "step": 70520 }, { "epoch": 0.10449193868542209, "grad_norm": 8.125, "learning_rate": 0.00048270409655025263, "loss": 16.5136, "step": 70540 }, { "epoch": 0.10452156497935047, "grad_norm": 6.78125, "learning_rate": 0.00048269915761524514, "loss": 16.5299, "step": 70560 }, { "epoch": 0.10455119127327886, "grad_norm": 6.6875, "learning_rate": 0.00048269421868023753, "loss": 16.4576, "step": 70580 }, { "epoch": 0.10458081756720725, "grad_norm": 6.84375, "learning_rate": 0.00048268927974523, "loss": 16.4926, "step": 70600 }, { "epoch": 0.10461044386113563, "grad_norm": 6.75, "learning_rate": 0.0004826843408102224, "loss": 16.4703, "step": 70620 }, { "epoch": 0.10464007015506402, "grad_norm": 6.96875, "learning_rate": 0.0004826794018752149, "loss": 16.5149, "step": 70640 }, { "epoch": 0.1046696964489924, "grad_norm": 6.3125, "learning_rate": 0.00048267446294020727, "loss": 16.4792, "step": 70660 }, { "epoch": 0.10469932274292079, "grad_norm": 6.59375, "learning_rate": 0.0004826695240051997, "loss": 16.5295, "step": 70680 }, { "epoch": 0.10472894903684918, "grad_norm": 8.125, "learning_rate": 0.00048266458507019216, "loss": 16.5405, "step": 70700 }, { "epoch": 0.10475857533077758, "grad_norm": 7.90625, "learning_rate": 0.0004826596461351846, "loss": 16.543, "step": 70720 }, { "epoch": 0.10478820162470596, "grad_norm": 6.3125, "learning_rate": 0.000482654707200177, "loss": 16.5194, "step": 70740 }, { "epoch": 0.10481782791863435, "grad_norm": 6.75, "learning_rate": 0.00048264976826516945, "loss": 16.5217, "step": 70760 }, { "epoch": 0.10484745421256274, "grad_norm": 6.09375, "learning_rate": 0.0004826448293301619, "loss": 16.5735, "step": 70780 }, { "epoch": 0.10487708050649112, "grad_norm": 6.125, "learning_rate": 0.00048263989039515435, "loss": 16.4886, "step": 70800 }, { "epoch": 0.10490670680041951, "grad_norm": 7.71875, "learning_rate": 0.00048263495146014674, "loss": 16.4874, "step": 70820 }, { "epoch": 0.1049363330943479, "grad_norm": 6.59375, "learning_rate": 0.00048263001252513913, "loss": 16.5424, "step": 70840 }, { "epoch": 0.10496595938827628, "grad_norm": 6.84375, "learning_rate": 0.00048262507359013164, "loss": 16.5108, "step": 70860 }, { "epoch": 0.10499558568220467, "grad_norm": 6.71875, "learning_rate": 0.00048262013465512403, "loss": 16.4778, "step": 70880 }, { "epoch": 0.10502521197613306, "grad_norm": 7.21875, "learning_rate": 0.0004826151957201165, "loss": 16.4908, "step": 70900 }, { "epoch": 0.10505483827006144, "grad_norm": 6.8125, "learning_rate": 0.0004826102567851089, "loss": 16.4564, "step": 70920 }, { "epoch": 0.10508446456398983, "grad_norm": 6.625, "learning_rate": 0.0004826053178501014, "loss": 16.5258, "step": 70940 }, { "epoch": 0.10511409085791822, "grad_norm": 6.21875, "learning_rate": 0.00048260037891509377, "loss": 16.5403, "step": 70960 }, { "epoch": 0.1051437171518466, "grad_norm": 7.375, "learning_rate": 0.0004825954399800862, "loss": 16.575, "step": 70980 }, { "epoch": 0.10517334344577499, "grad_norm": 6.4375, "learning_rate": 0.00048259050104507866, "loss": 16.5599, "step": 71000 }, { "epoch": 0.10520296973970338, "grad_norm": 6.84375, "learning_rate": 0.0004825855621100711, "loss": 16.4962, "step": 71020 }, { "epoch": 0.10523259603363178, "grad_norm": 6.875, "learning_rate": 0.0004825806231750635, "loss": 16.4845, "step": 71040 }, { "epoch": 0.10526222232756016, "grad_norm": 6.71875, "learning_rate": 0.00048257568424005595, "loss": 16.5411, "step": 71060 }, { "epoch": 0.10529184862148855, "grad_norm": 6.65625, "learning_rate": 0.0004825707453050484, "loss": 16.5235, "step": 71080 }, { "epoch": 0.10532147491541693, "grad_norm": 6.375, "learning_rate": 0.00048256580637004085, "loss": 16.5079, "step": 71100 }, { "epoch": 0.10535110120934532, "grad_norm": 6.375, "learning_rate": 0.00048256086743503324, "loss": 16.5093, "step": 71120 }, { "epoch": 0.10538072750327371, "grad_norm": 6.84375, "learning_rate": 0.0004825559285000257, "loss": 16.5136, "step": 71140 }, { "epoch": 0.1054103537972021, "grad_norm": 6.8125, "learning_rate": 0.00048255098956501814, "loss": 16.4724, "step": 71160 }, { "epoch": 0.10543998009113048, "grad_norm": 6.25, "learning_rate": 0.0004825460506300106, "loss": 16.4691, "step": 71180 }, { "epoch": 0.10546960638505887, "grad_norm": 7.96875, "learning_rate": 0.000482541111695003, "loss": 16.4072, "step": 71200 }, { "epoch": 0.10549923267898725, "grad_norm": 6.375, "learning_rate": 0.0004825361727599954, "loss": 16.531, "step": 71220 }, { "epoch": 0.10552885897291564, "grad_norm": 6.3125, "learning_rate": 0.0004825312338249879, "loss": 16.5211, "step": 71240 }, { "epoch": 0.10555848526684403, "grad_norm": 6.625, "learning_rate": 0.00048252629488998027, "loss": 16.5079, "step": 71260 }, { "epoch": 0.10558811156077241, "grad_norm": 6.5625, "learning_rate": 0.0004825213559549727, "loss": 16.4813, "step": 71280 }, { "epoch": 0.1056177378547008, "grad_norm": 7.5, "learning_rate": 0.00048251641701996516, "loss": 16.5194, "step": 71300 }, { "epoch": 0.10564736414862919, "grad_norm": 6.84375, "learning_rate": 0.0004825114780849576, "loss": 16.4672, "step": 71320 }, { "epoch": 0.10567699044255757, "grad_norm": 6.90625, "learning_rate": 0.00048250653914995, "loss": 16.5088, "step": 71340 }, { "epoch": 0.10570661673648597, "grad_norm": 7.53125, "learning_rate": 0.00048250160021494245, "loss": 16.5076, "step": 71360 }, { "epoch": 0.10573624303041436, "grad_norm": 5.65625, "learning_rate": 0.0004824966612799349, "loss": 16.4723, "step": 71380 }, { "epoch": 0.10576586932434275, "grad_norm": 6.3125, "learning_rate": 0.00048249172234492735, "loss": 16.4759, "step": 71400 }, { "epoch": 0.10579549561827113, "grad_norm": 6.03125, "learning_rate": 0.00048248678340991974, "loss": 16.4789, "step": 71420 }, { "epoch": 0.10582512191219952, "grad_norm": 6.71875, "learning_rate": 0.0004824818444749122, "loss": 16.5194, "step": 71440 }, { "epoch": 0.1058547482061279, "grad_norm": 7.1875, "learning_rate": 0.00048247690553990464, "loss": 16.5426, "step": 71460 }, { "epoch": 0.10588437450005629, "grad_norm": 6.40625, "learning_rate": 0.0004824719666048971, "loss": 16.5093, "step": 71480 }, { "epoch": 0.10591400079398468, "grad_norm": 7.28125, "learning_rate": 0.0004824670276698895, "loss": 16.5204, "step": 71500 }, { "epoch": 0.10594362708791306, "grad_norm": 7.0, "learning_rate": 0.000482462088734882, "loss": 16.5197, "step": 71520 }, { "epoch": 0.10597325338184145, "grad_norm": 6.1875, "learning_rate": 0.0004824571497998744, "loss": 16.4954, "step": 71540 }, { "epoch": 0.10600287967576984, "grad_norm": 6.6875, "learning_rate": 0.00048245221086486677, "loss": 16.4379, "step": 71560 }, { "epoch": 0.10603250596969822, "grad_norm": 7.8125, "learning_rate": 0.0004824472719298592, "loss": 16.4314, "step": 71580 }, { "epoch": 0.10606213226362661, "grad_norm": 7.40625, "learning_rate": 0.00048244233299485166, "loss": 16.4561, "step": 71600 }, { "epoch": 0.106091758557555, "grad_norm": 6.625, "learning_rate": 0.0004824373940598441, "loss": 16.5224, "step": 71620 }, { "epoch": 0.10612138485148338, "grad_norm": 6.78125, "learning_rate": 0.0004824324551248365, "loss": 16.4732, "step": 71640 }, { "epoch": 0.10615101114541177, "grad_norm": 6.90625, "learning_rate": 0.00048242751618982895, "loss": 16.4654, "step": 71660 }, { "epoch": 0.10618063743934017, "grad_norm": 7.875, "learning_rate": 0.0004824225772548214, "loss": 16.5161, "step": 71680 }, { "epoch": 0.10621026373326856, "grad_norm": 6.09375, "learning_rate": 0.00048241763831981385, "loss": 16.446, "step": 71700 }, { "epoch": 0.10623989002719694, "grad_norm": 7.1875, "learning_rate": 0.00048241269938480624, "loss": 16.5245, "step": 71720 }, { "epoch": 0.10626951632112533, "grad_norm": 6.1875, "learning_rate": 0.0004824077604497987, "loss": 16.4989, "step": 71740 }, { "epoch": 0.10629914261505372, "grad_norm": 7.0, "learning_rate": 0.00048240282151479114, "loss": 16.5321, "step": 71760 }, { "epoch": 0.1063287689089821, "grad_norm": 6.15625, "learning_rate": 0.0004823978825797836, "loss": 16.4498, "step": 71780 }, { "epoch": 0.10635839520291049, "grad_norm": 6.875, "learning_rate": 0.000482392943644776, "loss": 16.4793, "step": 71800 }, { "epoch": 0.10638802149683887, "grad_norm": 7.90625, "learning_rate": 0.0004823880047097685, "loss": 16.4312, "step": 71820 }, { "epoch": 0.10641764779076726, "grad_norm": 6.625, "learning_rate": 0.0004823830657747609, "loss": 16.5154, "step": 71840 }, { "epoch": 0.10644727408469565, "grad_norm": 7.5, "learning_rate": 0.00048237812683975327, "loss": 16.4446, "step": 71860 }, { "epoch": 0.10647690037862403, "grad_norm": 7.25, "learning_rate": 0.0004823731879047457, "loss": 16.5021, "step": 71880 }, { "epoch": 0.10650652667255242, "grad_norm": 6.5, "learning_rate": 0.00048236824896973816, "loss": 16.4583, "step": 71900 }, { "epoch": 0.10653615296648081, "grad_norm": 6.625, "learning_rate": 0.0004823633100347306, "loss": 16.4529, "step": 71920 }, { "epoch": 0.1065657792604092, "grad_norm": 6.15625, "learning_rate": 0.000482358371099723, "loss": 16.4529, "step": 71940 }, { "epoch": 0.10659540555433758, "grad_norm": 6.53125, "learning_rate": 0.00048235343216471545, "loss": 16.4638, "step": 71960 }, { "epoch": 0.10662503184826597, "grad_norm": 6.5, "learning_rate": 0.0004823484932297079, "loss": 16.4739, "step": 71980 }, { "epoch": 0.10665465814219437, "grad_norm": 6.75, "learning_rate": 0.00048234355429470035, "loss": 16.5261, "step": 72000 }, { "epoch": 0.10668428443612275, "grad_norm": 6.65625, "learning_rate": 0.00048233861535969274, "loss": 16.4916, "step": 72020 }, { "epoch": 0.10671391073005114, "grad_norm": 6.6875, "learning_rate": 0.0004823336764246852, "loss": 16.4841, "step": 72040 }, { "epoch": 0.10674353702397953, "grad_norm": 6.09375, "learning_rate": 0.00048232873748967764, "loss": 16.485, "step": 72060 }, { "epoch": 0.10677316331790791, "grad_norm": 6.375, "learning_rate": 0.0004823237985546701, "loss": 16.4513, "step": 72080 }, { "epoch": 0.1068027896118363, "grad_norm": 7.78125, "learning_rate": 0.0004823188596196625, "loss": 16.4513, "step": 72100 }, { "epoch": 0.10683241590576469, "grad_norm": 7.0, "learning_rate": 0.000482313920684655, "loss": 16.4768, "step": 72120 }, { "epoch": 0.10686204219969307, "grad_norm": 6.5625, "learning_rate": 0.0004823089817496474, "loss": 16.4426, "step": 72140 }, { "epoch": 0.10689166849362146, "grad_norm": 7.46875, "learning_rate": 0.0004823040428146398, "loss": 16.4733, "step": 72160 }, { "epoch": 0.10692129478754984, "grad_norm": 7.625, "learning_rate": 0.0004822991038796322, "loss": 16.459, "step": 72180 }, { "epoch": 0.10695092108147823, "grad_norm": 6.0, "learning_rate": 0.0004822941649446247, "loss": 16.4675, "step": 72200 }, { "epoch": 0.10698054737540662, "grad_norm": 6.28125, "learning_rate": 0.0004822892260096171, "loss": 16.517, "step": 72220 }, { "epoch": 0.107010173669335, "grad_norm": 6.375, "learning_rate": 0.0004822842870746095, "loss": 16.4864, "step": 72240 }, { "epoch": 0.10703979996326339, "grad_norm": 6.4375, "learning_rate": 0.00048227934813960195, "loss": 16.4141, "step": 72260 }, { "epoch": 0.10706942625719178, "grad_norm": 6.875, "learning_rate": 0.0004822744092045944, "loss": 16.5031, "step": 72280 }, { "epoch": 0.10709905255112016, "grad_norm": 6.4375, "learning_rate": 0.00048226947026958685, "loss": 16.4786, "step": 72300 }, { "epoch": 0.10712867884504856, "grad_norm": 6.4375, "learning_rate": 0.00048226453133457924, "loss": 16.4936, "step": 72320 }, { "epoch": 0.10715830513897695, "grad_norm": 7.3125, "learning_rate": 0.0004822595923995717, "loss": 16.4618, "step": 72340 }, { "epoch": 0.10718793143290534, "grad_norm": 6.28125, "learning_rate": 0.00048225465346456414, "loss": 16.4663, "step": 72360 }, { "epoch": 0.10721755772683372, "grad_norm": 6.25, "learning_rate": 0.0004822497145295566, "loss": 16.4996, "step": 72380 }, { "epoch": 0.10724718402076211, "grad_norm": 6.25, "learning_rate": 0.000482244775594549, "loss": 16.4604, "step": 72400 }, { "epoch": 0.1072768103146905, "grad_norm": 6.6875, "learning_rate": 0.0004822398366595415, "loss": 16.4964, "step": 72420 }, { "epoch": 0.10730643660861888, "grad_norm": 6.8125, "learning_rate": 0.0004822348977245339, "loss": 16.4576, "step": 72440 }, { "epoch": 0.10733606290254727, "grad_norm": 6.65625, "learning_rate": 0.0004822299587895263, "loss": 16.4561, "step": 72460 }, { "epoch": 0.10736568919647566, "grad_norm": 6.125, "learning_rate": 0.0004822250198545187, "loss": 16.4686, "step": 72480 }, { "epoch": 0.10739531549040404, "grad_norm": 6.4375, "learning_rate": 0.0004822200809195112, "loss": 16.477, "step": 72500 }, { "epoch": 0.10742494178433243, "grad_norm": 7.0, "learning_rate": 0.0004822151419845036, "loss": 16.4681, "step": 72520 }, { "epoch": 0.10745456807826081, "grad_norm": 6.59375, "learning_rate": 0.000482210203049496, "loss": 16.4467, "step": 72540 }, { "epoch": 0.1074841943721892, "grad_norm": 6.40625, "learning_rate": 0.00048220526411448845, "loss": 16.4432, "step": 72560 }, { "epoch": 0.10751382066611759, "grad_norm": 6.46875, "learning_rate": 0.0004822003251794809, "loss": 16.4936, "step": 72580 }, { "epoch": 0.10754344696004597, "grad_norm": 6.46875, "learning_rate": 0.00048219538624447335, "loss": 16.4488, "step": 72600 }, { "epoch": 0.10757307325397436, "grad_norm": 6.0, "learning_rate": 0.00048219044730946574, "loss": 16.4461, "step": 72620 }, { "epoch": 0.10760269954790276, "grad_norm": 6.625, "learning_rate": 0.0004821855083744582, "loss": 16.5009, "step": 72640 }, { "epoch": 0.10763232584183115, "grad_norm": 7.21875, "learning_rate": 0.00048218056943945064, "loss": 16.4108, "step": 72660 }, { "epoch": 0.10766195213575953, "grad_norm": 6.40625, "learning_rate": 0.0004821756305044431, "loss": 16.4139, "step": 72680 }, { "epoch": 0.10769157842968792, "grad_norm": 6.90625, "learning_rate": 0.0004821706915694355, "loss": 16.4898, "step": 72700 }, { "epoch": 0.1077212047236163, "grad_norm": 7.40625, "learning_rate": 0.000482165752634428, "loss": 16.5149, "step": 72720 }, { "epoch": 0.1077508310175447, "grad_norm": 6.5, "learning_rate": 0.0004821608136994204, "loss": 16.4915, "step": 72740 }, { "epoch": 0.10778045731147308, "grad_norm": 7.5, "learning_rate": 0.0004821558747644128, "loss": 16.428, "step": 72760 }, { "epoch": 0.10781008360540147, "grad_norm": 7.21875, "learning_rate": 0.0004821509358294052, "loss": 16.4355, "step": 72780 }, { "epoch": 0.10783970989932985, "grad_norm": 6.34375, "learning_rate": 0.0004821459968943977, "loss": 16.4379, "step": 72800 }, { "epoch": 0.10786933619325824, "grad_norm": 6.53125, "learning_rate": 0.0004821410579593901, "loss": 16.4117, "step": 72820 }, { "epoch": 0.10789896248718663, "grad_norm": 6.59375, "learning_rate": 0.00048213611902438256, "loss": 16.4863, "step": 72840 }, { "epoch": 0.10792858878111501, "grad_norm": 6.875, "learning_rate": 0.00048213118008937495, "loss": 16.4081, "step": 72860 }, { "epoch": 0.1079582150750434, "grad_norm": 6.03125, "learning_rate": 0.0004821262411543674, "loss": 16.4631, "step": 72880 }, { "epoch": 0.10798784136897178, "grad_norm": 6.34375, "learning_rate": 0.00048212130221935985, "loss": 16.4837, "step": 72900 }, { "epoch": 0.10801746766290017, "grad_norm": 6.53125, "learning_rate": 0.00048211636328435224, "loss": 16.4245, "step": 72920 }, { "epoch": 0.10804709395682856, "grad_norm": 6.90625, "learning_rate": 0.0004821114243493447, "loss": 16.4369, "step": 72940 }, { "epoch": 0.10807672025075696, "grad_norm": 6.8125, "learning_rate": 0.00048210648541433714, "loss": 16.4274, "step": 72960 }, { "epoch": 0.10810634654468534, "grad_norm": 5.90625, "learning_rate": 0.0004821015464793296, "loss": 16.4141, "step": 72980 }, { "epoch": 0.10813597283861373, "grad_norm": 6.6875, "learning_rate": 0.000482096607544322, "loss": 16.4235, "step": 73000 }, { "epoch": 0.10816559913254212, "grad_norm": 7.0625, "learning_rate": 0.0004820916686093145, "loss": 16.4474, "step": 73020 }, { "epoch": 0.1081952254264705, "grad_norm": 6.75, "learning_rate": 0.0004820867296743069, "loss": 16.4285, "step": 73040 }, { "epoch": 0.10822485172039889, "grad_norm": 6.40625, "learning_rate": 0.0004820817907392993, "loss": 16.5063, "step": 73060 }, { "epoch": 0.10825447801432728, "grad_norm": 6.3125, "learning_rate": 0.0004820768518042917, "loss": 16.421, "step": 73080 }, { "epoch": 0.10828410430825566, "grad_norm": 6.78125, "learning_rate": 0.0004820719128692842, "loss": 16.5147, "step": 73100 }, { "epoch": 0.10831373060218405, "grad_norm": 7.125, "learning_rate": 0.0004820669739342766, "loss": 16.4131, "step": 73120 }, { "epoch": 0.10834335689611244, "grad_norm": 6.375, "learning_rate": 0.00048206203499926906, "loss": 16.3763, "step": 73140 }, { "epoch": 0.10837298319004082, "grad_norm": 6.75, "learning_rate": 0.00048205709606426145, "loss": 16.402, "step": 73160 }, { "epoch": 0.10840260948396921, "grad_norm": 6.90625, "learning_rate": 0.00048205215712925396, "loss": 16.4398, "step": 73180 }, { "epoch": 0.1084322357778976, "grad_norm": 7.34375, "learning_rate": 0.00048204721819424635, "loss": 16.4245, "step": 73200 }, { "epoch": 0.10846186207182598, "grad_norm": 6.71875, "learning_rate": 0.00048204227925923874, "loss": 16.4691, "step": 73220 }, { "epoch": 0.10849148836575437, "grad_norm": 6.375, "learning_rate": 0.0004820373403242312, "loss": 16.4718, "step": 73240 }, { "epoch": 0.10852111465968275, "grad_norm": 6.90625, "learning_rate": 0.00048203240138922364, "loss": 16.4101, "step": 73260 }, { "epoch": 0.10855074095361116, "grad_norm": 6.875, "learning_rate": 0.0004820274624542161, "loss": 16.4164, "step": 73280 }, { "epoch": 0.10858036724753954, "grad_norm": 6.6875, "learning_rate": 0.0004820225235192085, "loss": 16.4198, "step": 73300 }, { "epoch": 0.10860999354146793, "grad_norm": 7.25, "learning_rate": 0.000482017584584201, "loss": 16.3898, "step": 73320 }, { "epoch": 0.10863961983539631, "grad_norm": 6.59375, "learning_rate": 0.0004820126456491934, "loss": 16.4542, "step": 73340 }, { "epoch": 0.1086692461293247, "grad_norm": 6.5625, "learning_rate": 0.0004820077067141858, "loss": 16.4689, "step": 73360 }, { "epoch": 0.10869887242325309, "grad_norm": 7.8125, "learning_rate": 0.0004820027677791782, "loss": 16.3908, "step": 73380 }, { "epoch": 0.10872849871718147, "grad_norm": 7.625, "learning_rate": 0.0004819978288441707, "loss": 16.4518, "step": 73400 }, { "epoch": 0.10875812501110986, "grad_norm": 5.65625, "learning_rate": 0.0004819928899091631, "loss": 16.4547, "step": 73420 }, { "epoch": 0.10878775130503825, "grad_norm": 7.21875, "learning_rate": 0.00048198795097415556, "loss": 16.4573, "step": 73440 }, { "epoch": 0.10881737759896663, "grad_norm": 6.59375, "learning_rate": 0.00048198301203914795, "loss": 16.4254, "step": 73460 }, { "epoch": 0.10884700389289502, "grad_norm": 6.53125, "learning_rate": 0.00048197807310414046, "loss": 16.4479, "step": 73480 }, { "epoch": 0.1088766301868234, "grad_norm": 6.59375, "learning_rate": 0.00048197313416913285, "loss": 16.374, "step": 73500 }, { "epoch": 0.10890625648075179, "grad_norm": 7.0625, "learning_rate": 0.0004819681952341253, "loss": 16.4854, "step": 73520 }, { "epoch": 0.10893588277468018, "grad_norm": 6.5625, "learning_rate": 0.0004819632562991177, "loss": 16.373, "step": 73540 }, { "epoch": 0.10896550906860857, "grad_norm": 5.6875, "learning_rate": 0.00048195831736411014, "loss": 16.4403, "step": 73560 }, { "epoch": 0.10899513536253697, "grad_norm": 7.28125, "learning_rate": 0.0004819533784291026, "loss": 16.4399, "step": 73580 }, { "epoch": 0.10902476165646535, "grad_norm": 6.0, "learning_rate": 0.000481948439494095, "loss": 16.4257, "step": 73600 }, { "epoch": 0.10905438795039374, "grad_norm": 7.0, "learning_rate": 0.0004819435005590875, "loss": 16.3843, "step": 73620 }, { "epoch": 0.10908401424432213, "grad_norm": 6.5625, "learning_rate": 0.0004819385616240799, "loss": 16.4221, "step": 73640 }, { "epoch": 0.10911364053825051, "grad_norm": 6.90625, "learning_rate": 0.0004819336226890723, "loss": 16.4464, "step": 73660 }, { "epoch": 0.1091432668321789, "grad_norm": 6.875, "learning_rate": 0.0004819286837540647, "loss": 16.4325, "step": 73680 }, { "epoch": 0.10917289312610728, "grad_norm": 6.09375, "learning_rate": 0.0004819237448190572, "loss": 16.4222, "step": 73700 }, { "epoch": 0.10920251942003567, "grad_norm": 6.9375, "learning_rate": 0.0004819188058840496, "loss": 16.4236, "step": 73720 }, { "epoch": 0.10923214571396406, "grad_norm": 6.0625, "learning_rate": 0.00048191386694904206, "loss": 16.4719, "step": 73740 }, { "epoch": 0.10926177200789244, "grad_norm": 6.78125, "learning_rate": 0.00048190892801403445, "loss": 16.4062, "step": 73760 }, { "epoch": 0.10929139830182083, "grad_norm": 7.0, "learning_rate": 0.00048190398907902696, "loss": 16.4468, "step": 73780 }, { "epoch": 0.10932102459574922, "grad_norm": 6.6875, "learning_rate": 0.00048189905014401935, "loss": 16.4426, "step": 73800 }, { "epoch": 0.1093506508896776, "grad_norm": 6.625, "learning_rate": 0.0004818941112090118, "loss": 16.4042, "step": 73820 }, { "epoch": 0.10938027718360599, "grad_norm": 6.53125, "learning_rate": 0.0004818891722740042, "loss": 16.489, "step": 73840 }, { "epoch": 0.10940990347753438, "grad_norm": 7.25, "learning_rate": 0.0004818842333389967, "loss": 16.4129, "step": 73860 }, { "epoch": 0.10943952977146276, "grad_norm": 6.59375, "learning_rate": 0.0004818792944039891, "loss": 16.4828, "step": 73880 }, { "epoch": 0.10946915606539116, "grad_norm": 6.03125, "learning_rate": 0.0004818743554689815, "loss": 16.4081, "step": 73900 }, { "epoch": 0.10949878235931955, "grad_norm": 6.40625, "learning_rate": 0.000481869416533974, "loss": 16.441, "step": 73920 }, { "epoch": 0.10952840865324794, "grad_norm": 6.96875, "learning_rate": 0.0004818644775989664, "loss": 16.4013, "step": 73940 }, { "epoch": 0.10955803494717632, "grad_norm": 7.46875, "learning_rate": 0.0004818595386639588, "loss": 16.4102, "step": 73960 }, { "epoch": 0.10958766124110471, "grad_norm": 7.09375, "learning_rate": 0.0004818545997289512, "loss": 16.3879, "step": 73980 }, { "epoch": 0.1096172875350331, "grad_norm": 7.0, "learning_rate": 0.0004818496607939437, "loss": 16.3994, "step": 74000 }, { "epoch": 0.10964691382896148, "grad_norm": 6.34375, "learning_rate": 0.0004818447218589361, "loss": 16.3548, "step": 74020 }, { "epoch": 0.10967654012288987, "grad_norm": 6.84375, "learning_rate": 0.00048183978292392856, "loss": 16.4066, "step": 74040 }, { "epoch": 0.10970616641681825, "grad_norm": 7.34375, "learning_rate": 0.00048183484398892096, "loss": 16.4287, "step": 74060 }, { "epoch": 0.10973579271074664, "grad_norm": 6.34375, "learning_rate": 0.00048182990505391346, "loss": 16.4596, "step": 74080 }, { "epoch": 0.10976541900467503, "grad_norm": 7.625, "learning_rate": 0.00048182496611890585, "loss": 16.4221, "step": 74100 }, { "epoch": 0.10979504529860341, "grad_norm": 6.9375, "learning_rate": 0.0004818200271838983, "loss": 16.4392, "step": 74120 }, { "epoch": 0.1098246715925318, "grad_norm": 6.5625, "learning_rate": 0.0004818150882488907, "loss": 16.4181, "step": 74140 }, { "epoch": 0.10985429788646019, "grad_norm": 7.40625, "learning_rate": 0.0004818101493138832, "loss": 16.4593, "step": 74160 }, { "epoch": 0.10988392418038857, "grad_norm": 6.8125, "learning_rate": 0.0004818052103788756, "loss": 16.4166, "step": 74180 }, { "epoch": 0.10991355047431696, "grad_norm": 6.90625, "learning_rate": 0.00048180027144386804, "loss": 16.422, "step": 74200 }, { "epoch": 0.10994317676824536, "grad_norm": 6.84375, "learning_rate": 0.0004817953325088605, "loss": 16.3946, "step": 74220 }, { "epoch": 0.10997280306217375, "grad_norm": 6.5, "learning_rate": 0.0004817903935738529, "loss": 16.4022, "step": 74240 }, { "epoch": 0.11000242935610213, "grad_norm": 7.25, "learning_rate": 0.0004817854546388453, "loss": 16.4411, "step": 74260 }, { "epoch": 0.11003205565003052, "grad_norm": 7.28125, "learning_rate": 0.0004817805157038377, "loss": 16.3874, "step": 74280 }, { "epoch": 0.1100616819439589, "grad_norm": 6.40625, "learning_rate": 0.0004817755767688302, "loss": 16.4294, "step": 74300 }, { "epoch": 0.11009130823788729, "grad_norm": 6.8125, "learning_rate": 0.0004817706378338226, "loss": 16.4261, "step": 74320 }, { "epoch": 0.11012093453181568, "grad_norm": 7.34375, "learning_rate": 0.00048176569889881506, "loss": 16.4238, "step": 74340 }, { "epoch": 0.11015056082574406, "grad_norm": 7.125, "learning_rate": 0.00048176075996380746, "loss": 16.3817, "step": 74360 }, { "epoch": 0.11018018711967245, "grad_norm": 6.65625, "learning_rate": 0.00048175582102879996, "loss": 16.3883, "step": 74380 }, { "epoch": 0.11020981341360084, "grad_norm": 6.71875, "learning_rate": 0.00048175088209379235, "loss": 16.4262, "step": 74400 }, { "epoch": 0.11023943970752922, "grad_norm": 7.0, "learning_rate": 0.0004817459431587848, "loss": 16.4212, "step": 74420 }, { "epoch": 0.11026906600145761, "grad_norm": 6.90625, "learning_rate": 0.0004817410042237772, "loss": 16.4718, "step": 74440 }, { "epoch": 0.110298692295386, "grad_norm": 7.8125, "learning_rate": 0.0004817360652887697, "loss": 16.4202, "step": 74460 }, { "epoch": 0.11032831858931438, "grad_norm": 7.28125, "learning_rate": 0.0004817311263537621, "loss": 16.3904, "step": 74480 }, { "epoch": 0.11035794488324277, "grad_norm": 6.75, "learning_rate": 0.00048172618741875454, "loss": 16.4918, "step": 74500 }, { "epoch": 0.11038757117717116, "grad_norm": 7.09375, "learning_rate": 0.000481721248483747, "loss": 16.3993, "step": 74520 }, { "epoch": 0.11041719747109956, "grad_norm": 6.5, "learning_rate": 0.00048171630954873943, "loss": 16.3485, "step": 74540 }, { "epoch": 0.11044682376502794, "grad_norm": 6.59375, "learning_rate": 0.0004817113706137318, "loss": 16.3389, "step": 74560 }, { "epoch": 0.11047645005895633, "grad_norm": 6.9375, "learning_rate": 0.0004817064316787242, "loss": 16.3738, "step": 74580 }, { "epoch": 0.11050607635288472, "grad_norm": 7.5, "learning_rate": 0.0004817014927437167, "loss": 16.3881, "step": 74600 }, { "epoch": 0.1105357026468131, "grad_norm": 6.1875, "learning_rate": 0.0004816965538087091, "loss": 16.3802, "step": 74620 }, { "epoch": 0.11056532894074149, "grad_norm": 7.375, "learning_rate": 0.00048169161487370156, "loss": 16.4216, "step": 74640 }, { "epoch": 0.11059495523466988, "grad_norm": 7.1875, "learning_rate": 0.00048168667593869396, "loss": 16.4166, "step": 74660 }, { "epoch": 0.11062458152859826, "grad_norm": 7.21875, "learning_rate": 0.00048168173700368646, "loss": 16.358, "step": 74680 }, { "epoch": 0.11065420782252665, "grad_norm": 7.59375, "learning_rate": 0.00048167679806867885, "loss": 16.4844, "step": 74700 }, { "epoch": 0.11068383411645503, "grad_norm": 7.59375, "learning_rate": 0.0004816718591336713, "loss": 16.4061, "step": 74720 }, { "epoch": 0.11071346041038342, "grad_norm": 7.09375, "learning_rate": 0.0004816669201986637, "loss": 16.4073, "step": 74740 }, { "epoch": 0.11074308670431181, "grad_norm": 6.28125, "learning_rate": 0.0004816619812636562, "loss": 16.3988, "step": 74760 }, { "epoch": 0.1107727129982402, "grad_norm": 5.96875, "learning_rate": 0.0004816570423286486, "loss": 16.4417, "step": 74780 }, { "epoch": 0.11080233929216858, "grad_norm": 7.15625, "learning_rate": 0.00048165210339364104, "loss": 16.3517, "step": 74800 }, { "epoch": 0.11083196558609697, "grad_norm": 6.40625, "learning_rate": 0.0004816471644586335, "loss": 16.3409, "step": 74820 }, { "epoch": 0.11086159188002535, "grad_norm": 6.65625, "learning_rate": 0.00048164222552362593, "loss": 16.3664, "step": 74840 }, { "epoch": 0.11089121817395375, "grad_norm": 6.75, "learning_rate": 0.0004816372865886183, "loss": 16.4146, "step": 74860 }, { "epoch": 0.11092084446788214, "grad_norm": 6.90625, "learning_rate": 0.0004816323476536108, "loss": 16.3548, "step": 74880 }, { "epoch": 0.11095047076181053, "grad_norm": 6.8125, "learning_rate": 0.0004816274087186032, "loss": 16.4546, "step": 74900 }, { "epoch": 0.11098009705573891, "grad_norm": 6.59375, "learning_rate": 0.0004816224697835956, "loss": 16.3883, "step": 74920 }, { "epoch": 0.1110097233496673, "grad_norm": 7.0, "learning_rate": 0.00048161753084858806, "loss": 16.4069, "step": 74940 }, { "epoch": 0.11103934964359569, "grad_norm": 7.15625, "learning_rate": 0.00048161259191358046, "loss": 16.3556, "step": 74960 }, { "epoch": 0.11106897593752407, "grad_norm": 6.65625, "learning_rate": 0.00048160765297857296, "loss": 16.4227, "step": 74980 }, { "epoch": 0.11109860223145246, "grad_norm": 6.90625, "learning_rate": 0.00048160271404356535, "loss": 16.4454, "step": 75000 }, { "epoch": 0.11112822852538085, "grad_norm": 5.6875, "learning_rate": 0.0004815977751085578, "loss": 16.438, "step": 75020 }, { "epoch": 0.11115785481930923, "grad_norm": 6.6875, "learning_rate": 0.0004815928361735502, "loss": 16.4092, "step": 75040 }, { "epoch": 0.11118748111323762, "grad_norm": 6.40625, "learning_rate": 0.0004815878972385427, "loss": 16.364, "step": 75060 }, { "epoch": 0.111217107407166, "grad_norm": 6.90625, "learning_rate": 0.0004815829583035351, "loss": 16.3725, "step": 75080 }, { "epoch": 0.11124673370109439, "grad_norm": 7.28125, "learning_rate": 0.00048157801936852754, "loss": 16.4758, "step": 75100 }, { "epoch": 0.11127635999502278, "grad_norm": 7.03125, "learning_rate": 0.00048157308043352, "loss": 16.3516, "step": 75120 }, { "epoch": 0.11130598628895116, "grad_norm": 7.375, "learning_rate": 0.00048156814149851243, "loss": 16.4029, "step": 75140 }, { "epoch": 0.11133561258287955, "grad_norm": 6.40625, "learning_rate": 0.0004815632025635048, "loss": 16.4283, "step": 75160 }, { "epoch": 0.11136523887680795, "grad_norm": 6.40625, "learning_rate": 0.0004815582636284973, "loss": 16.3905, "step": 75180 }, { "epoch": 0.11139486517073634, "grad_norm": 7.875, "learning_rate": 0.0004815533246934897, "loss": 16.3731, "step": 75200 }, { "epoch": 0.11142449146466472, "grad_norm": 6.1875, "learning_rate": 0.00048154838575848217, "loss": 16.4243, "step": 75220 }, { "epoch": 0.11145411775859311, "grad_norm": 7.28125, "learning_rate": 0.00048154344682347456, "loss": 16.3272, "step": 75240 }, { "epoch": 0.1114837440525215, "grad_norm": 7.125, "learning_rate": 0.00048153850788846696, "loss": 16.3074, "step": 75260 }, { "epoch": 0.11151337034644988, "grad_norm": 7.78125, "learning_rate": 0.00048153356895345946, "loss": 16.4066, "step": 75280 }, { "epoch": 0.11154299664037827, "grad_norm": 7.125, "learning_rate": 0.00048152863001845185, "loss": 16.3641, "step": 75300 }, { "epoch": 0.11157262293430666, "grad_norm": 7.0625, "learning_rate": 0.0004815236910834443, "loss": 16.3674, "step": 75320 }, { "epoch": 0.11160224922823504, "grad_norm": 7.03125, "learning_rate": 0.0004815187521484367, "loss": 16.3299, "step": 75340 }, { "epoch": 0.11163187552216343, "grad_norm": 6.65625, "learning_rate": 0.0004815138132134292, "loss": 16.4177, "step": 75360 }, { "epoch": 0.11166150181609182, "grad_norm": 7.1875, "learning_rate": 0.0004815088742784216, "loss": 16.3523, "step": 75380 }, { "epoch": 0.1116911281100202, "grad_norm": 7.125, "learning_rate": 0.00048150393534341404, "loss": 16.3449, "step": 75400 }, { "epoch": 0.11172075440394859, "grad_norm": 6.34375, "learning_rate": 0.0004814989964084065, "loss": 16.3556, "step": 75420 }, { "epoch": 0.11175038069787697, "grad_norm": 7.28125, "learning_rate": 0.00048149405747339893, "loss": 16.4188, "step": 75440 }, { "epoch": 0.11178000699180536, "grad_norm": 6.65625, "learning_rate": 0.0004814891185383913, "loss": 16.3877, "step": 75460 }, { "epoch": 0.11180963328573375, "grad_norm": 6.71875, "learning_rate": 0.0004814841796033838, "loss": 16.3696, "step": 75480 }, { "epoch": 0.11183925957966215, "grad_norm": 6.8125, "learning_rate": 0.0004814792406683762, "loss": 16.3543, "step": 75500 }, { "epoch": 0.11186888587359053, "grad_norm": 7.21875, "learning_rate": 0.00048147430173336867, "loss": 16.4576, "step": 75520 }, { "epoch": 0.11189851216751892, "grad_norm": 6.625, "learning_rate": 0.00048146936279836106, "loss": 16.3673, "step": 75540 }, { "epoch": 0.11192813846144731, "grad_norm": 7.25, "learning_rate": 0.0004814644238633535, "loss": 16.4263, "step": 75560 }, { "epoch": 0.1119577647553757, "grad_norm": 6.75, "learning_rate": 0.00048145948492834596, "loss": 16.3437, "step": 75580 }, { "epoch": 0.11198739104930408, "grad_norm": 6.21875, "learning_rate": 0.00048145454599333835, "loss": 16.3618, "step": 75600 }, { "epoch": 0.11201701734323247, "grad_norm": 6.71875, "learning_rate": 0.0004814496070583308, "loss": 16.3418, "step": 75620 }, { "epoch": 0.11204664363716085, "grad_norm": 6.90625, "learning_rate": 0.0004814446681233232, "loss": 16.3916, "step": 75640 }, { "epoch": 0.11207626993108924, "grad_norm": 7.21875, "learning_rate": 0.0004814397291883157, "loss": 16.3521, "step": 75660 }, { "epoch": 0.11210589622501763, "grad_norm": 6.84375, "learning_rate": 0.0004814347902533081, "loss": 16.3464, "step": 75680 }, { "epoch": 0.11213552251894601, "grad_norm": 6.40625, "learning_rate": 0.00048142985131830054, "loss": 16.3965, "step": 75700 }, { "epoch": 0.1121651488128744, "grad_norm": 6.53125, "learning_rate": 0.000481424912383293, "loss": 16.379, "step": 75720 }, { "epoch": 0.11219477510680279, "grad_norm": 6.375, "learning_rate": 0.00048141997344828543, "loss": 16.3858, "step": 75740 }, { "epoch": 0.11222440140073117, "grad_norm": 6.75, "learning_rate": 0.0004814150345132778, "loss": 16.3445, "step": 75760 }, { "epoch": 0.11225402769465956, "grad_norm": 6.34375, "learning_rate": 0.0004814100955782703, "loss": 16.2839, "step": 75780 }, { "epoch": 0.11228365398858794, "grad_norm": 6.15625, "learning_rate": 0.0004814051566432627, "loss": 16.3684, "step": 75800 }, { "epoch": 0.11231328028251635, "grad_norm": 7.375, "learning_rate": 0.00048140021770825517, "loss": 16.3344, "step": 75820 }, { "epoch": 0.11234290657644473, "grad_norm": 7.0625, "learning_rate": 0.00048139527877324756, "loss": 16.3251, "step": 75840 }, { "epoch": 0.11237253287037312, "grad_norm": 7.1875, "learning_rate": 0.00048139033983824, "loss": 16.3266, "step": 75860 }, { "epoch": 0.1124021591643015, "grad_norm": 6.5625, "learning_rate": 0.00048138540090323246, "loss": 16.3339, "step": 75880 }, { "epoch": 0.11243178545822989, "grad_norm": 7.59375, "learning_rate": 0.0004813804619682249, "loss": 16.3593, "step": 75900 }, { "epoch": 0.11246141175215828, "grad_norm": 6.1875, "learning_rate": 0.0004813755230332173, "loss": 16.3293, "step": 75920 }, { "epoch": 0.11249103804608666, "grad_norm": 7.3125, "learning_rate": 0.0004813705840982097, "loss": 16.3553, "step": 75940 }, { "epoch": 0.11252066434001505, "grad_norm": 6.9375, "learning_rate": 0.0004813656451632022, "loss": 16.375, "step": 75960 }, { "epoch": 0.11255029063394344, "grad_norm": 6.65625, "learning_rate": 0.0004813607062281946, "loss": 16.3077, "step": 75980 }, { "epoch": 0.11257991692787182, "grad_norm": 7.25, "learning_rate": 0.00048135576729318704, "loss": 16.3442, "step": 76000 }, { "epoch": 0.11260954322180021, "grad_norm": 6.1875, "learning_rate": 0.0004813508283581795, "loss": 16.3857, "step": 76020 }, { "epoch": 0.1126391695157286, "grad_norm": 6.8125, "learning_rate": 0.00048134588942317193, "loss": 16.4228, "step": 76040 }, { "epoch": 0.11266879580965698, "grad_norm": 6.6875, "learning_rate": 0.0004813409504881643, "loss": 16.4156, "step": 76060 }, { "epoch": 0.11269842210358537, "grad_norm": 7.40625, "learning_rate": 0.0004813360115531568, "loss": 16.3843, "step": 76080 }, { "epoch": 0.11272804839751376, "grad_norm": 6.53125, "learning_rate": 0.0004813310726181492, "loss": 16.3459, "step": 76100 }, { "epoch": 0.11275767469144214, "grad_norm": 7.09375, "learning_rate": 0.00048132613368314167, "loss": 16.3973, "step": 76120 }, { "epoch": 0.11278730098537054, "grad_norm": 8.1875, "learning_rate": 0.00048132119474813406, "loss": 16.3245, "step": 76140 }, { "epoch": 0.11281692727929893, "grad_norm": 7.65625, "learning_rate": 0.0004813162558131265, "loss": 16.3705, "step": 76160 }, { "epoch": 0.11284655357322732, "grad_norm": 6.8125, "learning_rate": 0.00048131131687811896, "loss": 16.3244, "step": 76180 }, { "epoch": 0.1128761798671557, "grad_norm": 7.0625, "learning_rate": 0.0004813063779431114, "loss": 16.3466, "step": 76200 }, { "epoch": 0.11290580616108409, "grad_norm": 6.28125, "learning_rate": 0.0004813014390081038, "loss": 16.345, "step": 76220 }, { "epoch": 0.11293543245501247, "grad_norm": 6.46875, "learning_rate": 0.00048129650007309625, "loss": 16.3821, "step": 76240 }, { "epoch": 0.11296505874894086, "grad_norm": 6.78125, "learning_rate": 0.0004812915611380887, "loss": 16.3541, "step": 76260 }, { "epoch": 0.11299468504286925, "grad_norm": 7.0, "learning_rate": 0.0004812866222030811, "loss": 16.2916, "step": 76280 }, { "epoch": 0.11302431133679763, "grad_norm": 6.59375, "learning_rate": 0.00048128168326807354, "loss": 16.2938, "step": 76300 }, { "epoch": 0.11305393763072602, "grad_norm": 6.5, "learning_rate": 0.000481276744333066, "loss": 16.4026, "step": 76320 }, { "epoch": 0.1130835639246544, "grad_norm": 6.34375, "learning_rate": 0.00048127180539805843, "loss": 16.3102, "step": 76340 }, { "epoch": 0.1131131902185828, "grad_norm": 6.71875, "learning_rate": 0.0004812668664630508, "loss": 16.338, "step": 76360 }, { "epoch": 0.11314281651251118, "grad_norm": 6.0, "learning_rate": 0.0004812619275280433, "loss": 16.3304, "step": 76380 }, { "epoch": 0.11317244280643957, "grad_norm": 6.4375, "learning_rate": 0.0004812569885930357, "loss": 16.353, "step": 76400 }, { "epoch": 0.11320206910036795, "grad_norm": 7.03125, "learning_rate": 0.00048125204965802817, "loss": 16.3146, "step": 76420 }, { "epoch": 0.11323169539429635, "grad_norm": 7.125, "learning_rate": 0.00048124711072302056, "loss": 16.3556, "step": 76440 }, { "epoch": 0.11326132168822474, "grad_norm": 7.03125, "learning_rate": 0.000481242171788013, "loss": 16.4044, "step": 76460 }, { "epoch": 0.11329094798215313, "grad_norm": 6.125, "learning_rate": 0.00048123723285300546, "loss": 16.2708, "step": 76480 }, { "epoch": 0.11332057427608151, "grad_norm": 6.96875, "learning_rate": 0.0004812322939179979, "loss": 16.3705, "step": 76500 }, { "epoch": 0.1133502005700099, "grad_norm": 6.46875, "learning_rate": 0.0004812273549829903, "loss": 16.3579, "step": 76520 }, { "epoch": 0.11337982686393829, "grad_norm": 9.4375, "learning_rate": 0.00048122241604798275, "loss": 16.3504, "step": 76540 }, { "epoch": 0.11340945315786667, "grad_norm": 6.84375, "learning_rate": 0.0004812174771129752, "loss": 16.3338, "step": 76560 }, { "epoch": 0.11343907945179506, "grad_norm": 7.75, "learning_rate": 0.00048121253817796764, "loss": 16.337, "step": 76580 }, { "epoch": 0.11346870574572344, "grad_norm": 6.1875, "learning_rate": 0.00048120759924296004, "loss": 16.3507, "step": 76600 }, { "epoch": 0.11349833203965183, "grad_norm": 6.0, "learning_rate": 0.0004812026603079525, "loss": 16.311, "step": 76620 }, { "epoch": 0.11352795833358022, "grad_norm": 6.15625, "learning_rate": 0.00048119772137294493, "loss": 16.3014, "step": 76640 }, { "epoch": 0.1135575846275086, "grad_norm": 7.1875, "learning_rate": 0.00048119278243793733, "loss": 16.3813, "step": 76660 }, { "epoch": 0.11358721092143699, "grad_norm": 6.375, "learning_rate": 0.0004811878435029298, "loss": 16.3528, "step": 76680 }, { "epoch": 0.11361683721536538, "grad_norm": 7.34375, "learning_rate": 0.0004811829045679222, "loss": 16.308, "step": 76700 }, { "epoch": 0.11364646350929376, "grad_norm": 6.28125, "learning_rate": 0.00048117796563291467, "loss": 16.3711, "step": 76720 }, { "epoch": 0.11367608980322215, "grad_norm": 6.90625, "learning_rate": 0.00048117302669790706, "loss": 16.2906, "step": 76740 }, { "epoch": 0.11370571609715055, "grad_norm": 6.8125, "learning_rate": 0.0004811680877628995, "loss": 16.3626, "step": 76760 }, { "epoch": 0.11373534239107894, "grad_norm": 5.75, "learning_rate": 0.00048116314882789196, "loss": 16.2853, "step": 76780 }, { "epoch": 0.11376496868500732, "grad_norm": 7.0625, "learning_rate": 0.0004811582098928844, "loss": 16.3697, "step": 76800 }, { "epoch": 0.11379459497893571, "grad_norm": 6.28125, "learning_rate": 0.0004811532709578768, "loss": 16.3249, "step": 76820 }, { "epoch": 0.1138242212728641, "grad_norm": 7.59375, "learning_rate": 0.00048114833202286925, "loss": 16.3022, "step": 76840 }, { "epoch": 0.11385384756679248, "grad_norm": 7.375, "learning_rate": 0.0004811433930878617, "loss": 16.3372, "step": 76860 }, { "epoch": 0.11388347386072087, "grad_norm": 7.25, "learning_rate": 0.00048113845415285414, "loss": 16.3621, "step": 76880 }, { "epoch": 0.11391310015464925, "grad_norm": 6.4375, "learning_rate": 0.00048113351521784654, "loss": 16.3544, "step": 76900 }, { "epoch": 0.11394272644857764, "grad_norm": 6.40625, "learning_rate": 0.00048112857628283904, "loss": 16.3275, "step": 76920 }, { "epoch": 0.11397235274250603, "grad_norm": 6.90625, "learning_rate": 0.00048112363734783143, "loss": 16.359, "step": 76940 }, { "epoch": 0.11400197903643441, "grad_norm": 6.6875, "learning_rate": 0.00048111869841282383, "loss": 16.3528, "step": 76960 }, { "epoch": 0.1140316053303628, "grad_norm": 7.28125, "learning_rate": 0.0004811137594778163, "loss": 16.3342, "step": 76980 }, { "epoch": 0.11406123162429119, "grad_norm": 6.53125, "learning_rate": 0.0004811088205428087, "loss": 16.3042, "step": 77000 }, { "epoch": 0.11409085791821957, "grad_norm": 7.53125, "learning_rate": 0.00048110388160780117, "loss": 16.3761, "step": 77020 }, { "epoch": 0.11412048421214796, "grad_norm": 6.46875, "learning_rate": 0.00048109894267279356, "loss": 16.3388, "step": 77040 }, { "epoch": 0.11415011050607635, "grad_norm": 6.40625, "learning_rate": 0.000481094003737786, "loss": 16.3605, "step": 77060 }, { "epoch": 0.11417973680000475, "grad_norm": 7.65625, "learning_rate": 0.00048108906480277846, "loss": 16.3422, "step": 77080 }, { "epoch": 0.11420936309393313, "grad_norm": 7.28125, "learning_rate": 0.0004810841258677709, "loss": 16.3088, "step": 77100 }, { "epoch": 0.11423898938786152, "grad_norm": 6.375, "learning_rate": 0.0004810791869327633, "loss": 16.3281, "step": 77120 }, { "epoch": 0.1142686156817899, "grad_norm": 6.71875, "learning_rate": 0.00048107424799775575, "loss": 16.3102, "step": 77140 }, { "epoch": 0.11429824197571829, "grad_norm": 8.1875, "learning_rate": 0.0004810693090627482, "loss": 16.31, "step": 77160 }, { "epoch": 0.11432786826964668, "grad_norm": 6.5625, "learning_rate": 0.00048106437012774065, "loss": 16.3352, "step": 77180 }, { "epoch": 0.11435749456357507, "grad_norm": 6.125, "learning_rate": 0.00048105943119273304, "loss": 16.3811, "step": 77200 }, { "epoch": 0.11438712085750345, "grad_norm": 6.90625, "learning_rate": 0.00048105449225772554, "loss": 16.338, "step": 77220 }, { "epoch": 0.11441674715143184, "grad_norm": 6.46875, "learning_rate": 0.00048104955332271793, "loss": 16.3367, "step": 77240 }, { "epoch": 0.11444637344536022, "grad_norm": 6.4375, "learning_rate": 0.0004810446143877104, "loss": 16.347, "step": 77260 }, { "epoch": 0.11447599973928861, "grad_norm": 6.59375, "learning_rate": 0.0004810396754527028, "loss": 16.2845, "step": 77280 }, { "epoch": 0.114505626033217, "grad_norm": 6.1875, "learning_rate": 0.0004810347365176952, "loss": 16.2749, "step": 77300 }, { "epoch": 0.11453525232714538, "grad_norm": 6.34375, "learning_rate": 0.00048102979758268767, "loss": 16.3295, "step": 77320 }, { "epoch": 0.11456487862107377, "grad_norm": 6.3125, "learning_rate": 0.00048102485864768006, "loss": 16.3742, "step": 77340 }, { "epoch": 0.11459450491500216, "grad_norm": 7.96875, "learning_rate": 0.0004810199197126725, "loss": 16.3376, "step": 77360 }, { "epoch": 0.11462413120893054, "grad_norm": 5.28125, "learning_rate": 0.00048101498077766496, "loss": 16.3579, "step": 77380 }, { "epoch": 0.11465375750285894, "grad_norm": 6.75, "learning_rate": 0.0004810100418426574, "loss": 16.278, "step": 77400 }, { "epoch": 0.11468338379678733, "grad_norm": 6.21875, "learning_rate": 0.0004810051029076498, "loss": 16.2762, "step": 77420 }, { "epoch": 0.11471301009071572, "grad_norm": 7.34375, "learning_rate": 0.00048100016397264225, "loss": 16.3069, "step": 77440 }, { "epoch": 0.1147426363846441, "grad_norm": 6.5, "learning_rate": 0.0004809952250376347, "loss": 16.2691, "step": 77460 }, { "epoch": 0.11477226267857249, "grad_norm": 6.59375, "learning_rate": 0.00048099028610262715, "loss": 16.3231, "step": 77480 }, { "epoch": 0.11480188897250088, "grad_norm": 7.5625, "learning_rate": 0.00048098534716761954, "loss": 16.3429, "step": 77500 }, { "epoch": 0.11483151526642926, "grad_norm": 7.5, "learning_rate": 0.00048098040823261204, "loss": 16.2408, "step": 77520 }, { "epoch": 0.11486114156035765, "grad_norm": 7.46875, "learning_rate": 0.00048097546929760443, "loss": 16.3317, "step": 77540 }, { "epoch": 0.11489076785428604, "grad_norm": 6.5, "learning_rate": 0.0004809705303625969, "loss": 16.3055, "step": 77560 }, { "epoch": 0.11492039414821442, "grad_norm": 6.4375, "learning_rate": 0.0004809655914275893, "loss": 16.3141, "step": 77580 }, { "epoch": 0.11495002044214281, "grad_norm": 6.5, "learning_rate": 0.0004809606524925818, "loss": 16.2921, "step": 77600 }, { "epoch": 0.1149796467360712, "grad_norm": 6.0625, "learning_rate": 0.00048095571355757417, "loss": 16.3596, "step": 77620 }, { "epoch": 0.11500927302999958, "grad_norm": 7.21875, "learning_rate": 0.00048095077462256657, "loss": 16.3384, "step": 77640 }, { "epoch": 0.11503889932392797, "grad_norm": 7.53125, "learning_rate": 0.000480945835687559, "loss": 16.2963, "step": 77660 }, { "epoch": 0.11506852561785635, "grad_norm": 6.96875, "learning_rate": 0.00048094089675255146, "loss": 16.2884, "step": 77680 }, { "epoch": 0.11509815191178474, "grad_norm": 6.78125, "learning_rate": 0.0004809359578175439, "loss": 16.2678, "step": 77700 }, { "epoch": 0.11512777820571314, "grad_norm": 5.96875, "learning_rate": 0.0004809310188825363, "loss": 16.2579, "step": 77720 }, { "epoch": 0.11515740449964153, "grad_norm": 7.78125, "learning_rate": 0.00048092607994752875, "loss": 16.3299, "step": 77740 }, { "epoch": 0.11518703079356991, "grad_norm": 6.46875, "learning_rate": 0.0004809211410125212, "loss": 16.3104, "step": 77760 }, { "epoch": 0.1152166570874983, "grad_norm": 6.84375, "learning_rate": 0.00048091620207751365, "loss": 16.3141, "step": 77780 }, { "epoch": 0.11524628338142669, "grad_norm": 7.25, "learning_rate": 0.00048091126314250604, "loss": 16.3056, "step": 77800 }, { "epoch": 0.11527590967535507, "grad_norm": 7.59375, "learning_rate": 0.00048090632420749854, "loss": 16.3257, "step": 77820 }, { "epoch": 0.11530553596928346, "grad_norm": 7.59375, "learning_rate": 0.00048090138527249093, "loss": 16.3078, "step": 77840 }, { "epoch": 0.11533516226321185, "grad_norm": 6.15625, "learning_rate": 0.0004808964463374834, "loss": 16.3119, "step": 77860 }, { "epoch": 0.11536478855714023, "grad_norm": 6.78125, "learning_rate": 0.0004808915074024758, "loss": 16.2992, "step": 77880 }, { "epoch": 0.11539441485106862, "grad_norm": 6.9375, "learning_rate": 0.0004808865684674683, "loss": 16.3227, "step": 77900 }, { "epoch": 0.115424041144997, "grad_norm": 6.6875, "learning_rate": 0.00048088162953246067, "loss": 16.3171, "step": 77920 }, { "epoch": 0.11545366743892539, "grad_norm": 6.1875, "learning_rate": 0.00048087669059745307, "loss": 16.2978, "step": 77940 }, { "epoch": 0.11548329373285378, "grad_norm": 6.8125, "learning_rate": 0.0004808717516624455, "loss": 16.3063, "step": 77960 }, { "epoch": 0.11551292002678216, "grad_norm": 7.75, "learning_rate": 0.00048086681272743796, "loss": 16.3239, "step": 77980 }, { "epoch": 0.11554254632071055, "grad_norm": 6.625, "learning_rate": 0.0004808618737924304, "loss": 16.2962, "step": 78000 }, { "epoch": 0.11557217261463894, "grad_norm": 6.46875, "learning_rate": 0.0004808569348574228, "loss": 16.2507, "step": 78020 }, { "epoch": 0.11560179890856734, "grad_norm": 6.28125, "learning_rate": 0.00048085199592241525, "loss": 16.3604, "step": 78040 }, { "epoch": 0.11563142520249572, "grad_norm": 6.09375, "learning_rate": 0.0004808470569874077, "loss": 16.3038, "step": 78060 }, { "epoch": 0.11566105149642411, "grad_norm": 7.4375, "learning_rate": 0.00048084211805240015, "loss": 16.3236, "step": 78080 }, { "epoch": 0.1156906777903525, "grad_norm": 6.0625, "learning_rate": 0.00048083717911739254, "loss": 16.3219, "step": 78100 }, { "epoch": 0.11572030408428088, "grad_norm": 6.625, "learning_rate": 0.00048083224018238504, "loss": 16.345, "step": 78120 }, { "epoch": 0.11574993037820927, "grad_norm": 6.34375, "learning_rate": 0.00048082730124737744, "loss": 16.3579, "step": 78140 }, { "epoch": 0.11577955667213766, "grad_norm": 7.0, "learning_rate": 0.0004808223623123699, "loss": 16.3603, "step": 78160 }, { "epoch": 0.11580918296606604, "grad_norm": 5.9375, "learning_rate": 0.0004808174233773623, "loss": 16.2884, "step": 78180 }, { "epoch": 0.11583880925999443, "grad_norm": 6.03125, "learning_rate": 0.0004808124844423548, "loss": 16.3097, "step": 78200 }, { "epoch": 0.11586843555392282, "grad_norm": 6.75, "learning_rate": 0.00048080754550734717, "loss": 16.3178, "step": 78220 }, { "epoch": 0.1158980618478512, "grad_norm": 7.15625, "learning_rate": 0.0004808026065723396, "loss": 16.3025, "step": 78240 }, { "epoch": 0.11592768814177959, "grad_norm": 6.15625, "learning_rate": 0.000480797667637332, "loss": 16.2479, "step": 78260 }, { "epoch": 0.11595731443570798, "grad_norm": 7.65625, "learning_rate": 0.0004807927287023245, "loss": 16.3029, "step": 78280 }, { "epoch": 0.11598694072963636, "grad_norm": 6.28125, "learning_rate": 0.0004807877897673169, "loss": 16.316, "step": 78300 }, { "epoch": 0.11601656702356475, "grad_norm": 6.9375, "learning_rate": 0.0004807828508323093, "loss": 16.2811, "step": 78320 }, { "epoch": 0.11604619331749313, "grad_norm": 6.125, "learning_rate": 0.00048077791189730175, "loss": 16.2406, "step": 78340 }, { "epoch": 0.11607581961142154, "grad_norm": 6.4375, "learning_rate": 0.0004807729729622942, "loss": 16.323, "step": 78360 }, { "epoch": 0.11610544590534992, "grad_norm": 6.25, "learning_rate": 0.00048076803402728665, "loss": 16.3224, "step": 78380 }, { "epoch": 0.11613507219927831, "grad_norm": 6.875, "learning_rate": 0.00048076309509227904, "loss": 16.2637, "step": 78400 }, { "epoch": 0.1161646984932067, "grad_norm": 5.71875, "learning_rate": 0.00048075815615727154, "loss": 16.2791, "step": 78420 }, { "epoch": 0.11619432478713508, "grad_norm": 6.34375, "learning_rate": 0.00048075321722226394, "loss": 16.2131, "step": 78440 }, { "epoch": 0.11622395108106347, "grad_norm": 6.59375, "learning_rate": 0.0004807482782872564, "loss": 16.2892, "step": 78460 }, { "epoch": 0.11625357737499185, "grad_norm": 6.90625, "learning_rate": 0.0004807433393522488, "loss": 16.3267, "step": 78480 }, { "epoch": 0.11628320366892024, "grad_norm": 7.09375, "learning_rate": 0.0004807384004172413, "loss": 16.2359, "step": 78500 }, { "epoch": 0.11631282996284863, "grad_norm": 6.46875, "learning_rate": 0.00048073346148223367, "loss": 16.2849, "step": 78520 }, { "epoch": 0.11634245625677701, "grad_norm": 7.125, "learning_rate": 0.0004807285225472261, "loss": 16.2734, "step": 78540 }, { "epoch": 0.1163720825507054, "grad_norm": 7.21875, "learning_rate": 0.0004807235836122185, "loss": 16.2993, "step": 78560 }, { "epoch": 0.11640170884463379, "grad_norm": 7.0625, "learning_rate": 0.000480718644677211, "loss": 16.2808, "step": 78580 }, { "epoch": 0.11643133513856217, "grad_norm": 5.96875, "learning_rate": 0.0004807137057422034, "loss": 16.2739, "step": 78600 }, { "epoch": 0.11646096143249056, "grad_norm": 7.03125, "learning_rate": 0.0004807087668071958, "loss": 16.27, "step": 78620 }, { "epoch": 0.11649058772641895, "grad_norm": 6.5, "learning_rate": 0.00048070382787218825, "loss": 16.3228, "step": 78640 }, { "epoch": 0.11652021402034733, "grad_norm": 6.78125, "learning_rate": 0.0004806988889371807, "loss": 16.2983, "step": 78660 }, { "epoch": 0.11654984031427573, "grad_norm": 6.90625, "learning_rate": 0.00048069395000217315, "loss": 16.3329, "step": 78680 }, { "epoch": 0.11657946660820412, "grad_norm": 7.0625, "learning_rate": 0.00048068901106716554, "loss": 16.2225, "step": 78700 }, { "epoch": 0.1166090929021325, "grad_norm": 6.125, "learning_rate": 0.00048068407213215804, "loss": 16.3264, "step": 78720 }, { "epoch": 0.11663871919606089, "grad_norm": 6.03125, "learning_rate": 0.00048067913319715044, "loss": 16.2698, "step": 78740 }, { "epoch": 0.11666834548998928, "grad_norm": 7.40625, "learning_rate": 0.0004806741942621429, "loss": 16.2736, "step": 78760 }, { "epoch": 0.11669797178391766, "grad_norm": 5.875, "learning_rate": 0.0004806692553271353, "loss": 16.2845, "step": 78780 }, { "epoch": 0.11672759807784605, "grad_norm": 7.34375, "learning_rate": 0.0004806643163921278, "loss": 16.3404, "step": 78800 }, { "epoch": 0.11675722437177444, "grad_norm": 6.71875, "learning_rate": 0.0004806593774571202, "loss": 16.3419, "step": 78820 }, { "epoch": 0.11678685066570282, "grad_norm": 7.0625, "learning_rate": 0.0004806544385221126, "loss": 16.3002, "step": 78840 }, { "epoch": 0.11681647695963121, "grad_norm": 6.0625, "learning_rate": 0.000480649499587105, "loss": 16.2805, "step": 78860 }, { "epoch": 0.1168461032535596, "grad_norm": 6.90625, "learning_rate": 0.0004806445606520975, "loss": 16.2746, "step": 78880 }, { "epoch": 0.11687572954748798, "grad_norm": 6.21875, "learning_rate": 0.0004806396217170899, "loss": 16.3208, "step": 78900 }, { "epoch": 0.11690535584141637, "grad_norm": 6.375, "learning_rate": 0.00048063468278208236, "loss": 16.3402, "step": 78920 }, { "epoch": 0.11693498213534476, "grad_norm": 7.9375, "learning_rate": 0.00048062974384707475, "loss": 16.4001, "step": 78940 }, { "epoch": 0.11696460842927314, "grad_norm": 6.9375, "learning_rate": 0.0004806248049120672, "loss": 16.2514, "step": 78960 }, { "epoch": 0.11699423472320153, "grad_norm": 6.84375, "learning_rate": 0.00048061986597705965, "loss": 16.2722, "step": 78980 }, { "epoch": 0.11702386101712993, "grad_norm": 6.28125, "learning_rate": 0.00048061492704205204, "loss": 16.3266, "step": 79000 }, { "epoch": 0.11705348731105832, "grad_norm": 6.46875, "learning_rate": 0.00048060998810704454, "loss": 16.2829, "step": 79020 }, { "epoch": 0.1170831136049867, "grad_norm": 6.3125, "learning_rate": 0.00048060504917203694, "loss": 16.2504, "step": 79040 }, { "epoch": 0.11711273989891509, "grad_norm": 6.59375, "learning_rate": 0.0004806001102370294, "loss": 16.2487, "step": 79060 }, { "epoch": 0.11714236619284348, "grad_norm": 6.375, "learning_rate": 0.0004805951713020218, "loss": 16.3028, "step": 79080 }, { "epoch": 0.11717199248677186, "grad_norm": 7.59375, "learning_rate": 0.0004805902323670143, "loss": 16.2891, "step": 79100 }, { "epoch": 0.11720161878070025, "grad_norm": 6.15625, "learning_rate": 0.0004805852934320067, "loss": 16.3023, "step": 79120 }, { "epoch": 0.11723124507462863, "grad_norm": 8.875, "learning_rate": 0.0004805803544969991, "loss": 16.3369, "step": 79140 }, { "epoch": 0.11726087136855702, "grad_norm": 6.875, "learning_rate": 0.0004805754155619915, "loss": 16.2611, "step": 79160 }, { "epoch": 0.11729049766248541, "grad_norm": 6.84375, "learning_rate": 0.000480570476626984, "loss": 16.3287, "step": 79180 }, { "epoch": 0.1173201239564138, "grad_norm": 6.8125, "learning_rate": 0.0004805655376919764, "loss": 16.322, "step": 79200 }, { "epoch": 0.11734975025034218, "grad_norm": 6.34375, "learning_rate": 0.00048056059875696886, "loss": 16.2184, "step": 79220 }, { "epoch": 0.11737937654427057, "grad_norm": 7.3125, "learning_rate": 0.00048055565982196125, "loss": 16.3263, "step": 79240 }, { "epoch": 0.11740900283819895, "grad_norm": 6.59375, "learning_rate": 0.00048055072088695375, "loss": 16.2757, "step": 79260 }, { "epoch": 0.11743862913212734, "grad_norm": 6.71875, "learning_rate": 0.00048054578195194615, "loss": 16.2582, "step": 79280 }, { "epoch": 0.11746825542605574, "grad_norm": 7.0, "learning_rate": 0.00048054084301693854, "loss": 16.2983, "step": 79300 }, { "epoch": 0.11749788171998413, "grad_norm": 6.3125, "learning_rate": 0.00048053590408193104, "loss": 16.314, "step": 79320 }, { "epoch": 0.11752750801391251, "grad_norm": 6.375, "learning_rate": 0.00048053096514692344, "loss": 16.2981, "step": 79340 }, { "epoch": 0.1175571343078409, "grad_norm": 7.15625, "learning_rate": 0.0004805260262119159, "loss": 16.2972, "step": 79360 }, { "epoch": 0.11758676060176929, "grad_norm": 7.5, "learning_rate": 0.0004805210872769083, "loss": 16.2882, "step": 79380 }, { "epoch": 0.11761638689569767, "grad_norm": 6.8125, "learning_rate": 0.0004805161483419008, "loss": 16.2462, "step": 79400 }, { "epoch": 0.11764601318962606, "grad_norm": 6.78125, "learning_rate": 0.0004805112094068932, "loss": 16.2808, "step": 79420 }, { "epoch": 0.11767563948355445, "grad_norm": 6.09375, "learning_rate": 0.0004805062704718856, "loss": 16.265, "step": 79440 }, { "epoch": 0.11770526577748283, "grad_norm": 6.78125, "learning_rate": 0.000480501331536878, "loss": 16.2757, "step": 79460 }, { "epoch": 0.11773489207141122, "grad_norm": 6.09375, "learning_rate": 0.0004804963926018705, "loss": 16.3099, "step": 79480 }, { "epoch": 0.1177645183653396, "grad_norm": 6.65625, "learning_rate": 0.0004804914536668629, "loss": 16.2713, "step": 79500 }, { "epoch": 0.11779414465926799, "grad_norm": 6.3125, "learning_rate": 0.00048048651473185536, "loss": 16.2622, "step": 79520 }, { "epoch": 0.11782377095319638, "grad_norm": 5.875, "learning_rate": 0.00048048157579684775, "loss": 16.2833, "step": 79540 }, { "epoch": 0.11785339724712476, "grad_norm": 6.1875, "learning_rate": 0.00048047663686184025, "loss": 16.2429, "step": 79560 }, { "epoch": 0.11788302354105315, "grad_norm": 6.6875, "learning_rate": 0.00048047169792683265, "loss": 16.3167, "step": 79580 }, { "epoch": 0.11791264983498154, "grad_norm": 6.59375, "learning_rate": 0.0004804667589918251, "loss": 16.2256, "step": 79600 }, { "epoch": 0.11794227612890994, "grad_norm": 6.71875, "learning_rate": 0.00048046182005681754, "loss": 16.2095, "step": 79620 }, { "epoch": 0.11797190242283832, "grad_norm": 7.5, "learning_rate": 0.00048045688112180994, "loss": 16.2748, "step": 79640 }, { "epoch": 0.11800152871676671, "grad_norm": 6.5, "learning_rate": 0.0004804519421868024, "loss": 16.2733, "step": 79660 }, { "epoch": 0.1180311550106951, "grad_norm": 6.46875, "learning_rate": 0.0004804470032517948, "loss": 16.2384, "step": 79680 }, { "epoch": 0.11806078130462348, "grad_norm": 6.03125, "learning_rate": 0.0004804420643167873, "loss": 16.2747, "step": 79700 }, { "epoch": 0.11809040759855187, "grad_norm": 6.59375, "learning_rate": 0.0004804371253817797, "loss": 16.2826, "step": 79720 }, { "epoch": 0.11812003389248026, "grad_norm": 6.875, "learning_rate": 0.0004804321864467721, "loss": 16.2646, "step": 79740 }, { "epoch": 0.11814966018640864, "grad_norm": 7.625, "learning_rate": 0.0004804272475117645, "loss": 16.2691, "step": 79760 }, { "epoch": 0.11817928648033703, "grad_norm": 7.0625, "learning_rate": 0.000480422308576757, "loss": 16.2096, "step": 79780 }, { "epoch": 0.11820891277426541, "grad_norm": 7.09375, "learning_rate": 0.0004804173696417494, "loss": 16.2607, "step": 79800 }, { "epoch": 0.1182385390681938, "grad_norm": 6.1875, "learning_rate": 0.00048041243070674186, "loss": 16.2072, "step": 79820 }, { "epoch": 0.11826816536212219, "grad_norm": 5.96875, "learning_rate": 0.00048040749177173425, "loss": 16.2251, "step": 79840 }, { "epoch": 0.11829779165605057, "grad_norm": 7.0, "learning_rate": 0.00048040255283672675, "loss": 16.2256, "step": 79860 }, { "epoch": 0.11832741794997896, "grad_norm": 6.84375, "learning_rate": 0.00048039761390171915, "loss": 16.3018, "step": 79880 }, { "epoch": 0.11835704424390735, "grad_norm": 6.65625, "learning_rate": 0.0004803926749667116, "loss": 16.273, "step": 79900 }, { "epoch": 0.11838667053783573, "grad_norm": 7.1875, "learning_rate": 0.00048038773603170404, "loss": 16.2277, "step": 79920 }, { "epoch": 0.11841629683176413, "grad_norm": 6.34375, "learning_rate": 0.0004803827970966965, "loss": 16.2374, "step": 79940 }, { "epoch": 0.11844592312569252, "grad_norm": 6.25, "learning_rate": 0.0004803778581616889, "loss": 16.2584, "step": 79960 }, { "epoch": 0.1184755494196209, "grad_norm": 6.5, "learning_rate": 0.0004803729192266813, "loss": 16.2437, "step": 79980 }, { "epoch": 0.1185051757135493, "grad_norm": 7.375, "learning_rate": 0.0004803679802916738, "loss": 16.2565, "step": 80000 }, { "epoch": 0.11853480200747768, "grad_norm": 6.65625, "learning_rate": 0.0004803630413566662, "loss": 16.3588, "step": 80020 }, { "epoch": 0.11856442830140607, "grad_norm": 6.46875, "learning_rate": 0.0004803581024216586, "loss": 16.2388, "step": 80040 }, { "epoch": 0.11859405459533445, "grad_norm": 6.03125, "learning_rate": 0.000480353163486651, "loss": 16.2429, "step": 80060 }, { "epoch": 0.11862368088926284, "grad_norm": 6.8125, "learning_rate": 0.0004803482245516435, "loss": 16.2946, "step": 80080 }, { "epoch": 0.11865330718319123, "grad_norm": 6.90625, "learning_rate": 0.0004803432856166359, "loss": 16.2342, "step": 80100 }, { "epoch": 0.11868293347711961, "grad_norm": 6.71875, "learning_rate": 0.00048033834668162836, "loss": 16.2357, "step": 80120 }, { "epoch": 0.118712559771048, "grad_norm": 6.34375, "learning_rate": 0.00048033340774662075, "loss": 16.2403, "step": 80140 }, { "epoch": 0.11874218606497638, "grad_norm": 6.4375, "learning_rate": 0.00048032846881161325, "loss": 16.2697, "step": 80160 }, { "epoch": 0.11877181235890477, "grad_norm": 6.625, "learning_rate": 0.00048032352987660565, "loss": 16.264, "step": 80180 }, { "epoch": 0.11880143865283316, "grad_norm": 7.125, "learning_rate": 0.0004803185909415981, "loss": 16.2029, "step": 80200 }, { "epoch": 0.11883106494676154, "grad_norm": 7.1875, "learning_rate": 0.00048031365200659054, "loss": 16.2628, "step": 80220 }, { "epoch": 0.11886069124068993, "grad_norm": 6.65625, "learning_rate": 0.000480308713071583, "loss": 16.2777, "step": 80240 }, { "epoch": 0.11889031753461833, "grad_norm": 6.65625, "learning_rate": 0.0004803037741365754, "loss": 16.2869, "step": 80260 }, { "epoch": 0.11891994382854672, "grad_norm": 6.875, "learning_rate": 0.00048029883520156783, "loss": 16.2643, "step": 80280 }, { "epoch": 0.1189495701224751, "grad_norm": 7.03125, "learning_rate": 0.0004802938962665603, "loss": 16.2566, "step": 80300 }, { "epoch": 0.11897919641640349, "grad_norm": 7.125, "learning_rate": 0.0004802889573315527, "loss": 16.2704, "step": 80320 }, { "epoch": 0.11900882271033188, "grad_norm": 7.03125, "learning_rate": 0.0004802840183965451, "loss": 16.2605, "step": 80340 }, { "epoch": 0.11903844900426026, "grad_norm": 6.03125, "learning_rate": 0.0004802790794615375, "loss": 16.2133, "step": 80360 }, { "epoch": 0.11906807529818865, "grad_norm": 6.6875, "learning_rate": 0.00048027414052653, "loss": 16.2492, "step": 80380 }, { "epoch": 0.11909770159211704, "grad_norm": 6.5625, "learning_rate": 0.0004802692015915224, "loss": 16.2386, "step": 80400 }, { "epoch": 0.11912732788604542, "grad_norm": 7.09375, "learning_rate": 0.00048026426265651486, "loss": 16.1856, "step": 80420 }, { "epoch": 0.11915695417997381, "grad_norm": 6.71875, "learning_rate": 0.00048025932372150725, "loss": 16.2642, "step": 80440 }, { "epoch": 0.1191865804739022, "grad_norm": 6.59375, "learning_rate": 0.00048025438478649975, "loss": 16.2441, "step": 80460 }, { "epoch": 0.11921620676783058, "grad_norm": 6.9375, "learning_rate": 0.00048024944585149215, "loss": 16.2837, "step": 80480 }, { "epoch": 0.11924583306175897, "grad_norm": 6.25, "learning_rate": 0.0004802445069164846, "loss": 16.2611, "step": 80500 }, { "epoch": 0.11927545935568735, "grad_norm": 6.4375, "learning_rate": 0.00048023956798147704, "loss": 16.2182, "step": 80520 }, { "epoch": 0.11930508564961574, "grad_norm": 7.03125, "learning_rate": 0.0004802346290464695, "loss": 16.3038, "step": 80540 }, { "epoch": 0.11933471194354413, "grad_norm": 6.5, "learning_rate": 0.0004802296901114619, "loss": 16.2456, "step": 80560 }, { "epoch": 0.11936433823747253, "grad_norm": 6.3125, "learning_rate": 0.00048022475117645433, "loss": 16.2482, "step": 80580 }, { "epoch": 0.11939396453140091, "grad_norm": 7.53125, "learning_rate": 0.0004802198122414468, "loss": 16.2082, "step": 80600 }, { "epoch": 0.1194235908253293, "grad_norm": 6.90625, "learning_rate": 0.00048021487330643923, "loss": 16.2473, "step": 80620 }, { "epoch": 0.11945321711925769, "grad_norm": 6.03125, "learning_rate": 0.0004802099343714316, "loss": 16.229, "step": 80640 }, { "epoch": 0.11948284341318607, "grad_norm": 7.59375, "learning_rate": 0.000480204995436424, "loss": 16.249, "step": 80660 }, { "epoch": 0.11951246970711446, "grad_norm": 7.40625, "learning_rate": 0.0004802000565014165, "loss": 16.1933, "step": 80680 }, { "epoch": 0.11954209600104285, "grad_norm": 7.125, "learning_rate": 0.0004801951175664089, "loss": 16.2134, "step": 80700 }, { "epoch": 0.11957172229497123, "grad_norm": 7.53125, "learning_rate": 0.00048019017863140136, "loss": 16.2105, "step": 80720 }, { "epoch": 0.11960134858889962, "grad_norm": 5.875, "learning_rate": 0.00048018523969639375, "loss": 16.2044, "step": 80740 }, { "epoch": 0.119630974882828, "grad_norm": 6.28125, "learning_rate": 0.00048018030076138626, "loss": 16.2605, "step": 80760 }, { "epoch": 0.11966060117675639, "grad_norm": 6.6875, "learning_rate": 0.00048017536182637865, "loss": 16.2417, "step": 80780 }, { "epoch": 0.11969022747068478, "grad_norm": 5.96875, "learning_rate": 0.0004801704228913711, "loss": 16.2244, "step": 80800 }, { "epoch": 0.11971985376461317, "grad_norm": 7.375, "learning_rate": 0.00048016548395636354, "loss": 16.1888, "step": 80820 }, { "epoch": 0.11974948005854155, "grad_norm": 5.875, "learning_rate": 0.000480160545021356, "loss": 16.2616, "step": 80840 }, { "epoch": 0.11977910635246994, "grad_norm": 6.4375, "learning_rate": 0.0004801556060863484, "loss": 16.2743, "step": 80860 }, { "epoch": 0.11980873264639832, "grad_norm": 6.875, "learning_rate": 0.00048015066715134083, "loss": 16.2129, "step": 80880 }, { "epoch": 0.11983835894032673, "grad_norm": 6.59375, "learning_rate": 0.0004801457282163333, "loss": 16.2663, "step": 80900 }, { "epoch": 0.11986798523425511, "grad_norm": 6.9375, "learning_rate": 0.00048014078928132573, "loss": 16.2575, "step": 80920 }, { "epoch": 0.1198976115281835, "grad_norm": 7.03125, "learning_rate": 0.0004801358503463181, "loss": 16.2014, "step": 80940 }, { "epoch": 0.11992723782211188, "grad_norm": 7.0, "learning_rate": 0.00048013091141131057, "loss": 16.2794, "step": 80960 }, { "epoch": 0.11995686411604027, "grad_norm": 7.25, "learning_rate": 0.000480125972476303, "loss": 16.1728, "step": 80980 }, { "epoch": 0.11998649040996866, "grad_norm": 7.03125, "learning_rate": 0.0004801210335412954, "loss": 16.2376, "step": 81000 }, { "epoch": 0.12001611670389704, "grad_norm": 6.78125, "learning_rate": 0.00048011609460628786, "loss": 16.2201, "step": 81020 }, { "epoch": 0.12004574299782543, "grad_norm": 7.0625, "learning_rate": 0.00048011115567128025, "loss": 16.2081, "step": 81040 }, { "epoch": 0.12007536929175382, "grad_norm": 6.40625, "learning_rate": 0.00048010621673627276, "loss": 16.1728, "step": 81060 }, { "epoch": 0.1201049955856822, "grad_norm": 5.96875, "learning_rate": 0.00048010127780126515, "loss": 16.2446, "step": 81080 }, { "epoch": 0.12013462187961059, "grad_norm": 6.8125, "learning_rate": 0.0004800963388662576, "loss": 16.2341, "step": 81100 }, { "epoch": 0.12016424817353898, "grad_norm": 7.21875, "learning_rate": 0.00048009139993125004, "loss": 16.2402, "step": 81120 }, { "epoch": 0.12019387446746736, "grad_norm": 6.3125, "learning_rate": 0.0004800864609962425, "loss": 16.2117, "step": 81140 }, { "epoch": 0.12022350076139575, "grad_norm": 6.09375, "learning_rate": 0.0004800815220612349, "loss": 16.2817, "step": 81160 }, { "epoch": 0.12025312705532414, "grad_norm": 6.59375, "learning_rate": 0.00048007658312622733, "loss": 16.2479, "step": 81180 }, { "epoch": 0.12028275334925252, "grad_norm": 7.03125, "learning_rate": 0.0004800716441912198, "loss": 16.2402, "step": 81200 }, { "epoch": 0.12031237964318092, "grad_norm": 6.59375, "learning_rate": 0.00048006670525621223, "loss": 16.2495, "step": 81220 }, { "epoch": 0.12034200593710931, "grad_norm": 6.75, "learning_rate": 0.0004800617663212046, "loss": 16.2373, "step": 81240 }, { "epoch": 0.1203716322310377, "grad_norm": 7.0625, "learning_rate": 0.00048005682738619707, "loss": 16.1696, "step": 81260 }, { "epoch": 0.12040125852496608, "grad_norm": 6.375, "learning_rate": 0.0004800518884511895, "loss": 16.1894, "step": 81280 }, { "epoch": 0.12043088481889447, "grad_norm": 7.0, "learning_rate": 0.00048004694951618197, "loss": 16.214, "step": 81300 }, { "epoch": 0.12046051111282285, "grad_norm": 7.125, "learning_rate": 0.00048004201058117436, "loss": 16.2855, "step": 81320 }, { "epoch": 0.12049013740675124, "grad_norm": 6.84375, "learning_rate": 0.00048003707164616675, "loss": 16.2033, "step": 81340 }, { "epoch": 0.12051976370067963, "grad_norm": 7.25, "learning_rate": 0.00048003213271115926, "loss": 16.1961, "step": 81360 }, { "epoch": 0.12054938999460801, "grad_norm": 9.8125, "learning_rate": 0.00048002719377615165, "loss": 16.1959, "step": 81380 }, { "epoch": 0.1205790162885364, "grad_norm": 6.0625, "learning_rate": 0.0004800222548411441, "loss": 16.2231, "step": 81400 }, { "epoch": 0.12060864258246479, "grad_norm": 6.6875, "learning_rate": 0.00048001731590613655, "loss": 16.206, "step": 81420 }, { "epoch": 0.12063826887639317, "grad_norm": 6.375, "learning_rate": 0.000480012376971129, "loss": 16.2459, "step": 81440 }, { "epoch": 0.12066789517032156, "grad_norm": 5.90625, "learning_rate": 0.0004800074380361214, "loss": 16.2108, "step": 81460 }, { "epoch": 0.12069752146424995, "grad_norm": 6.3125, "learning_rate": 0.00048000249910111383, "loss": 16.2263, "step": 81480 }, { "epoch": 0.12072714775817833, "grad_norm": 6.65625, "learning_rate": 0.0004799975601661063, "loss": 16.2835, "step": 81500 }, { "epoch": 0.12075677405210672, "grad_norm": 6.625, "learning_rate": 0.00047999262123109873, "loss": 16.246, "step": 81520 }, { "epoch": 0.12078640034603512, "grad_norm": 7.71875, "learning_rate": 0.0004799876822960911, "loss": 16.2665, "step": 81540 }, { "epoch": 0.1208160266399635, "grad_norm": 6.46875, "learning_rate": 0.00047998274336108357, "loss": 16.2341, "step": 81560 }, { "epoch": 0.12084565293389189, "grad_norm": 6.84375, "learning_rate": 0.000479977804426076, "loss": 16.2343, "step": 81580 }, { "epoch": 0.12087527922782028, "grad_norm": 6.375, "learning_rate": 0.00047997286549106847, "loss": 16.1953, "step": 81600 }, { "epoch": 0.12090490552174867, "grad_norm": 8.125, "learning_rate": 0.00047996792655606086, "loss": 16.2518, "step": 81620 }, { "epoch": 0.12093453181567705, "grad_norm": 8.0625, "learning_rate": 0.0004799629876210533, "loss": 16.1682, "step": 81640 }, { "epoch": 0.12096415810960544, "grad_norm": 6.59375, "learning_rate": 0.00047995804868604576, "loss": 16.2499, "step": 81660 }, { "epoch": 0.12099378440353382, "grad_norm": 9.5, "learning_rate": 0.00047995310975103815, "loss": 16.2143, "step": 81680 }, { "epoch": 0.12102341069746221, "grad_norm": 6.59375, "learning_rate": 0.0004799481708160306, "loss": 16.178, "step": 81700 }, { "epoch": 0.1210530369913906, "grad_norm": 7.03125, "learning_rate": 0.00047994323188102305, "loss": 16.2018, "step": 81720 }, { "epoch": 0.12108266328531898, "grad_norm": 6.9375, "learning_rate": 0.0004799382929460155, "loss": 16.2976, "step": 81740 }, { "epoch": 0.12111228957924737, "grad_norm": 6.4375, "learning_rate": 0.0004799333540110079, "loss": 16.2988, "step": 81760 }, { "epoch": 0.12114191587317576, "grad_norm": 8.8125, "learning_rate": 0.00047992841507600033, "loss": 16.2317, "step": 81780 }, { "epoch": 0.12117154216710414, "grad_norm": 7.6875, "learning_rate": 0.0004799234761409928, "loss": 16.1929, "step": 81800 }, { "epoch": 0.12120116846103253, "grad_norm": 6.1875, "learning_rate": 0.00047991853720598523, "loss": 16.1955, "step": 81820 }, { "epoch": 0.12123079475496092, "grad_norm": 6.09375, "learning_rate": 0.0004799135982709776, "loss": 16.2418, "step": 81840 }, { "epoch": 0.12126042104888932, "grad_norm": 6.90625, "learning_rate": 0.00047990865933597007, "loss": 16.218, "step": 81860 }, { "epoch": 0.1212900473428177, "grad_norm": 9.5625, "learning_rate": 0.0004799037204009625, "loss": 16.265, "step": 81880 }, { "epoch": 0.12131967363674609, "grad_norm": 6.78125, "learning_rate": 0.00047989878146595497, "loss": 16.2145, "step": 81900 }, { "epoch": 0.12134929993067448, "grad_norm": 6.59375, "learning_rate": 0.00047989384253094736, "loss": 16.1485, "step": 81920 }, { "epoch": 0.12137892622460286, "grad_norm": 6.40625, "learning_rate": 0.0004798889035959398, "loss": 16.2334, "step": 81940 }, { "epoch": 0.12140855251853125, "grad_norm": 7.5625, "learning_rate": 0.00047988396466093226, "loss": 16.1556, "step": 81960 }, { "epoch": 0.12143817881245964, "grad_norm": 6.9375, "learning_rate": 0.0004798790257259247, "loss": 16.1977, "step": 81980 }, { "epoch": 0.12146780510638802, "grad_norm": 6.625, "learning_rate": 0.0004798740867909171, "loss": 16.2152, "step": 82000 }, { "epoch": 0.12149743140031641, "grad_norm": 6.03125, "learning_rate": 0.00047986914785590955, "loss": 16.2106, "step": 82020 }, { "epoch": 0.1215270576942448, "grad_norm": 6.4375, "learning_rate": 0.000479864208920902, "loss": 16.2421, "step": 82040 }, { "epoch": 0.12155668398817318, "grad_norm": 6.6875, "learning_rate": 0.0004798592699858944, "loss": 16.2096, "step": 82060 }, { "epoch": 0.12158631028210157, "grad_norm": 6.40625, "learning_rate": 0.00047985433105088683, "loss": 16.2556, "step": 82080 }, { "epoch": 0.12161593657602995, "grad_norm": 6.75, "learning_rate": 0.0004798493921158793, "loss": 16.183, "step": 82100 }, { "epoch": 0.12164556286995834, "grad_norm": 6.21875, "learning_rate": 0.00047984445318087173, "loss": 16.232, "step": 82120 }, { "epoch": 0.12167518916388673, "grad_norm": 7.3125, "learning_rate": 0.0004798395142458641, "loss": 16.2333, "step": 82140 }, { "epoch": 0.12170481545781513, "grad_norm": 6.15625, "learning_rate": 0.00047983457531085657, "loss": 16.1709, "step": 82160 }, { "epoch": 0.12173444175174351, "grad_norm": 6.96875, "learning_rate": 0.000479829636375849, "loss": 16.243, "step": 82180 }, { "epoch": 0.1217640680456719, "grad_norm": 6.40625, "learning_rate": 0.00047982469744084147, "loss": 16.2207, "step": 82200 }, { "epoch": 0.12179369433960029, "grad_norm": 6.90625, "learning_rate": 0.00047981975850583386, "loss": 16.2444, "step": 82220 }, { "epoch": 0.12182332063352867, "grad_norm": 6.65625, "learning_rate": 0.0004798148195708263, "loss": 16.1667, "step": 82240 }, { "epoch": 0.12185294692745706, "grad_norm": 6.53125, "learning_rate": 0.00047980988063581876, "loss": 16.1988, "step": 82260 }, { "epoch": 0.12188257322138545, "grad_norm": 6.59375, "learning_rate": 0.0004798049417008112, "loss": 16.216, "step": 82280 }, { "epoch": 0.12191219951531383, "grad_norm": 7.34375, "learning_rate": 0.0004798000027658036, "loss": 16.1921, "step": 82300 }, { "epoch": 0.12194182580924222, "grad_norm": 6.65625, "learning_rate": 0.0004797950638307961, "loss": 16.2033, "step": 82320 }, { "epoch": 0.1219714521031706, "grad_norm": 5.875, "learning_rate": 0.0004797901248957885, "loss": 16.1843, "step": 82340 }, { "epoch": 0.12200107839709899, "grad_norm": 6.5625, "learning_rate": 0.0004797851859607809, "loss": 16.1888, "step": 82360 }, { "epoch": 0.12203070469102738, "grad_norm": 5.6875, "learning_rate": 0.00047978024702577334, "loss": 16.1824, "step": 82380 }, { "epoch": 0.12206033098495576, "grad_norm": 6.125, "learning_rate": 0.0004797753080907658, "loss": 16.1561, "step": 82400 }, { "epoch": 0.12208995727888415, "grad_norm": 7.25, "learning_rate": 0.00047977036915575823, "loss": 16.2548, "step": 82420 }, { "epoch": 0.12211958357281254, "grad_norm": 8.0625, "learning_rate": 0.0004797654302207506, "loss": 16.2019, "step": 82440 }, { "epoch": 0.12214920986674092, "grad_norm": 7.1875, "learning_rate": 0.00047976049128574307, "loss": 16.1525, "step": 82460 }, { "epoch": 0.12217883616066932, "grad_norm": 7.03125, "learning_rate": 0.0004797555523507355, "loss": 16.2534, "step": 82480 }, { "epoch": 0.12220846245459771, "grad_norm": 6.9375, "learning_rate": 0.00047975061341572797, "loss": 16.1858, "step": 82500 }, { "epoch": 0.1222380887485261, "grad_norm": 6.6875, "learning_rate": 0.00047974567448072036, "loss": 16.1439, "step": 82520 }, { "epoch": 0.12226771504245448, "grad_norm": 6.4375, "learning_rate": 0.0004797407355457128, "loss": 16.2245, "step": 82540 }, { "epoch": 0.12229734133638287, "grad_norm": 6.46875, "learning_rate": 0.00047973579661070526, "loss": 16.1173, "step": 82560 }, { "epoch": 0.12232696763031126, "grad_norm": 6.96875, "learning_rate": 0.0004797308576756977, "loss": 16.1917, "step": 82580 }, { "epoch": 0.12235659392423964, "grad_norm": 6.46875, "learning_rate": 0.0004797259187406901, "loss": 16.2421, "step": 82600 }, { "epoch": 0.12238622021816803, "grad_norm": 6.875, "learning_rate": 0.0004797209798056826, "loss": 16.2, "step": 82620 }, { "epoch": 0.12241584651209642, "grad_norm": 6.28125, "learning_rate": 0.000479716040870675, "loss": 16.2038, "step": 82640 }, { "epoch": 0.1224454728060248, "grad_norm": 5.96875, "learning_rate": 0.00047971110193566744, "loss": 16.2384, "step": 82660 }, { "epoch": 0.12247509909995319, "grad_norm": 6.1875, "learning_rate": 0.00047970616300065984, "loss": 16.2127, "step": 82680 }, { "epoch": 0.12250472539388158, "grad_norm": 6.28125, "learning_rate": 0.0004797012240656523, "loss": 16.235, "step": 82700 }, { "epoch": 0.12253435168780996, "grad_norm": 6.125, "learning_rate": 0.00047969628513064473, "loss": 16.1783, "step": 82720 }, { "epoch": 0.12256397798173835, "grad_norm": 7.28125, "learning_rate": 0.0004796913461956371, "loss": 16.227, "step": 82740 }, { "epoch": 0.12259360427566673, "grad_norm": 6.15625, "learning_rate": 0.00047968640726062957, "loss": 16.2354, "step": 82760 }, { "epoch": 0.12262323056959512, "grad_norm": 6.125, "learning_rate": 0.000479681468325622, "loss": 16.1881, "step": 82780 }, { "epoch": 0.12265285686352352, "grad_norm": 6.375, "learning_rate": 0.00047967652939061447, "loss": 16.2082, "step": 82800 }, { "epoch": 0.12268248315745191, "grad_norm": 6.375, "learning_rate": 0.00047967159045560686, "loss": 16.1181, "step": 82820 }, { "epoch": 0.1227121094513803, "grad_norm": 6.59375, "learning_rate": 0.0004796666515205993, "loss": 16.1534, "step": 82840 }, { "epoch": 0.12274173574530868, "grad_norm": 6.4375, "learning_rate": 0.00047966171258559176, "loss": 16.1316, "step": 82860 }, { "epoch": 0.12277136203923707, "grad_norm": 6.5, "learning_rate": 0.0004796567736505842, "loss": 16.1864, "step": 82880 }, { "epoch": 0.12280098833316545, "grad_norm": 7.15625, "learning_rate": 0.0004796518347155766, "loss": 16.1988, "step": 82900 }, { "epoch": 0.12283061462709384, "grad_norm": 6.5625, "learning_rate": 0.0004796468957805691, "loss": 16.2569, "step": 82920 }, { "epoch": 0.12286024092102223, "grad_norm": 6.40625, "learning_rate": 0.0004796419568455615, "loss": 16.1792, "step": 82940 }, { "epoch": 0.12288986721495061, "grad_norm": 6.53125, "learning_rate": 0.00047963701791055394, "loss": 16.2019, "step": 82960 }, { "epoch": 0.122919493508879, "grad_norm": 6.6875, "learning_rate": 0.00047963207897554634, "loss": 16.1815, "step": 82980 }, { "epoch": 0.12294911980280739, "grad_norm": 6.75, "learning_rate": 0.00047962714004053884, "loss": 16.2309, "step": 83000 }, { "epoch": 0.12297874609673577, "grad_norm": 6.6875, "learning_rate": 0.00047962220110553123, "loss": 16.139, "step": 83020 }, { "epoch": 0.12300837239066416, "grad_norm": 7.03125, "learning_rate": 0.0004796172621705236, "loss": 16.2056, "step": 83040 }, { "epoch": 0.12303799868459254, "grad_norm": 6.4375, "learning_rate": 0.0004796123232355161, "loss": 16.202, "step": 83060 }, { "epoch": 0.12306762497852093, "grad_norm": 7.03125, "learning_rate": 0.0004796073843005085, "loss": 16.1796, "step": 83080 }, { "epoch": 0.12309725127244932, "grad_norm": 6.84375, "learning_rate": 0.00047960244536550097, "loss": 16.1404, "step": 83100 }, { "epoch": 0.12312687756637772, "grad_norm": 7.59375, "learning_rate": 0.00047959750643049336, "loss": 16.1799, "step": 83120 }, { "epoch": 0.1231565038603061, "grad_norm": 7.375, "learning_rate": 0.0004795925674954858, "loss": 16.1692, "step": 83140 }, { "epoch": 0.12318613015423449, "grad_norm": 7.15625, "learning_rate": 0.00047958762856047826, "loss": 16.1584, "step": 83160 }, { "epoch": 0.12321575644816288, "grad_norm": 6.34375, "learning_rate": 0.0004795826896254707, "loss": 16.2001, "step": 83180 }, { "epoch": 0.12324538274209126, "grad_norm": 7.34375, "learning_rate": 0.0004795777506904631, "loss": 16.2, "step": 83200 }, { "epoch": 0.12327500903601965, "grad_norm": 7.0625, "learning_rate": 0.0004795728117554556, "loss": 16.1916, "step": 83220 }, { "epoch": 0.12330463532994804, "grad_norm": 7.375, "learning_rate": 0.000479567872820448, "loss": 16.1917, "step": 83240 }, { "epoch": 0.12333426162387642, "grad_norm": 7.1875, "learning_rate": 0.00047956293388544044, "loss": 16.173, "step": 83260 }, { "epoch": 0.12336388791780481, "grad_norm": 6.96875, "learning_rate": 0.00047955799495043284, "loss": 16.1653, "step": 83280 }, { "epoch": 0.1233935142117332, "grad_norm": 8.5625, "learning_rate": 0.00047955305601542534, "loss": 16.215, "step": 83300 }, { "epoch": 0.12342314050566158, "grad_norm": 6.21875, "learning_rate": 0.00047954811708041773, "loss": 16.1763, "step": 83320 }, { "epoch": 0.12345276679958997, "grad_norm": 6.34375, "learning_rate": 0.0004795431781454102, "loss": 16.2133, "step": 83340 }, { "epoch": 0.12348239309351836, "grad_norm": 6.125, "learning_rate": 0.0004795382392104026, "loss": 16.1325, "step": 83360 }, { "epoch": 0.12351201938744674, "grad_norm": 6.8125, "learning_rate": 0.000479533300275395, "loss": 16.1738, "step": 83380 }, { "epoch": 0.12354164568137513, "grad_norm": 6.71875, "learning_rate": 0.00047952836134038747, "loss": 16.1694, "step": 83400 }, { "epoch": 0.12357127197530351, "grad_norm": 7.78125, "learning_rate": 0.00047952342240537986, "loss": 16.1704, "step": 83420 }, { "epoch": 0.12360089826923192, "grad_norm": 6.9375, "learning_rate": 0.0004795184834703723, "loss": 16.1661, "step": 83440 }, { "epoch": 0.1236305245631603, "grad_norm": 6.8125, "learning_rate": 0.00047951354453536476, "loss": 16.1131, "step": 83460 }, { "epoch": 0.12366015085708869, "grad_norm": 5.90625, "learning_rate": 0.0004795086056003572, "loss": 16.2239, "step": 83480 }, { "epoch": 0.12368977715101707, "grad_norm": 5.9375, "learning_rate": 0.0004795036666653496, "loss": 16.1863, "step": 83500 }, { "epoch": 0.12371940344494546, "grad_norm": 6.6875, "learning_rate": 0.0004794987277303421, "loss": 16.1254, "step": 83520 }, { "epoch": 0.12374902973887385, "grad_norm": 7.125, "learning_rate": 0.0004794937887953345, "loss": 16.2149, "step": 83540 }, { "epoch": 0.12377865603280223, "grad_norm": 7.21875, "learning_rate": 0.00047948884986032694, "loss": 16.1831, "step": 83560 }, { "epoch": 0.12380828232673062, "grad_norm": 6.40625, "learning_rate": 0.00047948391092531934, "loss": 16.1633, "step": 83580 }, { "epoch": 0.123837908620659, "grad_norm": 7.5, "learning_rate": 0.00047947897199031184, "loss": 16.1849, "step": 83600 }, { "epoch": 0.1238675349145874, "grad_norm": 7.09375, "learning_rate": 0.00047947403305530423, "loss": 16.1654, "step": 83620 }, { "epoch": 0.12389716120851578, "grad_norm": 6.84375, "learning_rate": 0.0004794690941202967, "loss": 16.2204, "step": 83640 }, { "epoch": 0.12392678750244417, "grad_norm": 6.34375, "learning_rate": 0.0004794641551852891, "loss": 16.1609, "step": 83660 }, { "epoch": 0.12395641379637255, "grad_norm": 6.0, "learning_rate": 0.0004794592162502816, "loss": 16.1497, "step": 83680 }, { "epoch": 0.12398604009030094, "grad_norm": 7.09375, "learning_rate": 0.00047945427731527397, "loss": 16.2024, "step": 83700 }, { "epoch": 0.12401566638422933, "grad_norm": 7.3125, "learning_rate": 0.00047944933838026636, "loss": 16.2298, "step": 83720 }, { "epoch": 0.12404529267815771, "grad_norm": 7.625, "learning_rate": 0.0004794443994452588, "loss": 16.1594, "step": 83740 }, { "epoch": 0.12407491897208611, "grad_norm": 6.40625, "learning_rate": 0.00047943946051025126, "loss": 16.138, "step": 83760 }, { "epoch": 0.1241045452660145, "grad_norm": 7.625, "learning_rate": 0.0004794345215752437, "loss": 16.1493, "step": 83780 }, { "epoch": 0.12413417155994289, "grad_norm": 7.34375, "learning_rate": 0.0004794295826402361, "loss": 16.1952, "step": 83800 }, { "epoch": 0.12416379785387127, "grad_norm": 6.5625, "learning_rate": 0.0004794246437052286, "loss": 16.1847, "step": 83820 }, { "epoch": 0.12419342414779966, "grad_norm": 6.34375, "learning_rate": 0.000479419704770221, "loss": 16.2249, "step": 83840 }, { "epoch": 0.12422305044172804, "grad_norm": 6.15625, "learning_rate": 0.00047941476583521344, "loss": 16.1628, "step": 83860 }, { "epoch": 0.12425267673565643, "grad_norm": 6.3125, "learning_rate": 0.00047940982690020584, "loss": 16.1391, "step": 83880 }, { "epoch": 0.12428230302958482, "grad_norm": 6.21875, "learning_rate": 0.00047940488796519834, "loss": 16.1901, "step": 83900 }, { "epoch": 0.1243119293235132, "grad_norm": 6.84375, "learning_rate": 0.00047939994903019073, "loss": 16.136, "step": 83920 }, { "epoch": 0.12434155561744159, "grad_norm": 6.84375, "learning_rate": 0.0004793950100951832, "loss": 16.1843, "step": 83940 }, { "epoch": 0.12437118191136998, "grad_norm": 6.40625, "learning_rate": 0.0004793900711601756, "loss": 16.1407, "step": 83960 }, { "epoch": 0.12440080820529836, "grad_norm": 7.59375, "learning_rate": 0.0004793851322251681, "loss": 16.1422, "step": 83980 }, { "epoch": 0.12443043449922675, "grad_norm": 6.625, "learning_rate": 0.00047938019329016047, "loss": 16.1804, "step": 84000 }, { "epoch": 0.12446006079315514, "grad_norm": 5.90625, "learning_rate": 0.00047937525435515286, "loss": 16.1324, "step": 84020 }, { "epoch": 0.12448968708708352, "grad_norm": 6.96875, "learning_rate": 0.0004793703154201453, "loss": 16.1368, "step": 84040 }, { "epoch": 0.12451931338101191, "grad_norm": 7.09375, "learning_rate": 0.00047936537648513776, "loss": 16.176, "step": 84060 }, { "epoch": 0.12454893967494031, "grad_norm": 6.625, "learning_rate": 0.0004793604375501302, "loss": 16.1863, "step": 84080 }, { "epoch": 0.1245785659688687, "grad_norm": 6.5, "learning_rate": 0.0004793554986151226, "loss": 16.1856, "step": 84100 }, { "epoch": 0.12460819226279708, "grad_norm": 6.0625, "learning_rate": 0.0004793505596801151, "loss": 16.1133, "step": 84120 }, { "epoch": 0.12463781855672547, "grad_norm": 6.1875, "learning_rate": 0.0004793456207451075, "loss": 16.1372, "step": 84140 }, { "epoch": 0.12466744485065386, "grad_norm": 6.3125, "learning_rate": 0.00047934068181009994, "loss": 16.1905, "step": 84160 }, { "epoch": 0.12469707114458224, "grad_norm": 6.59375, "learning_rate": 0.00047933574287509234, "loss": 16.14, "step": 84180 }, { "epoch": 0.12472669743851063, "grad_norm": 6.375, "learning_rate": 0.00047933080394008484, "loss": 16.1066, "step": 84200 }, { "epoch": 0.12475632373243901, "grad_norm": 6.125, "learning_rate": 0.00047932586500507723, "loss": 16.0997, "step": 84220 }, { "epoch": 0.1247859500263674, "grad_norm": 6.625, "learning_rate": 0.0004793209260700697, "loss": 16.1655, "step": 84240 }, { "epoch": 0.12481557632029579, "grad_norm": 6.4375, "learning_rate": 0.0004793159871350621, "loss": 16.1648, "step": 84260 }, { "epoch": 0.12484520261422417, "grad_norm": 7.5, "learning_rate": 0.0004793110482000546, "loss": 16.2026, "step": 84280 }, { "epoch": 0.12487482890815256, "grad_norm": 7.15625, "learning_rate": 0.00047930610926504697, "loss": 16.1487, "step": 84300 }, { "epoch": 0.12490445520208095, "grad_norm": 7.0, "learning_rate": 0.0004793011703300394, "loss": 16.1867, "step": 84320 }, { "epoch": 0.12493408149600933, "grad_norm": 6.21875, "learning_rate": 0.0004792962313950318, "loss": 16.1449, "step": 84340 }, { "epoch": 0.12496370778993772, "grad_norm": 6.25, "learning_rate": 0.0004792912924600243, "loss": 16.2187, "step": 84360 }, { "epoch": 0.1249933340838661, "grad_norm": 7.09375, "learning_rate": 0.0004792863535250167, "loss": 16.1159, "step": 84380 }, { "epoch": 0.1250229603777945, "grad_norm": 6.84375, "learning_rate": 0.0004792814145900091, "loss": 16.0993, "step": 84400 }, { "epoch": 0.1250525866717229, "grad_norm": 9.125, "learning_rate": 0.0004792764756550016, "loss": 16.1437, "step": 84420 }, { "epoch": 0.12508221296565128, "grad_norm": 6.78125, "learning_rate": 0.000479271536719994, "loss": 16.138, "step": 84440 }, { "epoch": 0.12511183925957967, "grad_norm": 5.96875, "learning_rate": 0.00047926659778498644, "loss": 16.1112, "step": 84460 }, { "epoch": 0.12514146555350805, "grad_norm": 6.90625, "learning_rate": 0.00047926165884997884, "loss": 16.152, "step": 84480 }, { "epoch": 0.12517109184743644, "grad_norm": 7.53125, "learning_rate": 0.00047925671991497134, "loss": 16.1693, "step": 84500 }, { "epoch": 0.12520071814136483, "grad_norm": 7.5, "learning_rate": 0.00047925178097996373, "loss": 16.1587, "step": 84520 }, { "epoch": 0.1252303444352932, "grad_norm": 6.5625, "learning_rate": 0.0004792468420449562, "loss": 16.2107, "step": 84540 }, { "epoch": 0.1252599707292216, "grad_norm": 6.59375, "learning_rate": 0.0004792419031099486, "loss": 16.183, "step": 84560 }, { "epoch": 0.12528959702314998, "grad_norm": 7.375, "learning_rate": 0.0004792369641749411, "loss": 16.1448, "step": 84580 }, { "epoch": 0.12531922331707837, "grad_norm": 6.28125, "learning_rate": 0.00047923202523993347, "loss": 16.2097, "step": 84600 }, { "epoch": 0.12534884961100676, "grad_norm": 6.3125, "learning_rate": 0.0004792270863049259, "loss": 16.1846, "step": 84620 }, { "epoch": 0.12537847590493514, "grad_norm": 6.90625, "learning_rate": 0.0004792221473699183, "loss": 16.183, "step": 84640 }, { "epoch": 0.12540810219886353, "grad_norm": 6.0, "learning_rate": 0.0004792172084349108, "loss": 16.1176, "step": 84660 }, { "epoch": 0.12543772849279192, "grad_norm": 6.71875, "learning_rate": 0.0004792122694999032, "loss": 16.1527, "step": 84680 }, { "epoch": 0.1254673547867203, "grad_norm": 6.875, "learning_rate": 0.0004792073305648956, "loss": 16.1153, "step": 84700 }, { "epoch": 0.1254969810806487, "grad_norm": 6.90625, "learning_rate": 0.0004792023916298881, "loss": 16.137, "step": 84720 }, { "epoch": 0.12552660737457708, "grad_norm": 7.0, "learning_rate": 0.0004791974526948805, "loss": 16.1581, "step": 84740 }, { "epoch": 0.12555623366850546, "grad_norm": 7.03125, "learning_rate": 0.00047919251375987294, "loss": 16.131, "step": 84760 }, { "epoch": 0.12558585996243385, "grad_norm": 6.46875, "learning_rate": 0.00047918757482486534, "loss": 16.1786, "step": 84780 }, { "epoch": 0.12561548625636224, "grad_norm": 8.5, "learning_rate": 0.00047918263588985784, "loss": 16.1971, "step": 84800 }, { "epoch": 0.12564511255029062, "grad_norm": 6.53125, "learning_rate": 0.00047917769695485023, "loss": 16.1351, "step": 84820 }, { "epoch": 0.125674738844219, "grad_norm": 6.28125, "learning_rate": 0.0004791727580198427, "loss": 16.1387, "step": 84840 }, { "epoch": 0.1257043651381474, "grad_norm": 7.34375, "learning_rate": 0.0004791678190848351, "loss": 16.1013, "step": 84860 }, { "epoch": 0.1257339914320758, "grad_norm": 6.625, "learning_rate": 0.0004791628801498276, "loss": 16.1586, "step": 84880 }, { "epoch": 0.1257636177260042, "grad_norm": 6.5625, "learning_rate": 0.00047915794121481997, "loss": 16.1316, "step": 84900 }, { "epoch": 0.12579324401993258, "grad_norm": 6.5625, "learning_rate": 0.0004791530022798124, "loss": 16.1637, "step": 84920 }, { "epoch": 0.12582287031386097, "grad_norm": 5.96875, "learning_rate": 0.0004791480633448048, "loss": 16.2086, "step": 84940 }, { "epoch": 0.12585249660778935, "grad_norm": 7.09375, "learning_rate": 0.0004791431244097973, "loss": 16.1053, "step": 84960 }, { "epoch": 0.12588212290171774, "grad_norm": 7.25, "learning_rate": 0.0004791381854747897, "loss": 16.1161, "step": 84980 }, { "epoch": 0.12591174919564613, "grad_norm": 6.0625, "learning_rate": 0.00047913324653978216, "loss": 16.1565, "step": 85000 }, { "epoch": 0.12594137548957451, "grad_norm": 6.5625, "learning_rate": 0.0004791283076047746, "loss": 16.1232, "step": 85020 }, { "epoch": 0.1259710017835029, "grad_norm": 7.375, "learning_rate": 0.000479123368669767, "loss": 16.0785, "step": 85040 }, { "epoch": 0.1260006280774313, "grad_norm": 7.125, "learning_rate": 0.00047911842973475944, "loss": 16.1701, "step": 85060 }, { "epoch": 0.12603025437135967, "grad_norm": 6.28125, "learning_rate": 0.00047911349079975184, "loss": 16.13, "step": 85080 }, { "epoch": 0.12605988066528806, "grad_norm": 6.71875, "learning_rate": 0.00047910855186474434, "loss": 16.1251, "step": 85100 }, { "epoch": 0.12608950695921645, "grad_norm": 6.34375, "learning_rate": 0.00047910361292973673, "loss": 16.0507, "step": 85120 }, { "epoch": 0.12611913325314483, "grad_norm": 6.34375, "learning_rate": 0.0004790986739947292, "loss": 16.1865, "step": 85140 }, { "epoch": 0.12614875954707322, "grad_norm": 6.125, "learning_rate": 0.0004790937350597216, "loss": 16.1053, "step": 85160 }, { "epoch": 0.1261783858410016, "grad_norm": 6.5625, "learning_rate": 0.0004790887961247141, "loss": 16.1678, "step": 85180 }, { "epoch": 0.12620801213493, "grad_norm": 7.34375, "learning_rate": 0.00047908385718970647, "loss": 16.1359, "step": 85200 }, { "epoch": 0.12623763842885838, "grad_norm": 7.40625, "learning_rate": 0.0004790789182546989, "loss": 16.1783, "step": 85220 }, { "epoch": 0.12626726472278677, "grad_norm": 7.96875, "learning_rate": 0.0004790739793196913, "loss": 16.1511, "step": 85240 }, { "epoch": 0.12629689101671515, "grad_norm": 6.3125, "learning_rate": 0.0004790690403846838, "loss": 16.1451, "step": 85260 }, { "epoch": 0.12632651731064354, "grad_norm": 6.8125, "learning_rate": 0.0004790641014496762, "loss": 16.0706, "step": 85280 }, { "epoch": 0.12635614360457192, "grad_norm": 6.25, "learning_rate": 0.00047905916251466866, "loss": 16.114, "step": 85300 }, { "epoch": 0.1263857698985003, "grad_norm": 6.25, "learning_rate": 0.0004790542235796611, "loss": 16.1234, "step": 85320 }, { "epoch": 0.1264153961924287, "grad_norm": 6.5625, "learning_rate": 0.00047904928464465355, "loss": 16.1647, "step": 85340 }, { "epoch": 0.12644502248635708, "grad_norm": 7.1875, "learning_rate": 0.00047904434570964594, "loss": 16.1594, "step": 85360 }, { "epoch": 0.12647464878028547, "grad_norm": 6.40625, "learning_rate": 0.00047903940677463834, "loss": 16.1207, "step": 85380 }, { "epoch": 0.12650427507421386, "grad_norm": 7.28125, "learning_rate": 0.00047903446783963084, "loss": 16.1706, "step": 85400 }, { "epoch": 0.12653390136814224, "grad_norm": 6.875, "learning_rate": 0.00047902952890462323, "loss": 16.1254, "step": 85420 }, { "epoch": 0.12656352766207063, "grad_norm": 5.9375, "learning_rate": 0.0004790245899696157, "loss": 16.1301, "step": 85440 }, { "epoch": 0.12659315395599902, "grad_norm": 6.71875, "learning_rate": 0.0004790196510346081, "loss": 16.194, "step": 85460 }, { "epoch": 0.1266227802499274, "grad_norm": 6.3125, "learning_rate": 0.0004790147120996006, "loss": 16.1499, "step": 85480 }, { "epoch": 0.12665240654385582, "grad_norm": 6.6875, "learning_rate": 0.00047900977316459297, "loss": 16.1261, "step": 85500 }, { "epoch": 0.1266820328377842, "grad_norm": 7.09375, "learning_rate": 0.0004790048342295854, "loss": 16.1392, "step": 85520 }, { "epoch": 0.1267116591317126, "grad_norm": 6.125, "learning_rate": 0.0004789998952945778, "loss": 16.1372, "step": 85540 }, { "epoch": 0.12674128542564098, "grad_norm": 7.53125, "learning_rate": 0.0004789949563595703, "loss": 16.1964, "step": 85560 }, { "epoch": 0.12677091171956936, "grad_norm": 6.125, "learning_rate": 0.0004789900174245627, "loss": 16.0984, "step": 85580 }, { "epoch": 0.12680053801349775, "grad_norm": 6.21875, "learning_rate": 0.00047898507848955516, "loss": 16.1869, "step": 85600 }, { "epoch": 0.12683016430742614, "grad_norm": 6.75, "learning_rate": 0.0004789801395545476, "loss": 16.1501, "step": 85620 }, { "epoch": 0.12685979060135452, "grad_norm": 6.0625, "learning_rate": 0.00047897520061954005, "loss": 16.1608, "step": 85640 }, { "epoch": 0.1268894168952829, "grad_norm": 7.875, "learning_rate": 0.00047897026168453245, "loss": 16.1311, "step": 85660 }, { "epoch": 0.1269190431892113, "grad_norm": 6.4375, "learning_rate": 0.0004789653227495249, "loss": 16.1214, "step": 85680 }, { "epoch": 0.12694866948313968, "grad_norm": 6.5, "learning_rate": 0.00047896038381451734, "loss": 16.1118, "step": 85700 }, { "epoch": 0.12697829577706807, "grad_norm": 7.03125, "learning_rate": 0.00047895544487950973, "loss": 16.1178, "step": 85720 }, { "epoch": 0.12700792207099645, "grad_norm": 7.03125, "learning_rate": 0.0004789505059445022, "loss": 16.1188, "step": 85740 }, { "epoch": 0.12703754836492484, "grad_norm": 6.96875, "learning_rate": 0.0004789455670094946, "loss": 16.1155, "step": 85760 }, { "epoch": 0.12706717465885323, "grad_norm": 6.6875, "learning_rate": 0.0004789406280744871, "loss": 16.1502, "step": 85780 }, { "epoch": 0.1270968009527816, "grad_norm": 6.125, "learning_rate": 0.00047893568913947947, "loss": 16.1207, "step": 85800 }, { "epoch": 0.12712642724671, "grad_norm": 5.90625, "learning_rate": 0.0004789307502044719, "loss": 16.1228, "step": 85820 }, { "epoch": 0.1271560535406384, "grad_norm": 6.5625, "learning_rate": 0.0004789258112694643, "loss": 16.0974, "step": 85840 }, { "epoch": 0.12718567983456677, "grad_norm": 8.125, "learning_rate": 0.0004789208723344568, "loss": 16.1093, "step": 85860 }, { "epoch": 0.12721530612849516, "grad_norm": 6.71875, "learning_rate": 0.0004789159333994492, "loss": 16.115, "step": 85880 }, { "epoch": 0.12724493242242355, "grad_norm": 6.34375, "learning_rate": 0.00047891099446444166, "loss": 16.1984, "step": 85900 }, { "epoch": 0.12727455871635193, "grad_norm": 6.875, "learning_rate": 0.00047890605552943405, "loss": 16.1422, "step": 85920 }, { "epoch": 0.12730418501028032, "grad_norm": 6.5, "learning_rate": 0.00047890111659442655, "loss": 16.1491, "step": 85940 }, { "epoch": 0.1273338113042087, "grad_norm": 7.03125, "learning_rate": 0.00047889617765941895, "loss": 16.1366, "step": 85960 }, { "epoch": 0.1273634375981371, "grad_norm": 6.96875, "learning_rate": 0.0004788912387244114, "loss": 16.1178, "step": 85980 }, { "epoch": 0.12739306389206548, "grad_norm": 6.1875, "learning_rate": 0.00047888629978940384, "loss": 16.1072, "step": 86000 }, { "epoch": 0.12742269018599386, "grad_norm": 7.65625, "learning_rate": 0.0004788813608543963, "loss": 16.1032, "step": 86020 }, { "epoch": 0.12745231647992225, "grad_norm": 5.9375, "learning_rate": 0.0004788764219193887, "loss": 16.1622, "step": 86040 }, { "epoch": 0.12748194277385064, "grad_norm": 7.0, "learning_rate": 0.0004788714829843811, "loss": 16.121, "step": 86060 }, { "epoch": 0.12751156906777902, "grad_norm": 5.78125, "learning_rate": 0.0004788665440493736, "loss": 16.1018, "step": 86080 }, { "epoch": 0.1275411953617074, "grad_norm": 6.65625, "learning_rate": 0.00047886160511436597, "loss": 16.1504, "step": 86100 }, { "epoch": 0.1275708216556358, "grad_norm": 6.6875, "learning_rate": 0.0004788566661793584, "loss": 16.1307, "step": 86120 }, { "epoch": 0.1276004479495642, "grad_norm": 6.3125, "learning_rate": 0.0004788517272443508, "loss": 16.063, "step": 86140 }, { "epoch": 0.1276300742434926, "grad_norm": 7.0, "learning_rate": 0.0004788467883093433, "loss": 16.0927, "step": 86160 }, { "epoch": 0.12765970053742098, "grad_norm": 6.5, "learning_rate": 0.0004788418493743357, "loss": 16.1392, "step": 86180 }, { "epoch": 0.12768932683134937, "grad_norm": 5.78125, "learning_rate": 0.00047883691043932816, "loss": 16.113, "step": 86200 }, { "epoch": 0.12771895312527776, "grad_norm": 6.15625, "learning_rate": 0.00047883197150432055, "loss": 16.0678, "step": 86220 }, { "epoch": 0.12774857941920614, "grad_norm": 7.09375, "learning_rate": 0.00047882703256931305, "loss": 16.1059, "step": 86240 }, { "epoch": 0.12777820571313453, "grad_norm": 6.34375, "learning_rate": 0.00047882209363430545, "loss": 16.1377, "step": 86260 }, { "epoch": 0.12780783200706292, "grad_norm": 6.4375, "learning_rate": 0.0004788171546992979, "loss": 16.1358, "step": 86280 }, { "epoch": 0.1278374583009913, "grad_norm": 5.96875, "learning_rate": 0.00047881221576429034, "loss": 16.0783, "step": 86300 }, { "epoch": 0.1278670845949197, "grad_norm": 7.15625, "learning_rate": 0.0004788072768292828, "loss": 16.0874, "step": 86320 }, { "epoch": 0.12789671088884808, "grad_norm": 7.5625, "learning_rate": 0.0004788023378942752, "loss": 16.0472, "step": 86340 }, { "epoch": 0.12792633718277646, "grad_norm": 6.4375, "learning_rate": 0.00047879739895926763, "loss": 16.1412, "step": 86360 }, { "epoch": 0.12795596347670485, "grad_norm": 8.1875, "learning_rate": 0.0004787924600242601, "loss": 16.148, "step": 86380 }, { "epoch": 0.12798558977063323, "grad_norm": 6.25, "learning_rate": 0.00047878752108925247, "loss": 16.1553, "step": 86400 }, { "epoch": 0.12801521606456162, "grad_norm": 7.5, "learning_rate": 0.0004787825821542449, "loss": 16.1151, "step": 86420 }, { "epoch": 0.12804484235849, "grad_norm": 6.1875, "learning_rate": 0.0004787776432192373, "loss": 16.1381, "step": 86440 }, { "epoch": 0.1280744686524184, "grad_norm": 6.59375, "learning_rate": 0.0004787727042842298, "loss": 16.1238, "step": 86460 }, { "epoch": 0.12810409494634678, "grad_norm": 6.96875, "learning_rate": 0.0004787677653492222, "loss": 16.1561, "step": 86480 }, { "epoch": 0.12813372124027517, "grad_norm": 6.625, "learning_rate": 0.00047876282641421466, "loss": 16.0832, "step": 86500 }, { "epoch": 0.12816334753420355, "grad_norm": 6.75, "learning_rate": 0.00047875788747920705, "loss": 16.1503, "step": 86520 }, { "epoch": 0.12819297382813194, "grad_norm": 6.84375, "learning_rate": 0.00047875294854419955, "loss": 16.1081, "step": 86540 }, { "epoch": 0.12822260012206033, "grad_norm": 6.53125, "learning_rate": 0.00047874800960919195, "loss": 16.1777, "step": 86560 }, { "epoch": 0.1282522264159887, "grad_norm": 7.125, "learning_rate": 0.0004787430706741844, "loss": 16.1116, "step": 86580 }, { "epoch": 0.1282818527099171, "grad_norm": 7.78125, "learning_rate": 0.00047873813173917684, "loss": 16.0625, "step": 86600 }, { "epoch": 0.12831147900384549, "grad_norm": 6.71875, "learning_rate": 0.0004787331928041693, "loss": 16.1032, "step": 86620 }, { "epoch": 0.12834110529777387, "grad_norm": 6.375, "learning_rate": 0.0004787282538691617, "loss": 16.0908, "step": 86640 }, { "epoch": 0.12837073159170226, "grad_norm": 7.5625, "learning_rate": 0.00047872331493415413, "loss": 16.0726, "step": 86660 }, { "epoch": 0.12840035788563064, "grad_norm": 7.375, "learning_rate": 0.0004787183759991466, "loss": 16.1811, "step": 86680 }, { "epoch": 0.12842998417955903, "grad_norm": 7.875, "learning_rate": 0.000478713437064139, "loss": 16.0884, "step": 86700 }, { "epoch": 0.12845961047348742, "grad_norm": 6.65625, "learning_rate": 0.0004787084981291314, "loss": 16.1461, "step": 86720 }, { "epoch": 0.1284892367674158, "grad_norm": 6.28125, "learning_rate": 0.0004787035591941238, "loss": 16.0708, "step": 86740 }, { "epoch": 0.1285188630613442, "grad_norm": 6.65625, "learning_rate": 0.0004786986202591163, "loss": 16.1098, "step": 86760 }, { "epoch": 0.1285484893552726, "grad_norm": 5.78125, "learning_rate": 0.0004786936813241087, "loss": 16.1061, "step": 86780 }, { "epoch": 0.128578115649201, "grad_norm": 6.46875, "learning_rate": 0.00047868874238910116, "loss": 16.1557, "step": 86800 }, { "epoch": 0.12860774194312938, "grad_norm": 6.6875, "learning_rate": 0.00047868380345409355, "loss": 16.111, "step": 86820 }, { "epoch": 0.12863736823705776, "grad_norm": 7.15625, "learning_rate": 0.00047867886451908605, "loss": 16.1384, "step": 86840 }, { "epoch": 0.12866699453098615, "grad_norm": 6.75, "learning_rate": 0.00047867392558407845, "loss": 16.0868, "step": 86860 }, { "epoch": 0.12869662082491454, "grad_norm": 6.65625, "learning_rate": 0.0004786689866490709, "loss": 16.1282, "step": 86880 }, { "epoch": 0.12872624711884292, "grad_norm": 6.84375, "learning_rate": 0.00047866404771406334, "loss": 16.1349, "step": 86900 }, { "epoch": 0.1287558734127713, "grad_norm": 6.8125, "learning_rate": 0.0004786591087790558, "loss": 16.1477, "step": 86920 }, { "epoch": 0.1287854997066997, "grad_norm": 6.65625, "learning_rate": 0.0004786541698440482, "loss": 16.1732, "step": 86940 }, { "epoch": 0.12881512600062808, "grad_norm": 7.71875, "learning_rate": 0.00047864923090904063, "loss": 16.1419, "step": 86960 }, { "epoch": 0.12884475229455647, "grad_norm": 6.5625, "learning_rate": 0.0004786442919740331, "loss": 16.1201, "step": 86980 }, { "epoch": 0.12887437858848486, "grad_norm": 7.8125, "learning_rate": 0.0004786393530390255, "loss": 16.075, "step": 87000 }, { "epoch": 0.12890400488241324, "grad_norm": 6.9375, "learning_rate": 0.0004786344141040179, "loss": 16.1136, "step": 87020 }, { "epoch": 0.12893363117634163, "grad_norm": 6.8125, "learning_rate": 0.00047862947516901037, "loss": 16.1183, "step": 87040 }, { "epoch": 0.12896325747027002, "grad_norm": 6.09375, "learning_rate": 0.0004786245362340028, "loss": 16.0734, "step": 87060 }, { "epoch": 0.1289928837641984, "grad_norm": 6.125, "learning_rate": 0.0004786195972989952, "loss": 16.1581, "step": 87080 }, { "epoch": 0.1290225100581268, "grad_norm": 6.09375, "learning_rate": 0.00047861465836398766, "loss": 16.1307, "step": 87100 }, { "epoch": 0.12905213635205517, "grad_norm": 5.9375, "learning_rate": 0.00047860971942898005, "loss": 16.1619, "step": 87120 }, { "epoch": 0.12908176264598356, "grad_norm": 6.84375, "learning_rate": 0.00047860478049397255, "loss": 16.1827, "step": 87140 }, { "epoch": 0.12911138893991195, "grad_norm": 6.625, "learning_rate": 0.00047859984155896495, "loss": 16.0758, "step": 87160 }, { "epoch": 0.12914101523384033, "grad_norm": 6.78125, "learning_rate": 0.0004785949026239574, "loss": 16.1126, "step": 87180 }, { "epoch": 0.12917064152776872, "grad_norm": 7.0625, "learning_rate": 0.00047858996368894984, "loss": 16.0976, "step": 87200 }, { "epoch": 0.1292002678216971, "grad_norm": 6.46875, "learning_rate": 0.0004785850247539423, "loss": 16.0743, "step": 87220 }, { "epoch": 0.1292298941156255, "grad_norm": 6.8125, "learning_rate": 0.0004785800858189347, "loss": 16.1437, "step": 87240 }, { "epoch": 0.12925952040955388, "grad_norm": 7.78125, "learning_rate": 0.00047857514688392713, "loss": 16.1146, "step": 87260 }, { "epoch": 0.12928914670348227, "grad_norm": 6.96875, "learning_rate": 0.0004785702079489196, "loss": 16.1394, "step": 87280 }, { "epoch": 0.12931877299741065, "grad_norm": 7.125, "learning_rate": 0.000478565269013912, "loss": 16.0493, "step": 87300 }, { "epoch": 0.12934839929133904, "grad_norm": 6.96875, "learning_rate": 0.0004785603300789044, "loss": 16.1101, "step": 87320 }, { "epoch": 0.12937802558526743, "grad_norm": 6.84375, "learning_rate": 0.00047855539114389687, "loss": 16.0971, "step": 87340 }, { "epoch": 0.1294076518791958, "grad_norm": 6.21875, "learning_rate": 0.0004785504522088893, "loss": 16.0971, "step": 87360 }, { "epoch": 0.1294372781731242, "grad_norm": 6.46875, "learning_rate": 0.00047854551327388176, "loss": 16.1321, "step": 87380 }, { "epoch": 0.12946690446705258, "grad_norm": 6.53125, "learning_rate": 0.00047854057433887416, "loss": 16.0754, "step": 87400 }, { "epoch": 0.129496530760981, "grad_norm": 6.90625, "learning_rate": 0.00047853563540386655, "loss": 16.1406, "step": 87420 }, { "epoch": 0.12952615705490939, "grad_norm": 6.75, "learning_rate": 0.00047853069646885905, "loss": 16.1537, "step": 87440 }, { "epoch": 0.12955578334883777, "grad_norm": 7.28125, "learning_rate": 0.00047852575753385145, "loss": 16.1045, "step": 87460 }, { "epoch": 0.12958540964276616, "grad_norm": 6.625, "learning_rate": 0.0004785208185988439, "loss": 16.076, "step": 87480 }, { "epoch": 0.12961503593669454, "grad_norm": 5.84375, "learning_rate": 0.00047851587966383634, "loss": 16.0469, "step": 87500 }, { "epoch": 0.12964466223062293, "grad_norm": 6.21875, "learning_rate": 0.0004785109407288288, "loss": 16.0519, "step": 87520 }, { "epoch": 0.12967428852455132, "grad_norm": 6.375, "learning_rate": 0.0004785060017938212, "loss": 16.1471, "step": 87540 }, { "epoch": 0.1297039148184797, "grad_norm": 6.78125, "learning_rate": 0.00047850106285881363, "loss": 16.0749, "step": 87560 }, { "epoch": 0.1297335411124081, "grad_norm": 6.03125, "learning_rate": 0.0004784961239238061, "loss": 16.104, "step": 87580 }, { "epoch": 0.12976316740633648, "grad_norm": 6.15625, "learning_rate": 0.00047849118498879853, "loss": 16.0497, "step": 87600 }, { "epoch": 0.12979279370026486, "grad_norm": 6.875, "learning_rate": 0.0004784862460537909, "loss": 16.1002, "step": 87620 }, { "epoch": 0.12982241999419325, "grad_norm": 6.4375, "learning_rate": 0.00047848130711878337, "loss": 16.0845, "step": 87640 }, { "epoch": 0.12985204628812164, "grad_norm": 6.71875, "learning_rate": 0.0004784763681837758, "loss": 16.1041, "step": 87660 }, { "epoch": 0.12988167258205002, "grad_norm": 6.125, "learning_rate": 0.00047847142924876826, "loss": 16.0805, "step": 87680 }, { "epoch": 0.1299112988759784, "grad_norm": 6.9375, "learning_rate": 0.00047846649031376066, "loss": 16.0844, "step": 87700 }, { "epoch": 0.1299409251699068, "grad_norm": 6.5625, "learning_rate": 0.0004784615513787531, "loss": 16.1, "step": 87720 }, { "epoch": 0.12997055146383518, "grad_norm": 6.28125, "learning_rate": 0.00047845661244374555, "loss": 16.1036, "step": 87740 }, { "epoch": 0.13000017775776357, "grad_norm": 7.875, "learning_rate": 0.00047845167350873795, "loss": 16.0607, "step": 87760 }, { "epoch": 0.13002980405169196, "grad_norm": 5.90625, "learning_rate": 0.0004784467345737304, "loss": 16.1243, "step": 87780 }, { "epoch": 0.13005943034562034, "grad_norm": 6.6875, "learning_rate": 0.00047844179563872284, "loss": 16.0593, "step": 87800 }, { "epoch": 0.13008905663954873, "grad_norm": 6.53125, "learning_rate": 0.0004784368567037153, "loss": 16.0683, "step": 87820 }, { "epoch": 0.13011868293347711, "grad_norm": 6.46875, "learning_rate": 0.0004784319177687077, "loss": 16.1351, "step": 87840 }, { "epoch": 0.1301483092274055, "grad_norm": 7.375, "learning_rate": 0.00047842697883370013, "loss": 16.1374, "step": 87860 }, { "epoch": 0.1301779355213339, "grad_norm": 6.25, "learning_rate": 0.0004784220398986926, "loss": 16.101, "step": 87880 }, { "epoch": 0.13020756181526227, "grad_norm": 7.9375, "learning_rate": 0.00047841710096368503, "loss": 16.0285, "step": 87900 }, { "epoch": 0.13023718810919066, "grad_norm": 6.34375, "learning_rate": 0.0004784121620286774, "loss": 16.0748, "step": 87920 }, { "epoch": 0.13026681440311905, "grad_norm": 6.71875, "learning_rate": 0.00047840722309366987, "loss": 16.1014, "step": 87940 }, { "epoch": 0.13029644069704743, "grad_norm": 6.375, "learning_rate": 0.0004784022841586623, "loss": 16.0728, "step": 87960 }, { "epoch": 0.13032606699097582, "grad_norm": 7.0625, "learning_rate": 0.00047839734522365476, "loss": 16.0792, "step": 87980 }, { "epoch": 0.1303556932849042, "grad_norm": 6.71875, "learning_rate": 0.00047839240628864716, "loss": 16.0904, "step": 88000 }, { "epoch": 0.1303853195788326, "grad_norm": 6.96875, "learning_rate": 0.0004783874673536396, "loss": 16.1439, "step": 88020 }, { "epoch": 0.130414945872761, "grad_norm": 6.59375, "learning_rate": 0.00047838252841863205, "loss": 16.0969, "step": 88040 }, { "epoch": 0.1304445721666894, "grad_norm": 7.53125, "learning_rate": 0.0004783775894836245, "loss": 16.1171, "step": 88060 }, { "epoch": 0.13047419846061778, "grad_norm": 8.3125, "learning_rate": 0.0004783726505486169, "loss": 16.0381, "step": 88080 }, { "epoch": 0.13050382475454617, "grad_norm": 7.5, "learning_rate": 0.00047836771161360934, "loss": 16.0382, "step": 88100 }, { "epoch": 0.13053345104847455, "grad_norm": 6.9375, "learning_rate": 0.0004783627726786018, "loss": 16.0994, "step": 88120 }, { "epoch": 0.13056307734240294, "grad_norm": 6.90625, "learning_rate": 0.0004783578337435942, "loss": 16.129, "step": 88140 }, { "epoch": 0.13059270363633133, "grad_norm": 7.125, "learning_rate": 0.00047835289480858663, "loss": 16.0095, "step": 88160 }, { "epoch": 0.1306223299302597, "grad_norm": 6.84375, "learning_rate": 0.0004783479558735791, "loss": 16.0916, "step": 88180 }, { "epoch": 0.1306519562241881, "grad_norm": 7.90625, "learning_rate": 0.00047834301693857153, "loss": 16.0117, "step": 88200 }, { "epoch": 0.13068158251811648, "grad_norm": 7.65625, "learning_rate": 0.0004783380780035639, "loss": 16.0946, "step": 88220 }, { "epoch": 0.13071120881204487, "grad_norm": 6.90625, "learning_rate": 0.00047833313906855637, "loss": 16.0911, "step": 88240 }, { "epoch": 0.13074083510597326, "grad_norm": 6.21875, "learning_rate": 0.0004783282001335488, "loss": 16.1057, "step": 88260 }, { "epoch": 0.13077046139990164, "grad_norm": 6.65625, "learning_rate": 0.00047832326119854127, "loss": 16.0787, "step": 88280 }, { "epoch": 0.13080008769383003, "grad_norm": 6.3125, "learning_rate": 0.00047831832226353366, "loss": 16.0581, "step": 88300 }, { "epoch": 0.13082971398775842, "grad_norm": 6.4375, "learning_rate": 0.0004783133833285261, "loss": 16.1227, "step": 88320 }, { "epoch": 0.1308593402816868, "grad_norm": 6.78125, "learning_rate": 0.00047830844439351855, "loss": 16.1202, "step": 88340 }, { "epoch": 0.1308889665756152, "grad_norm": 6.03125, "learning_rate": 0.000478303505458511, "loss": 16.0693, "step": 88360 }, { "epoch": 0.13091859286954358, "grad_norm": 6.46875, "learning_rate": 0.0004782985665235034, "loss": 16.0505, "step": 88380 }, { "epoch": 0.13094821916347196, "grad_norm": 6.71875, "learning_rate": 0.0004782936275884959, "loss": 16.0617, "step": 88400 }, { "epoch": 0.13097784545740035, "grad_norm": 7.25, "learning_rate": 0.0004782886886534883, "loss": 16.0875, "step": 88420 }, { "epoch": 0.13100747175132874, "grad_norm": 6.21875, "learning_rate": 0.0004782837497184807, "loss": 16.0946, "step": 88440 }, { "epoch": 0.13103709804525712, "grad_norm": 6.46875, "learning_rate": 0.00047827881078347313, "loss": 16.0611, "step": 88460 }, { "epoch": 0.1310667243391855, "grad_norm": 7.46875, "learning_rate": 0.0004782738718484656, "loss": 16.0316, "step": 88480 }, { "epoch": 0.1310963506331139, "grad_norm": 6.53125, "learning_rate": 0.00047826893291345803, "loss": 16.1465, "step": 88500 }, { "epoch": 0.13112597692704228, "grad_norm": 6.0625, "learning_rate": 0.0004782639939784504, "loss": 16.1271, "step": 88520 }, { "epoch": 0.13115560322097067, "grad_norm": 6.375, "learning_rate": 0.00047825905504344287, "loss": 16.0312, "step": 88540 }, { "epoch": 0.13118522951489905, "grad_norm": 6.53125, "learning_rate": 0.0004782541161084353, "loss": 16.1015, "step": 88560 }, { "epoch": 0.13121485580882744, "grad_norm": 7.375, "learning_rate": 0.00047824917717342777, "loss": 16.1309, "step": 88580 }, { "epoch": 0.13124448210275583, "grad_norm": 6.9375, "learning_rate": 0.00047824423823842016, "loss": 16.1211, "step": 88600 }, { "epoch": 0.1312741083966842, "grad_norm": 6.28125, "learning_rate": 0.0004782392993034126, "loss": 16.1132, "step": 88620 }, { "epoch": 0.1313037346906126, "grad_norm": 6.125, "learning_rate": 0.00047823436036840505, "loss": 16.062, "step": 88640 }, { "epoch": 0.131333360984541, "grad_norm": 6.90625, "learning_rate": 0.0004782294214333975, "loss": 16.0588, "step": 88660 }, { "epoch": 0.1313629872784694, "grad_norm": 7.59375, "learning_rate": 0.0004782244824983899, "loss": 16.0695, "step": 88680 }, { "epoch": 0.1313926135723978, "grad_norm": 6.40625, "learning_rate": 0.0004782195435633824, "loss": 16.0756, "step": 88700 }, { "epoch": 0.13142223986632617, "grad_norm": 6.28125, "learning_rate": 0.0004782146046283748, "loss": 16.0961, "step": 88720 }, { "epoch": 0.13145186616025456, "grad_norm": 6.53125, "learning_rate": 0.00047820966569336724, "loss": 16.1183, "step": 88740 }, { "epoch": 0.13148149245418295, "grad_norm": 6.5, "learning_rate": 0.00047820472675835963, "loss": 16.0588, "step": 88760 }, { "epoch": 0.13151111874811133, "grad_norm": 7.0625, "learning_rate": 0.0004781997878233521, "loss": 16.0188, "step": 88780 }, { "epoch": 0.13154074504203972, "grad_norm": 7.125, "learning_rate": 0.00047819484888834453, "loss": 16.0652, "step": 88800 }, { "epoch": 0.1315703713359681, "grad_norm": 7.8125, "learning_rate": 0.0004781899099533369, "loss": 16.094, "step": 88820 }, { "epoch": 0.1315999976298965, "grad_norm": 5.46875, "learning_rate": 0.00047818497101832937, "loss": 16.081, "step": 88840 }, { "epoch": 0.13162962392382488, "grad_norm": 6.5625, "learning_rate": 0.0004781800320833218, "loss": 16.0257, "step": 88860 }, { "epoch": 0.13165925021775327, "grad_norm": 6.9375, "learning_rate": 0.00047817509314831427, "loss": 16.0584, "step": 88880 }, { "epoch": 0.13168887651168165, "grad_norm": 6.875, "learning_rate": 0.00047817015421330666, "loss": 16.0718, "step": 88900 }, { "epoch": 0.13171850280561004, "grad_norm": 7.46875, "learning_rate": 0.0004781652152782991, "loss": 16.0725, "step": 88920 }, { "epoch": 0.13174812909953842, "grad_norm": 7.03125, "learning_rate": 0.00047816027634329155, "loss": 16.1068, "step": 88940 }, { "epoch": 0.1317777553934668, "grad_norm": 6.75, "learning_rate": 0.000478155337408284, "loss": 16.0576, "step": 88960 }, { "epoch": 0.1318073816873952, "grad_norm": 7.3125, "learning_rate": 0.0004781503984732764, "loss": 16.0679, "step": 88980 }, { "epoch": 0.13183700798132358, "grad_norm": 6.0625, "learning_rate": 0.0004781454595382689, "loss": 16.037, "step": 89000 }, { "epoch": 0.13186663427525197, "grad_norm": 6.90625, "learning_rate": 0.0004781405206032613, "loss": 16.0445, "step": 89020 }, { "epoch": 0.13189626056918036, "grad_norm": 6.3125, "learning_rate": 0.00047813558166825374, "loss": 16.039, "step": 89040 }, { "epoch": 0.13192588686310874, "grad_norm": 7.46875, "learning_rate": 0.00047813064273324613, "loss": 16.0682, "step": 89060 }, { "epoch": 0.13195551315703713, "grad_norm": 6.4375, "learning_rate": 0.00047812570379823864, "loss": 16.0807, "step": 89080 }, { "epoch": 0.13198513945096552, "grad_norm": 5.78125, "learning_rate": 0.00047812076486323103, "loss": 16.0518, "step": 89100 }, { "epoch": 0.1320147657448939, "grad_norm": 6.125, "learning_rate": 0.0004781158259282234, "loss": 16.0448, "step": 89120 }, { "epoch": 0.1320443920388223, "grad_norm": 7.34375, "learning_rate": 0.00047811088699321587, "loss": 16.0833, "step": 89140 }, { "epoch": 0.13207401833275068, "grad_norm": 7.3125, "learning_rate": 0.0004781059480582083, "loss": 16.0778, "step": 89160 }, { "epoch": 0.13210364462667906, "grad_norm": 6.3125, "learning_rate": 0.00047810100912320077, "loss": 16.0741, "step": 89180 }, { "epoch": 0.13213327092060745, "grad_norm": 6.75, "learning_rate": 0.00047809607018819316, "loss": 16.0538, "step": 89200 }, { "epoch": 0.13216289721453583, "grad_norm": 6.78125, "learning_rate": 0.0004780911312531856, "loss": 16.0552, "step": 89220 }, { "epoch": 0.13219252350846422, "grad_norm": 7.4375, "learning_rate": 0.00047808619231817806, "loss": 16.1538, "step": 89240 }, { "epoch": 0.1322221498023926, "grad_norm": 6.53125, "learning_rate": 0.0004780812533831705, "loss": 16.0342, "step": 89260 }, { "epoch": 0.132251776096321, "grad_norm": 6.4375, "learning_rate": 0.0004780763144481629, "loss": 16.0851, "step": 89280 }, { "epoch": 0.13228140239024938, "grad_norm": 7.15625, "learning_rate": 0.0004780713755131554, "loss": 16.0903, "step": 89300 }, { "epoch": 0.1323110286841778, "grad_norm": 6.0, "learning_rate": 0.0004780664365781478, "loss": 16.0417, "step": 89320 }, { "epoch": 0.13234065497810618, "grad_norm": 5.96875, "learning_rate": 0.00047806149764314024, "loss": 16.0735, "step": 89340 }, { "epoch": 0.13237028127203457, "grad_norm": 7.65625, "learning_rate": 0.00047805655870813263, "loss": 16.019, "step": 89360 }, { "epoch": 0.13239990756596295, "grad_norm": 7.1875, "learning_rate": 0.00047805161977312514, "loss": 16.0661, "step": 89380 }, { "epoch": 0.13242953385989134, "grad_norm": 7.6875, "learning_rate": 0.00047804668083811753, "loss": 16.1395, "step": 89400 }, { "epoch": 0.13245916015381973, "grad_norm": 7.78125, "learning_rate": 0.00047804174190311, "loss": 16.0057, "step": 89420 }, { "epoch": 0.1324887864477481, "grad_norm": 6.5, "learning_rate": 0.00047803680296810237, "loss": 16.0673, "step": 89440 }, { "epoch": 0.1325184127416765, "grad_norm": 6.4375, "learning_rate": 0.0004780318640330948, "loss": 16.1021, "step": 89460 }, { "epoch": 0.1325480390356049, "grad_norm": 6.5, "learning_rate": 0.00047802692509808727, "loss": 16.0441, "step": 89480 }, { "epoch": 0.13257766532953327, "grad_norm": 6.46875, "learning_rate": 0.00047802198616307966, "loss": 16.1323, "step": 89500 }, { "epoch": 0.13260729162346166, "grad_norm": 6.4375, "learning_rate": 0.0004780170472280721, "loss": 16.0594, "step": 89520 }, { "epoch": 0.13263691791739005, "grad_norm": 6.46875, "learning_rate": 0.00047801210829306456, "loss": 16.0408, "step": 89540 }, { "epoch": 0.13266654421131843, "grad_norm": 6.0, "learning_rate": 0.000478007169358057, "loss": 16.0795, "step": 89560 }, { "epoch": 0.13269617050524682, "grad_norm": 6.09375, "learning_rate": 0.0004780022304230494, "loss": 16.0649, "step": 89580 }, { "epoch": 0.1327257967991752, "grad_norm": 6.9375, "learning_rate": 0.0004779972914880419, "loss": 16.0827, "step": 89600 }, { "epoch": 0.1327554230931036, "grad_norm": 6.90625, "learning_rate": 0.0004779923525530343, "loss": 16.0837, "step": 89620 }, { "epoch": 0.13278504938703198, "grad_norm": 7.78125, "learning_rate": 0.00047798741361802674, "loss": 15.9838, "step": 89640 }, { "epoch": 0.13281467568096036, "grad_norm": 7.21875, "learning_rate": 0.00047798247468301913, "loss": 16.1065, "step": 89660 }, { "epoch": 0.13284430197488875, "grad_norm": 6.59375, "learning_rate": 0.00047797753574801164, "loss": 16.0947, "step": 89680 }, { "epoch": 0.13287392826881714, "grad_norm": 7.09375, "learning_rate": 0.00047797259681300403, "loss": 16.0241, "step": 89700 }, { "epoch": 0.13290355456274552, "grad_norm": 6.71875, "learning_rate": 0.0004779676578779965, "loss": 16.0839, "step": 89720 }, { "epoch": 0.1329331808566739, "grad_norm": 6.34375, "learning_rate": 0.00047796271894298887, "loss": 16.0686, "step": 89740 }, { "epoch": 0.1329628071506023, "grad_norm": 6.71875, "learning_rate": 0.0004779577800079814, "loss": 16.0511, "step": 89760 }, { "epoch": 0.13299243344453068, "grad_norm": 6.5, "learning_rate": 0.00047795284107297377, "loss": 16.0528, "step": 89780 }, { "epoch": 0.13302205973845907, "grad_norm": 7.9375, "learning_rate": 0.00047794790213796616, "loss": 16.0406, "step": 89800 }, { "epoch": 0.13305168603238746, "grad_norm": 8.125, "learning_rate": 0.0004779429632029586, "loss": 16.0392, "step": 89820 }, { "epoch": 0.13308131232631584, "grad_norm": 6.03125, "learning_rate": 0.00047793802426795106, "loss": 16.0873, "step": 89840 }, { "epoch": 0.13311093862024423, "grad_norm": 6.625, "learning_rate": 0.0004779330853329435, "loss": 16.0604, "step": 89860 }, { "epoch": 0.13314056491417262, "grad_norm": 6.65625, "learning_rate": 0.0004779281463979359, "loss": 16.0262, "step": 89880 }, { "epoch": 0.133170191208101, "grad_norm": 6.25, "learning_rate": 0.0004779232074629284, "loss": 16.023, "step": 89900 }, { "epoch": 0.1331998175020294, "grad_norm": 7.03125, "learning_rate": 0.0004779182685279208, "loss": 16.0208, "step": 89920 }, { "epoch": 0.13322944379595777, "grad_norm": 7.34375, "learning_rate": 0.00047791332959291324, "loss": 15.9956, "step": 89940 }, { "epoch": 0.1332590700898862, "grad_norm": 7.125, "learning_rate": 0.00047790839065790563, "loss": 16.1001, "step": 89960 }, { "epoch": 0.13328869638381458, "grad_norm": 6.625, "learning_rate": 0.00047790345172289814, "loss": 16.0388, "step": 89980 }, { "epoch": 0.13331832267774296, "grad_norm": 6.59375, "learning_rate": 0.00047789851278789053, "loss": 15.9953, "step": 90000 }, { "epoch": 0.13334794897167135, "grad_norm": 5.96875, "learning_rate": 0.000477893573852883, "loss": 16.009, "step": 90020 }, { "epoch": 0.13337757526559973, "grad_norm": 7.15625, "learning_rate": 0.00047788863491787537, "loss": 16.01, "step": 90040 }, { "epoch": 0.13340720155952812, "grad_norm": 7.03125, "learning_rate": 0.0004778836959828679, "loss": 16.0743, "step": 90060 }, { "epoch": 0.1334368278534565, "grad_norm": 7.71875, "learning_rate": 0.00047787875704786027, "loss": 16.035, "step": 90080 }, { "epoch": 0.1334664541473849, "grad_norm": 7.40625, "learning_rate": 0.00047787381811285266, "loss": 16.0019, "step": 90100 }, { "epoch": 0.13349608044131328, "grad_norm": 6.75, "learning_rate": 0.0004778688791778451, "loss": 16.0189, "step": 90120 }, { "epoch": 0.13352570673524167, "grad_norm": 7.1875, "learning_rate": 0.00047786394024283756, "loss": 16.0577, "step": 90140 }, { "epoch": 0.13355533302917005, "grad_norm": 7.0625, "learning_rate": 0.00047785900130783, "loss": 16.0064, "step": 90160 }, { "epoch": 0.13358495932309844, "grad_norm": 6.09375, "learning_rate": 0.0004778540623728224, "loss": 16.0614, "step": 90180 }, { "epoch": 0.13361458561702683, "grad_norm": 6.3125, "learning_rate": 0.0004778491234378149, "loss": 16.061, "step": 90200 }, { "epoch": 0.1336442119109552, "grad_norm": 7.3125, "learning_rate": 0.0004778441845028073, "loss": 16.0628, "step": 90220 }, { "epoch": 0.1336738382048836, "grad_norm": 7.0625, "learning_rate": 0.00047783924556779974, "loss": 16.0715, "step": 90240 }, { "epoch": 0.13370346449881199, "grad_norm": 6.875, "learning_rate": 0.00047783430663279213, "loss": 16.0733, "step": 90260 }, { "epoch": 0.13373309079274037, "grad_norm": 7.3125, "learning_rate": 0.00047782936769778464, "loss": 16.0501, "step": 90280 }, { "epoch": 0.13376271708666876, "grad_norm": 6.5, "learning_rate": 0.00047782442876277703, "loss": 16.0822, "step": 90300 }, { "epoch": 0.13379234338059715, "grad_norm": 6.84375, "learning_rate": 0.0004778194898277695, "loss": 16.0176, "step": 90320 }, { "epoch": 0.13382196967452553, "grad_norm": 8.125, "learning_rate": 0.00047781455089276187, "loss": 16.0326, "step": 90340 }, { "epoch": 0.13385159596845392, "grad_norm": 6.78125, "learning_rate": 0.0004778096119577544, "loss": 16.0307, "step": 90360 }, { "epoch": 0.1338812222623823, "grad_norm": 5.96875, "learning_rate": 0.00047780467302274677, "loss": 15.9871, "step": 90380 }, { "epoch": 0.1339108485563107, "grad_norm": 6.71875, "learning_rate": 0.0004777997340877392, "loss": 16.0519, "step": 90400 }, { "epoch": 0.13394047485023908, "grad_norm": 7.5625, "learning_rate": 0.0004777947951527316, "loss": 16.0471, "step": 90420 }, { "epoch": 0.13397010114416746, "grad_norm": 5.96875, "learning_rate": 0.0004777898562177241, "loss": 16.0483, "step": 90440 }, { "epoch": 0.13399972743809585, "grad_norm": 6.59375, "learning_rate": 0.0004777849172827165, "loss": 16.0374, "step": 90460 }, { "epoch": 0.13402935373202424, "grad_norm": 7.03125, "learning_rate": 0.0004777799783477089, "loss": 16.0536, "step": 90480 }, { "epoch": 0.13405898002595262, "grad_norm": 6.65625, "learning_rate": 0.0004777750394127014, "loss": 16.0637, "step": 90500 }, { "epoch": 0.134088606319881, "grad_norm": 6.84375, "learning_rate": 0.0004777701004776938, "loss": 16.0353, "step": 90520 }, { "epoch": 0.1341182326138094, "grad_norm": 7.0, "learning_rate": 0.00047776516154268624, "loss": 15.9818, "step": 90540 }, { "epoch": 0.13414785890773778, "grad_norm": 8.125, "learning_rate": 0.00047776022260767863, "loss": 15.9927, "step": 90560 }, { "epoch": 0.13417748520166617, "grad_norm": 6.4375, "learning_rate": 0.00047775528367267114, "loss": 16.0196, "step": 90580 }, { "epoch": 0.13420711149559458, "grad_norm": 5.90625, "learning_rate": 0.00047775034473766353, "loss": 15.9633, "step": 90600 }, { "epoch": 0.13423673778952297, "grad_norm": 6.625, "learning_rate": 0.000477745405802656, "loss": 15.9705, "step": 90620 }, { "epoch": 0.13426636408345136, "grad_norm": 6.0625, "learning_rate": 0.00047774046686764837, "loss": 16.0181, "step": 90640 }, { "epoch": 0.13429599037737974, "grad_norm": 6.6875, "learning_rate": 0.0004777355279326409, "loss": 16.0206, "step": 90660 }, { "epoch": 0.13432561667130813, "grad_norm": 7.0625, "learning_rate": 0.00047773058899763327, "loss": 16.0629, "step": 90680 }, { "epoch": 0.13435524296523652, "grad_norm": 6.78125, "learning_rate": 0.0004777256500626257, "loss": 15.9731, "step": 90700 }, { "epoch": 0.1343848692591649, "grad_norm": 6.3125, "learning_rate": 0.0004777207111276181, "loss": 16.0571, "step": 90720 }, { "epoch": 0.1344144955530933, "grad_norm": 6.5, "learning_rate": 0.0004777157721926106, "loss": 16.0438, "step": 90740 }, { "epoch": 0.13444412184702167, "grad_norm": 6.71875, "learning_rate": 0.000477710833257603, "loss": 15.9468, "step": 90760 }, { "epoch": 0.13447374814095006, "grad_norm": 6.96875, "learning_rate": 0.0004777058943225954, "loss": 16.0521, "step": 90780 }, { "epoch": 0.13450337443487845, "grad_norm": 6.9375, "learning_rate": 0.0004777009553875879, "loss": 16.0237, "step": 90800 }, { "epoch": 0.13453300072880683, "grad_norm": 6.65625, "learning_rate": 0.0004776960164525803, "loss": 15.9838, "step": 90820 }, { "epoch": 0.13456262702273522, "grad_norm": 6.75, "learning_rate": 0.00047769107751757274, "loss": 16.0025, "step": 90840 }, { "epoch": 0.1345922533166636, "grad_norm": 6.21875, "learning_rate": 0.00047768613858256514, "loss": 16.0497, "step": 90860 }, { "epoch": 0.134621879610592, "grad_norm": 6.65625, "learning_rate": 0.00047768119964755764, "loss": 16.0069, "step": 90880 }, { "epoch": 0.13465150590452038, "grad_norm": 6.59375, "learning_rate": 0.00047767626071255003, "loss": 16.0293, "step": 90900 }, { "epoch": 0.13468113219844877, "grad_norm": 6.90625, "learning_rate": 0.0004776713217775425, "loss": 16.0807, "step": 90920 }, { "epoch": 0.13471075849237715, "grad_norm": 6.75, "learning_rate": 0.00047766638284253487, "loss": 16.0399, "step": 90940 }, { "epoch": 0.13474038478630554, "grad_norm": 7.4375, "learning_rate": 0.0004776614439075274, "loss": 16.0325, "step": 90960 }, { "epoch": 0.13477001108023393, "grad_norm": 6.0625, "learning_rate": 0.00047765650497251977, "loss": 16.0465, "step": 90980 }, { "epoch": 0.1347996373741623, "grad_norm": 6.84375, "learning_rate": 0.0004776515660375122, "loss": 16.0153, "step": 91000 }, { "epoch": 0.1348292636680907, "grad_norm": 7.0625, "learning_rate": 0.0004776466271025046, "loss": 15.968, "step": 91020 }, { "epoch": 0.13485888996201909, "grad_norm": 7.53125, "learning_rate": 0.0004776416881674971, "loss": 15.9768, "step": 91040 }, { "epoch": 0.13488851625594747, "grad_norm": 6.21875, "learning_rate": 0.0004776367492324895, "loss": 16.0407, "step": 91060 }, { "epoch": 0.13491814254987586, "grad_norm": 6.5625, "learning_rate": 0.00047763181029748195, "loss": 15.9734, "step": 91080 }, { "epoch": 0.13494776884380424, "grad_norm": 6.4375, "learning_rate": 0.0004776268713624744, "loss": 16.0097, "step": 91100 }, { "epoch": 0.13497739513773263, "grad_norm": 6.40625, "learning_rate": 0.0004776219324274668, "loss": 15.9685, "step": 91120 }, { "epoch": 0.13500702143166102, "grad_norm": 6.09375, "learning_rate": 0.00047761699349245924, "loss": 15.9655, "step": 91140 }, { "epoch": 0.1350366477255894, "grad_norm": 6.03125, "learning_rate": 0.00047761205455745164, "loss": 16.031, "step": 91160 }, { "epoch": 0.1350662740195178, "grad_norm": 7.40625, "learning_rate": 0.00047760711562244414, "loss": 16.0488, "step": 91180 }, { "epoch": 0.13509590031344618, "grad_norm": 7.03125, "learning_rate": 0.00047760217668743653, "loss": 15.9942, "step": 91200 }, { "epoch": 0.1351255266073746, "grad_norm": 7.03125, "learning_rate": 0.000477597237752429, "loss": 16.0135, "step": 91220 }, { "epoch": 0.13515515290130298, "grad_norm": 7.34375, "learning_rate": 0.00047759229881742137, "loss": 16.0146, "step": 91240 }, { "epoch": 0.13518477919523136, "grad_norm": 7.5625, "learning_rate": 0.0004775873598824139, "loss": 15.958, "step": 91260 }, { "epoch": 0.13521440548915975, "grad_norm": 6.03125, "learning_rate": 0.00047758242094740627, "loss": 16.0392, "step": 91280 }, { "epoch": 0.13524403178308814, "grad_norm": 6.6875, "learning_rate": 0.0004775774820123987, "loss": 16.077, "step": 91300 }, { "epoch": 0.13527365807701652, "grad_norm": 6.34375, "learning_rate": 0.0004775725430773911, "loss": 16.0345, "step": 91320 }, { "epoch": 0.1353032843709449, "grad_norm": 6.84375, "learning_rate": 0.0004775676041423836, "loss": 15.9955, "step": 91340 }, { "epoch": 0.1353329106648733, "grad_norm": 6.78125, "learning_rate": 0.000477562665207376, "loss": 16.0064, "step": 91360 }, { "epoch": 0.13536253695880168, "grad_norm": 6.75, "learning_rate": 0.00047755772627236845, "loss": 16.0733, "step": 91380 }, { "epoch": 0.13539216325273007, "grad_norm": 6.75, "learning_rate": 0.0004775527873373609, "loss": 16.0265, "step": 91400 }, { "epoch": 0.13542178954665846, "grad_norm": 5.78125, "learning_rate": 0.00047754784840235335, "loss": 15.9694, "step": 91420 }, { "epoch": 0.13545141584058684, "grad_norm": 6.28125, "learning_rate": 0.00047754290946734574, "loss": 15.9989, "step": 91440 }, { "epoch": 0.13548104213451523, "grad_norm": 7.0625, "learning_rate": 0.00047753797053233814, "loss": 15.9802, "step": 91460 }, { "epoch": 0.13551066842844361, "grad_norm": 6.875, "learning_rate": 0.00047753303159733064, "loss": 15.9944, "step": 91480 }, { "epoch": 0.135540294722372, "grad_norm": 6.1875, "learning_rate": 0.00047752809266232303, "loss": 15.9921, "step": 91500 }, { "epoch": 0.1355699210163004, "grad_norm": 5.84375, "learning_rate": 0.0004775231537273155, "loss": 16.0429, "step": 91520 }, { "epoch": 0.13559954731022877, "grad_norm": 7.53125, "learning_rate": 0.00047751821479230787, "loss": 16.0484, "step": 91540 }, { "epoch": 0.13562917360415716, "grad_norm": 6.15625, "learning_rate": 0.0004775132758573004, "loss": 16.0053, "step": 91560 }, { "epoch": 0.13565879989808555, "grad_norm": 7.0625, "learning_rate": 0.00047750833692229277, "loss": 16.0835, "step": 91580 }, { "epoch": 0.13568842619201393, "grad_norm": 6.125, "learning_rate": 0.0004775033979872852, "loss": 15.9618, "step": 91600 }, { "epoch": 0.13571805248594232, "grad_norm": 6.1875, "learning_rate": 0.0004774984590522776, "loss": 16.1083, "step": 91620 }, { "epoch": 0.1357476787798707, "grad_norm": 6.4375, "learning_rate": 0.0004774935201172701, "loss": 16.0322, "step": 91640 }, { "epoch": 0.1357773050737991, "grad_norm": 7.15625, "learning_rate": 0.0004774885811822625, "loss": 16.0207, "step": 91660 }, { "epoch": 0.13580693136772748, "grad_norm": 6.71875, "learning_rate": 0.00047748364224725495, "loss": 16.0397, "step": 91680 }, { "epoch": 0.13583655766165587, "grad_norm": 6.71875, "learning_rate": 0.0004774787033122474, "loss": 16.0449, "step": 91700 }, { "epoch": 0.13586618395558425, "grad_norm": 6.5625, "learning_rate": 0.00047747376437723985, "loss": 15.9897, "step": 91720 }, { "epoch": 0.13589581024951264, "grad_norm": 6.5625, "learning_rate": 0.00047746882544223224, "loss": 15.996, "step": 91740 }, { "epoch": 0.13592543654344102, "grad_norm": 6.59375, "learning_rate": 0.0004774638865072247, "loss": 16.0702, "step": 91760 }, { "epoch": 0.1359550628373694, "grad_norm": 7.5, "learning_rate": 0.00047745894757221714, "loss": 15.9937, "step": 91780 }, { "epoch": 0.1359846891312978, "grad_norm": 5.96875, "learning_rate": 0.00047745400863720953, "loss": 16.0173, "step": 91800 }, { "epoch": 0.13601431542522618, "grad_norm": 7.1875, "learning_rate": 0.000477449069702202, "loss": 16.0222, "step": 91820 }, { "epoch": 0.13604394171915457, "grad_norm": 6.9375, "learning_rate": 0.0004774441307671944, "loss": 15.9858, "step": 91840 }, { "epoch": 0.13607356801308298, "grad_norm": 6.15625, "learning_rate": 0.0004774391918321869, "loss": 16.0199, "step": 91860 }, { "epoch": 0.13610319430701137, "grad_norm": 7.6875, "learning_rate": 0.00047743425289717927, "loss": 16.0404, "step": 91880 }, { "epoch": 0.13613282060093976, "grad_norm": 5.96875, "learning_rate": 0.0004774293139621717, "loss": 15.9827, "step": 91900 }, { "epoch": 0.13616244689486814, "grad_norm": 5.75, "learning_rate": 0.0004774243750271641, "loss": 15.9697, "step": 91920 }, { "epoch": 0.13619207318879653, "grad_norm": 6.65625, "learning_rate": 0.0004774194360921566, "loss": 16.0088, "step": 91940 }, { "epoch": 0.13622169948272492, "grad_norm": 6.1875, "learning_rate": 0.000477414497157149, "loss": 15.9676, "step": 91960 }, { "epoch": 0.1362513257766533, "grad_norm": 6.28125, "learning_rate": 0.00047740955822214145, "loss": 15.9717, "step": 91980 }, { "epoch": 0.1362809520705817, "grad_norm": 6.34375, "learning_rate": 0.0004774046192871339, "loss": 15.9875, "step": 92000 }, { "epoch": 0.13631057836451008, "grad_norm": 6.65625, "learning_rate": 0.00047739968035212635, "loss": 15.9507, "step": 92020 }, { "epoch": 0.13634020465843846, "grad_norm": 7.09375, "learning_rate": 0.00047739474141711874, "loss": 16.0014, "step": 92040 }, { "epoch": 0.13636983095236685, "grad_norm": 6.75, "learning_rate": 0.0004773898024821112, "loss": 15.9416, "step": 92060 }, { "epoch": 0.13639945724629524, "grad_norm": 6.1875, "learning_rate": 0.00047738486354710364, "loss": 15.9874, "step": 92080 }, { "epoch": 0.13642908354022362, "grad_norm": 5.96875, "learning_rate": 0.0004773799246120961, "loss": 15.9741, "step": 92100 }, { "epoch": 0.136458709834152, "grad_norm": 6.40625, "learning_rate": 0.0004773749856770885, "loss": 16.0328, "step": 92120 }, { "epoch": 0.1364883361280804, "grad_norm": 6.40625, "learning_rate": 0.0004773700467420809, "loss": 16.0521, "step": 92140 }, { "epoch": 0.13651796242200878, "grad_norm": 6.25, "learning_rate": 0.0004773651078070734, "loss": 15.9771, "step": 92160 }, { "epoch": 0.13654758871593717, "grad_norm": 6.5, "learning_rate": 0.00047736016887206577, "loss": 16.007, "step": 92180 }, { "epoch": 0.13657721500986555, "grad_norm": 7.625, "learning_rate": 0.0004773552299370582, "loss": 15.9574, "step": 92200 }, { "epoch": 0.13660684130379394, "grad_norm": 7.25, "learning_rate": 0.0004773502910020506, "loss": 15.9995, "step": 92220 }, { "epoch": 0.13663646759772233, "grad_norm": 8.0625, "learning_rate": 0.0004773453520670431, "loss": 15.9729, "step": 92240 }, { "epoch": 0.13666609389165071, "grad_norm": 6.5, "learning_rate": 0.0004773404131320355, "loss": 15.9253, "step": 92260 }, { "epoch": 0.1366957201855791, "grad_norm": 7.4375, "learning_rate": 0.00047733547419702795, "loss": 15.9889, "step": 92280 }, { "epoch": 0.1367253464795075, "grad_norm": 5.8125, "learning_rate": 0.0004773305352620204, "loss": 16.0373, "step": 92300 }, { "epoch": 0.13675497277343587, "grad_norm": 7.0625, "learning_rate": 0.00047732559632701285, "loss": 15.8976, "step": 92320 }, { "epoch": 0.13678459906736426, "grad_norm": 7.28125, "learning_rate": 0.00047732065739200524, "loss": 15.9445, "step": 92340 }, { "epoch": 0.13681422536129265, "grad_norm": 6.21875, "learning_rate": 0.0004773157184569977, "loss": 15.9649, "step": 92360 }, { "epoch": 0.13684385165522103, "grad_norm": 7.1875, "learning_rate": 0.00047731077952199014, "loss": 15.9297, "step": 92380 }, { "epoch": 0.13687347794914942, "grad_norm": 6.09375, "learning_rate": 0.0004773058405869826, "loss": 15.9745, "step": 92400 }, { "epoch": 0.1369031042430778, "grad_norm": 5.6875, "learning_rate": 0.000477300901651975, "loss": 16.0049, "step": 92420 }, { "epoch": 0.1369327305370062, "grad_norm": 6.34375, "learning_rate": 0.00047729596271696743, "loss": 15.9792, "step": 92440 }, { "epoch": 0.13696235683093458, "grad_norm": 6.78125, "learning_rate": 0.0004772910237819599, "loss": 16.0349, "step": 92460 }, { "epoch": 0.13699198312486296, "grad_norm": 7.8125, "learning_rate": 0.00047728608484695227, "loss": 15.9407, "step": 92480 }, { "epoch": 0.13702160941879138, "grad_norm": 6.90625, "learning_rate": 0.0004772811459119447, "loss": 16.0095, "step": 92500 }, { "epoch": 0.13705123571271977, "grad_norm": 6.46875, "learning_rate": 0.0004772762069769371, "loss": 16.0161, "step": 92520 }, { "epoch": 0.13708086200664815, "grad_norm": 6.9375, "learning_rate": 0.0004772712680419296, "loss": 15.9453, "step": 92540 }, { "epoch": 0.13711048830057654, "grad_norm": 6.5625, "learning_rate": 0.000477266329106922, "loss": 16.0047, "step": 92560 }, { "epoch": 0.13714011459450492, "grad_norm": 5.59375, "learning_rate": 0.00047726139017191445, "loss": 16.0046, "step": 92580 }, { "epoch": 0.1371697408884333, "grad_norm": 6.90625, "learning_rate": 0.0004772564512369069, "loss": 15.9658, "step": 92600 }, { "epoch": 0.1371993671823617, "grad_norm": 6.46875, "learning_rate": 0.00047725151230189935, "loss": 15.9865, "step": 92620 }, { "epoch": 0.13722899347629008, "grad_norm": 6.9375, "learning_rate": 0.00047724657336689174, "loss": 15.9971, "step": 92640 }, { "epoch": 0.13725861977021847, "grad_norm": 5.90625, "learning_rate": 0.0004772416344318842, "loss": 15.9189, "step": 92660 }, { "epoch": 0.13728824606414686, "grad_norm": 7.25, "learning_rate": 0.00047723669549687664, "loss": 15.9472, "step": 92680 }, { "epoch": 0.13731787235807524, "grad_norm": 6.15625, "learning_rate": 0.0004772317565618691, "loss": 15.9382, "step": 92700 }, { "epoch": 0.13734749865200363, "grad_norm": 8.9375, "learning_rate": 0.0004772268176268615, "loss": 15.9936, "step": 92720 }, { "epoch": 0.13737712494593202, "grad_norm": 7.75, "learning_rate": 0.00047722187869185393, "loss": 16.0302, "step": 92740 }, { "epoch": 0.1374067512398604, "grad_norm": 6.53125, "learning_rate": 0.0004772169397568464, "loss": 15.9393, "step": 92760 }, { "epoch": 0.1374363775337888, "grad_norm": 6.15625, "learning_rate": 0.0004772120008218388, "loss": 15.9335, "step": 92780 }, { "epoch": 0.13746600382771718, "grad_norm": 6.875, "learning_rate": 0.0004772070618868312, "loss": 15.9292, "step": 92800 }, { "epoch": 0.13749563012164556, "grad_norm": 7.53125, "learning_rate": 0.0004772021229518236, "loss": 15.9686, "step": 92820 }, { "epoch": 0.13752525641557395, "grad_norm": 7.59375, "learning_rate": 0.0004771971840168161, "loss": 15.884, "step": 92840 }, { "epoch": 0.13755488270950234, "grad_norm": 6.625, "learning_rate": 0.0004771922450818085, "loss": 15.9617, "step": 92860 }, { "epoch": 0.13758450900343072, "grad_norm": 6.71875, "learning_rate": 0.00047718730614680095, "loss": 16.0184, "step": 92880 }, { "epoch": 0.1376141352973591, "grad_norm": 6.90625, "learning_rate": 0.0004771823672117934, "loss": 16.0447, "step": 92900 }, { "epoch": 0.1376437615912875, "grad_norm": 6.8125, "learning_rate": 0.00047717742827678585, "loss": 15.9832, "step": 92920 }, { "epoch": 0.13767338788521588, "grad_norm": 7.03125, "learning_rate": 0.00047717248934177824, "loss": 15.9968, "step": 92940 }, { "epoch": 0.13770301417914427, "grad_norm": 6.46875, "learning_rate": 0.0004771675504067707, "loss": 16.0044, "step": 92960 }, { "epoch": 0.13773264047307265, "grad_norm": 7.09375, "learning_rate": 0.00047716261147176314, "loss": 16.0231, "step": 92980 }, { "epoch": 0.13776226676700104, "grad_norm": 6.8125, "learning_rate": 0.0004771576725367556, "loss": 15.9875, "step": 93000 }, { "epoch": 0.13779189306092943, "grad_norm": 7.46875, "learning_rate": 0.000477152733601748, "loss": 15.9861, "step": 93020 }, { "epoch": 0.1378215193548578, "grad_norm": 6.9375, "learning_rate": 0.00047714779466674043, "loss": 16.0143, "step": 93040 }, { "epoch": 0.1378511456487862, "grad_norm": 7.8125, "learning_rate": 0.0004771428557317329, "loss": 16.0079, "step": 93060 }, { "epoch": 0.1378807719427146, "grad_norm": 7.0625, "learning_rate": 0.0004771379167967253, "loss": 16.0306, "step": 93080 }, { "epoch": 0.13791039823664297, "grad_norm": 6.5, "learning_rate": 0.0004771329778617177, "loss": 15.9793, "step": 93100 }, { "epoch": 0.13794002453057136, "grad_norm": 6.375, "learning_rate": 0.00047712803892671017, "loss": 15.9984, "step": 93120 }, { "epoch": 0.13796965082449977, "grad_norm": 6.125, "learning_rate": 0.0004771230999917026, "loss": 15.9885, "step": 93140 }, { "epoch": 0.13799927711842816, "grad_norm": 5.90625, "learning_rate": 0.000477118161056695, "loss": 15.9994, "step": 93160 }, { "epoch": 0.13802890341235655, "grad_norm": 6.78125, "learning_rate": 0.00047711322212168745, "loss": 15.9989, "step": 93180 }, { "epoch": 0.13805852970628493, "grad_norm": 6.03125, "learning_rate": 0.0004771082831866799, "loss": 16.0155, "step": 93200 }, { "epoch": 0.13808815600021332, "grad_norm": 6.90625, "learning_rate": 0.00047710334425167235, "loss": 16.001, "step": 93220 }, { "epoch": 0.1381177822941417, "grad_norm": 6.21875, "learning_rate": 0.00047709840531666474, "loss": 15.9548, "step": 93240 }, { "epoch": 0.1381474085880701, "grad_norm": 6.53125, "learning_rate": 0.0004770934663816572, "loss": 15.9404, "step": 93260 }, { "epoch": 0.13817703488199848, "grad_norm": 6.28125, "learning_rate": 0.00047708852744664964, "loss": 16.0065, "step": 93280 }, { "epoch": 0.13820666117592686, "grad_norm": 6.90625, "learning_rate": 0.0004770835885116421, "loss": 15.9926, "step": 93300 }, { "epoch": 0.13823628746985525, "grad_norm": 6.375, "learning_rate": 0.0004770786495766345, "loss": 15.9589, "step": 93320 }, { "epoch": 0.13826591376378364, "grad_norm": 7.1875, "learning_rate": 0.00047707371064162693, "loss": 15.9406, "step": 93340 }, { "epoch": 0.13829554005771202, "grad_norm": 7.25, "learning_rate": 0.0004770687717066194, "loss": 15.9866, "step": 93360 }, { "epoch": 0.1383251663516404, "grad_norm": 6.53125, "learning_rate": 0.0004770638327716118, "loss": 16.0541, "step": 93380 }, { "epoch": 0.1383547926455688, "grad_norm": 6.25, "learning_rate": 0.0004770588938366042, "loss": 16.017, "step": 93400 }, { "epoch": 0.13838441893949718, "grad_norm": 5.875, "learning_rate": 0.00047705395490159667, "loss": 15.94, "step": 93420 }, { "epoch": 0.13841404523342557, "grad_norm": 6.9375, "learning_rate": 0.0004770490159665891, "loss": 15.9397, "step": 93440 }, { "epoch": 0.13844367152735396, "grad_norm": 6.5625, "learning_rate": 0.00047704407703158156, "loss": 15.999, "step": 93460 }, { "epoch": 0.13847329782128234, "grad_norm": 6.46875, "learning_rate": 0.00047703913809657396, "loss": 16.0074, "step": 93480 }, { "epoch": 0.13850292411521073, "grad_norm": 6.15625, "learning_rate": 0.0004770341991615664, "loss": 15.9781, "step": 93500 }, { "epoch": 0.13853255040913912, "grad_norm": 5.96875, "learning_rate": 0.00047702926022655885, "loss": 15.9099, "step": 93520 }, { "epoch": 0.1385621767030675, "grad_norm": 6.75, "learning_rate": 0.00047702432129155124, "loss": 16.0039, "step": 93540 }, { "epoch": 0.1385918029969959, "grad_norm": 6.875, "learning_rate": 0.0004770193823565437, "loss": 15.9684, "step": 93560 }, { "epoch": 0.13862142929092428, "grad_norm": 5.90625, "learning_rate": 0.00047701444342153614, "loss": 15.9484, "step": 93580 }, { "epoch": 0.13865105558485266, "grad_norm": 6.40625, "learning_rate": 0.0004770095044865286, "loss": 15.9525, "step": 93600 }, { "epoch": 0.13868068187878105, "grad_norm": 7.15625, "learning_rate": 0.000477004565551521, "loss": 15.93, "step": 93620 }, { "epoch": 0.13871030817270943, "grad_norm": 5.5, "learning_rate": 0.00047699962661651343, "loss": 15.974, "step": 93640 }, { "epoch": 0.13873993446663782, "grad_norm": 6.15625, "learning_rate": 0.0004769946876815059, "loss": 15.9693, "step": 93660 }, { "epoch": 0.1387695607605662, "grad_norm": 6.5625, "learning_rate": 0.0004769897487464983, "loss": 15.977, "step": 93680 }, { "epoch": 0.1387991870544946, "grad_norm": 8.1875, "learning_rate": 0.0004769848098114907, "loss": 15.9688, "step": 93700 }, { "epoch": 0.13882881334842298, "grad_norm": 7.40625, "learning_rate": 0.00047697987087648317, "loss": 15.9438, "step": 93720 }, { "epoch": 0.13885843964235137, "grad_norm": 7.125, "learning_rate": 0.0004769749319414756, "loss": 15.8898, "step": 93740 }, { "epoch": 0.13888806593627975, "grad_norm": 6.21875, "learning_rate": 0.00047696999300646806, "loss": 15.9557, "step": 93760 }, { "epoch": 0.13891769223020817, "grad_norm": 7.09375, "learning_rate": 0.00047696505407146046, "loss": 15.9536, "step": 93780 }, { "epoch": 0.13894731852413655, "grad_norm": 6.71875, "learning_rate": 0.00047696011513645296, "loss": 15.9966, "step": 93800 }, { "epoch": 0.13897694481806494, "grad_norm": 6.625, "learning_rate": 0.00047695517620144535, "loss": 15.9667, "step": 93820 }, { "epoch": 0.13900657111199333, "grad_norm": 7.28125, "learning_rate": 0.00047695023726643774, "loss": 15.9713, "step": 93840 }, { "epoch": 0.1390361974059217, "grad_norm": 6.9375, "learning_rate": 0.0004769452983314302, "loss": 15.9917, "step": 93860 }, { "epoch": 0.1390658236998501, "grad_norm": 8.125, "learning_rate": 0.00047694035939642264, "loss": 15.9838, "step": 93880 }, { "epoch": 0.13909544999377849, "grad_norm": 6.5625, "learning_rate": 0.0004769354204614151, "loss": 16.014, "step": 93900 }, { "epoch": 0.13912507628770687, "grad_norm": 7.1875, "learning_rate": 0.0004769304815264075, "loss": 15.9747, "step": 93920 }, { "epoch": 0.13915470258163526, "grad_norm": 5.75, "learning_rate": 0.00047692554259139993, "loss": 15.9309, "step": 93940 }, { "epoch": 0.13918432887556365, "grad_norm": 6.5, "learning_rate": 0.0004769206036563924, "loss": 15.9635, "step": 93960 }, { "epoch": 0.13921395516949203, "grad_norm": 6.5, "learning_rate": 0.0004769156647213848, "loss": 15.9224, "step": 93980 }, { "epoch": 0.13924358146342042, "grad_norm": 8.0625, "learning_rate": 0.0004769107257863772, "loss": 16.0037, "step": 94000 }, { "epoch": 0.1392732077573488, "grad_norm": 6.5, "learning_rate": 0.00047690578685136967, "loss": 15.9881, "step": 94020 }, { "epoch": 0.1393028340512772, "grad_norm": 5.84375, "learning_rate": 0.0004769008479163621, "loss": 15.9602, "step": 94040 }, { "epoch": 0.13933246034520558, "grad_norm": 6.125, "learning_rate": 0.00047689590898135456, "loss": 15.9185, "step": 94060 }, { "epoch": 0.13936208663913396, "grad_norm": 6.125, "learning_rate": 0.00047689097004634696, "loss": 15.9615, "step": 94080 }, { "epoch": 0.13939171293306235, "grad_norm": 6.46875, "learning_rate": 0.00047688603111133946, "loss": 15.9471, "step": 94100 }, { "epoch": 0.13942133922699074, "grad_norm": 6.65625, "learning_rate": 0.00047688109217633185, "loss": 15.9587, "step": 94120 }, { "epoch": 0.13945096552091912, "grad_norm": 6.28125, "learning_rate": 0.0004768761532413243, "loss": 15.9751, "step": 94140 }, { "epoch": 0.1394805918148475, "grad_norm": 7.9375, "learning_rate": 0.0004768712143063167, "loss": 15.9658, "step": 94160 }, { "epoch": 0.1395102181087759, "grad_norm": 6.0, "learning_rate": 0.00047686627537130914, "loss": 15.9596, "step": 94180 }, { "epoch": 0.13953984440270428, "grad_norm": 8.1875, "learning_rate": 0.0004768613364363016, "loss": 15.9488, "step": 94200 }, { "epoch": 0.13956947069663267, "grad_norm": 6.0, "learning_rate": 0.000476856397501294, "loss": 15.9295, "step": 94220 }, { "epoch": 0.13959909699056106, "grad_norm": 7.40625, "learning_rate": 0.00047685145856628643, "loss": 16.0092, "step": 94240 }, { "epoch": 0.13962872328448944, "grad_norm": 6.8125, "learning_rate": 0.0004768465196312789, "loss": 15.9665, "step": 94260 }, { "epoch": 0.13965834957841783, "grad_norm": 6.34375, "learning_rate": 0.0004768415806962713, "loss": 15.9973, "step": 94280 }, { "epoch": 0.13968797587234622, "grad_norm": 7.5, "learning_rate": 0.0004768366417612637, "loss": 16.0028, "step": 94300 }, { "epoch": 0.1397176021662746, "grad_norm": 6.15625, "learning_rate": 0.00047683170282625617, "loss": 16.0016, "step": 94320 }, { "epoch": 0.139747228460203, "grad_norm": 6.40625, "learning_rate": 0.0004768267638912486, "loss": 15.9076, "step": 94340 }, { "epoch": 0.13977685475413137, "grad_norm": 6.46875, "learning_rate": 0.00047682182495624106, "loss": 15.9703, "step": 94360 }, { "epoch": 0.13980648104805976, "grad_norm": 7.40625, "learning_rate": 0.00047681688602123346, "loss": 15.8935, "step": 94380 }, { "epoch": 0.13983610734198818, "grad_norm": 7.3125, "learning_rate": 0.00047681194708622596, "loss": 15.895, "step": 94400 }, { "epoch": 0.13986573363591656, "grad_norm": 6.0625, "learning_rate": 0.00047680700815121835, "loss": 15.9275, "step": 94420 }, { "epoch": 0.13989535992984495, "grad_norm": 6.5, "learning_rate": 0.0004768020692162108, "loss": 15.9594, "step": 94440 }, { "epoch": 0.13992498622377333, "grad_norm": 6.75, "learning_rate": 0.0004767971302812032, "loss": 16.0257, "step": 94460 }, { "epoch": 0.13995461251770172, "grad_norm": 6.90625, "learning_rate": 0.0004767921913461957, "loss": 15.9665, "step": 94480 }, { "epoch": 0.1399842388116301, "grad_norm": 6.84375, "learning_rate": 0.0004767872524111881, "loss": 15.8739, "step": 94500 }, { "epoch": 0.1400138651055585, "grad_norm": 6.53125, "learning_rate": 0.0004767823134761805, "loss": 15.9884, "step": 94520 }, { "epoch": 0.14004349139948688, "grad_norm": 6.53125, "learning_rate": 0.00047677737454117293, "loss": 15.9545, "step": 94540 }, { "epoch": 0.14007311769341527, "grad_norm": 7.8125, "learning_rate": 0.0004767724356061654, "loss": 15.9704, "step": 94560 }, { "epoch": 0.14010274398734365, "grad_norm": 6.15625, "learning_rate": 0.0004767674966711578, "loss": 15.9091, "step": 94580 }, { "epoch": 0.14013237028127204, "grad_norm": 5.75, "learning_rate": 0.0004767625577361502, "loss": 15.9386, "step": 94600 }, { "epoch": 0.14016199657520043, "grad_norm": 5.65625, "learning_rate": 0.00047675761880114267, "loss": 15.9673, "step": 94620 }, { "epoch": 0.1401916228691288, "grad_norm": 6.65625, "learning_rate": 0.0004767526798661351, "loss": 16.0197, "step": 94640 }, { "epoch": 0.1402212491630572, "grad_norm": 6.375, "learning_rate": 0.00047674774093112756, "loss": 15.9528, "step": 94660 }, { "epoch": 0.14025087545698559, "grad_norm": 6.90625, "learning_rate": 0.00047674280199611996, "loss": 15.984, "step": 94680 }, { "epoch": 0.14028050175091397, "grad_norm": 6.6875, "learning_rate": 0.00047673786306111246, "loss": 16.0564, "step": 94700 }, { "epoch": 0.14031012804484236, "grad_norm": 5.75, "learning_rate": 0.00047673292412610485, "loss": 15.9788, "step": 94720 }, { "epoch": 0.14033975433877074, "grad_norm": 6.21875, "learning_rate": 0.0004767279851910973, "loss": 15.9594, "step": 94740 }, { "epoch": 0.14036938063269913, "grad_norm": 7.0, "learning_rate": 0.0004767230462560897, "loss": 15.9897, "step": 94760 }, { "epoch": 0.14039900692662752, "grad_norm": 6.46875, "learning_rate": 0.0004767181073210822, "loss": 15.9421, "step": 94780 }, { "epoch": 0.1404286332205559, "grad_norm": 7.15625, "learning_rate": 0.0004767131683860746, "loss": 15.996, "step": 94800 }, { "epoch": 0.1404582595144843, "grad_norm": 7.0, "learning_rate": 0.00047670822945106704, "loss": 15.9606, "step": 94820 }, { "epoch": 0.14048788580841268, "grad_norm": 6.0625, "learning_rate": 0.00047670329051605943, "loss": 15.9885, "step": 94840 }, { "epoch": 0.14051751210234106, "grad_norm": 6.59375, "learning_rate": 0.0004766983515810519, "loss": 15.9617, "step": 94860 }, { "epoch": 0.14054713839626945, "grad_norm": 5.875, "learning_rate": 0.0004766934126460443, "loss": 15.9406, "step": 94880 }, { "epoch": 0.14057676469019784, "grad_norm": 6.78125, "learning_rate": 0.0004766884737110367, "loss": 15.9714, "step": 94900 }, { "epoch": 0.14060639098412622, "grad_norm": 7.0625, "learning_rate": 0.00047668353477602917, "loss": 15.9234, "step": 94920 }, { "epoch": 0.1406360172780546, "grad_norm": 7.15625, "learning_rate": 0.0004766785958410216, "loss": 15.9531, "step": 94940 }, { "epoch": 0.140665643571983, "grad_norm": 6.34375, "learning_rate": 0.00047667365690601406, "loss": 15.904, "step": 94960 }, { "epoch": 0.14069526986591138, "grad_norm": 6.625, "learning_rate": 0.00047666871797100646, "loss": 15.907, "step": 94980 }, { "epoch": 0.14072489615983977, "grad_norm": 7.125, "learning_rate": 0.00047666377903599896, "loss": 15.9427, "step": 95000 }, { "epoch": 0.14075452245376815, "grad_norm": 6.6875, "learning_rate": 0.00047665884010099135, "loss": 15.9363, "step": 95020 }, { "epoch": 0.14078414874769657, "grad_norm": 6.65625, "learning_rate": 0.0004766539011659838, "loss": 15.9379, "step": 95040 }, { "epoch": 0.14081377504162496, "grad_norm": 7.5, "learning_rate": 0.0004766489622309762, "loss": 15.8876, "step": 95060 }, { "epoch": 0.14084340133555334, "grad_norm": 6.875, "learning_rate": 0.0004766440232959687, "loss": 15.976, "step": 95080 }, { "epoch": 0.14087302762948173, "grad_norm": 6.46875, "learning_rate": 0.0004766390843609611, "loss": 15.9931, "step": 95100 }, { "epoch": 0.14090265392341011, "grad_norm": 6.96875, "learning_rate": 0.00047663414542595354, "loss": 15.9063, "step": 95120 }, { "epoch": 0.1409322802173385, "grad_norm": 7.15625, "learning_rate": 0.00047662920649094593, "loss": 15.9497, "step": 95140 }, { "epoch": 0.1409619065112669, "grad_norm": 6.6875, "learning_rate": 0.00047662426755593843, "loss": 15.9687, "step": 95160 }, { "epoch": 0.14099153280519527, "grad_norm": 6.78125, "learning_rate": 0.0004766193286209308, "loss": 15.9969, "step": 95180 }, { "epoch": 0.14102115909912366, "grad_norm": 6.65625, "learning_rate": 0.0004766143896859232, "loss": 15.9325, "step": 95200 }, { "epoch": 0.14105078539305205, "grad_norm": 6.0, "learning_rate": 0.00047660945075091567, "loss": 15.9289, "step": 95220 }, { "epoch": 0.14108041168698043, "grad_norm": 7.1875, "learning_rate": 0.0004766045118159081, "loss": 15.9302, "step": 95240 }, { "epoch": 0.14111003798090882, "grad_norm": 6.71875, "learning_rate": 0.00047659957288090056, "loss": 15.9066, "step": 95260 }, { "epoch": 0.1411396642748372, "grad_norm": 7.40625, "learning_rate": 0.00047659463394589296, "loss": 15.9399, "step": 95280 }, { "epoch": 0.1411692905687656, "grad_norm": 6.15625, "learning_rate": 0.00047658969501088546, "loss": 15.9334, "step": 95300 }, { "epoch": 0.14119891686269398, "grad_norm": 6.5, "learning_rate": 0.00047658475607587785, "loss": 15.9007, "step": 95320 }, { "epoch": 0.14122854315662237, "grad_norm": 6.40625, "learning_rate": 0.0004765798171408703, "loss": 15.9388, "step": 95340 }, { "epoch": 0.14125816945055075, "grad_norm": 8.125, "learning_rate": 0.0004765748782058627, "loss": 15.9264, "step": 95360 }, { "epoch": 0.14128779574447914, "grad_norm": 7.53125, "learning_rate": 0.0004765699392708552, "loss": 15.9991, "step": 95380 }, { "epoch": 0.14131742203840753, "grad_norm": 7.0625, "learning_rate": 0.0004765650003358476, "loss": 15.9428, "step": 95400 }, { "epoch": 0.1413470483323359, "grad_norm": 6.125, "learning_rate": 0.00047656006140084004, "loss": 15.9567, "step": 95420 }, { "epoch": 0.1413766746262643, "grad_norm": 6.53125, "learning_rate": 0.00047655512246583243, "loss": 15.963, "step": 95440 }, { "epoch": 0.14140630092019268, "grad_norm": 6.9375, "learning_rate": 0.00047655018353082493, "loss": 16.0006, "step": 95460 }, { "epoch": 0.14143592721412107, "grad_norm": 7.03125, "learning_rate": 0.0004765452445958173, "loss": 15.8883, "step": 95480 }, { "epoch": 0.14146555350804946, "grad_norm": 6.25, "learning_rate": 0.0004765403056608098, "loss": 15.895, "step": 95500 }, { "epoch": 0.14149517980197784, "grad_norm": 5.96875, "learning_rate": 0.00047653536672580217, "loss": 15.9855, "step": 95520 }, { "epoch": 0.14152480609590623, "grad_norm": 6.59375, "learning_rate": 0.0004765304277907946, "loss": 15.9098, "step": 95540 }, { "epoch": 0.14155443238983462, "grad_norm": 6.0625, "learning_rate": 0.00047652548885578706, "loss": 15.9547, "step": 95560 }, { "epoch": 0.141584058683763, "grad_norm": 6.34375, "learning_rate": 0.00047652054992077946, "loss": 15.8905, "step": 95580 }, { "epoch": 0.1416136849776914, "grad_norm": 6.5625, "learning_rate": 0.00047651561098577196, "loss": 15.9797, "step": 95600 }, { "epoch": 0.14164331127161978, "grad_norm": 6.34375, "learning_rate": 0.00047651067205076435, "loss": 15.9749, "step": 95620 }, { "epoch": 0.14167293756554816, "grad_norm": 7.25, "learning_rate": 0.0004765057331157568, "loss": 15.967, "step": 95640 }, { "epoch": 0.14170256385947655, "grad_norm": 7.03125, "learning_rate": 0.0004765007941807492, "loss": 15.9295, "step": 95660 }, { "epoch": 0.14173219015340496, "grad_norm": 6.96875, "learning_rate": 0.0004764958552457417, "loss": 15.9417, "step": 95680 }, { "epoch": 0.14176181644733335, "grad_norm": 6.1875, "learning_rate": 0.0004764909163107341, "loss": 15.9317, "step": 95700 }, { "epoch": 0.14179144274126174, "grad_norm": 7.65625, "learning_rate": 0.00047648597737572654, "loss": 15.958, "step": 95720 }, { "epoch": 0.14182106903519012, "grad_norm": 6.3125, "learning_rate": 0.00047648103844071893, "loss": 15.9309, "step": 95740 }, { "epoch": 0.1418506953291185, "grad_norm": 6.9375, "learning_rate": 0.00047647609950571143, "loss": 15.9321, "step": 95760 }, { "epoch": 0.1418803216230469, "grad_norm": 6.03125, "learning_rate": 0.0004764711605707038, "loss": 15.8771, "step": 95780 }, { "epoch": 0.14190994791697528, "grad_norm": 6.65625, "learning_rate": 0.0004764662216356963, "loss": 15.8825, "step": 95800 }, { "epoch": 0.14193957421090367, "grad_norm": 6.0, "learning_rate": 0.00047646128270068867, "loss": 16.0099, "step": 95820 }, { "epoch": 0.14196920050483205, "grad_norm": 7.125, "learning_rate": 0.00047645634376568117, "loss": 15.9222, "step": 95840 }, { "epoch": 0.14199882679876044, "grad_norm": 6.40625, "learning_rate": 0.00047645140483067356, "loss": 15.952, "step": 95860 }, { "epoch": 0.14202845309268883, "grad_norm": 6.59375, "learning_rate": 0.00047644646589566596, "loss": 15.954, "step": 95880 }, { "epoch": 0.14205807938661721, "grad_norm": 6.46875, "learning_rate": 0.00047644152696065846, "loss": 15.9364, "step": 95900 }, { "epoch": 0.1420877056805456, "grad_norm": 6.46875, "learning_rate": 0.00047643658802565085, "loss": 15.9383, "step": 95920 }, { "epoch": 0.142117331974474, "grad_norm": 6.21875, "learning_rate": 0.0004764316490906433, "loss": 15.9669, "step": 95940 }, { "epoch": 0.14214695826840237, "grad_norm": 6.0, "learning_rate": 0.0004764267101556357, "loss": 15.8971, "step": 95960 }, { "epoch": 0.14217658456233076, "grad_norm": 6.53125, "learning_rate": 0.0004764217712206282, "loss": 15.9354, "step": 95980 }, { "epoch": 0.14220621085625915, "grad_norm": 6.25, "learning_rate": 0.0004764168322856206, "loss": 15.9104, "step": 96000 }, { "epoch": 0.14223583715018753, "grad_norm": 5.9375, "learning_rate": 0.00047641189335061304, "loss": 15.9515, "step": 96020 }, { "epoch": 0.14226546344411592, "grad_norm": 6.03125, "learning_rate": 0.00047640695441560543, "loss": 15.9652, "step": 96040 }, { "epoch": 0.1422950897380443, "grad_norm": 7.71875, "learning_rate": 0.00047640201548059793, "loss": 15.9641, "step": 96060 }, { "epoch": 0.1423247160319727, "grad_norm": 6.8125, "learning_rate": 0.00047639707654559033, "loss": 15.9395, "step": 96080 }, { "epoch": 0.14235434232590108, "grad_norm": 6.5625, "learning_rate": 0.0004763921376105828, "loss": 15.9165, "step": 96100 }, { "epoch": 0.14238396861982947, "grad_norm": 7.46875, "learning_rate": 0.00047638719867557517, "loss": 15.9017, "step": 96120 }, { "epoch": 0.14241359491375785, "grad_norm": 6.5, "learning_rate": 0.00047638225974056767, "loss": 15.9457, "step": 96140 }, { "epoch": 0.14244322120768624, "grad_norm": 6.28125, "learning_rate": 0.00047637732080556006, "loss": 15.913, "step": 96160 }, { "epoch": 0.14247284750161462, "grad_norm": 6.21875, "learning_rate": 0.00047637238187055246, "loss": 15.8915, "step": 96180 }, { "epoch": 0.142502473795543, "grad_norm": 6.75, "learning_rate": 0.00047636744293554496, "loss": 15.9039, "step": 96200 }, { "epoch": 0.1425321000894714, "grad_norm": 6.65625, "learning_rate": 0.00047636250400053735, "loss": 15.9248, "step": 96220 }, { "epoch": 0.14256172638339978, "grad_norm": 6.5625, "learning_rate": 0.0004763575650655298, "loss": 15.9394, "step": 96240 }, { "epoch": 0.14259135267732817, "grad_norm": 6.96875, "learning_rate": 0.0004763526261305222, "loss": 15.9583, "step": 96260 }, { "epoch": 0.14262097897125656, "grad_norm": 6.15625, "learning_rate": 0.0004763476871955147, "loss": 15.8912, "step": 96280 }, { "epoch": 0.14265060526518494, "grad_norm": 7.25, "learning_rate": 0.0004763427482605071, "loss": 15.9595, "step": 96300 }, { "epoch": 0.14268023155911336, "grad_norm": 6.4375, "learning_rate": 0.00047633780932549954, "loss": 15.9521, "step": 96320 }, { "epoch": 0.14270985785304174, "grad_norm": 6.5, "learning_rate": 0.00047633287039049193, "loss": 15.9471, "step": 96340 }, { "epoch": 0.14273948414697013, "grad_norm": 7.84375, "learning_rate": 0.00047632793145548443, "loss": 15.9345, "step": 96360 }, { "epoch": 0.14276911044089852, "grad_norm": 7.09375, "learning_rate": 0.00047632299252047683, "loss": 15.9645, "step": 96380 }, { "epoch": 0.1427987367348269, "grad_norm": 6.4375, "learning_rate": 0.0004763180535854693, "loss": 15.9556, "step": 96400 }, { "epoch": 0.1428283630287553, "grad_norm": 7.03125, "learning_rate": 0.00047631311465046167, "loss": 15.8915, "step": 96420 }, { "epoch": 0.14285798932268368, "grad_norm": 6.375, "learning_rate": 0.00047630817571545417, "loss": 15.9162, "step": 96440 }, { "epoch": 0.14288761561661206, "grad_norm": 6.5625, "learning_rate": 0.00047630323678044656, "loss": 15.9399, "step": 96460 }, { "epoch": 0.14291724191054045, "grad_norm": 6.59375, "learning_rate": 0.000476298297845439, "loss": 15.954, "step": 96480 }, { "epoch": 0.14294686820446884, "grad_norm": 6.28125, "learning_rate": 0.00047629335891043146, "loss": 15.9193, "step": 96500 }, { "epoch": 0.14297649449839722, "grad_norm": 5.84375, "learning_rate": 0.0004762884199754239, "loss": 15.9811, "step": 96520 }, { "epoch": 0.1430061207923256, "grad_norm": 6.34375, "learning_rate": 0.0004762834810404163, "loss": 15.9729, "step": 96540 }, { "epoch": 0.143035747086254, "grad_norm": 6.625, "learning_rate": 0.0004762785421054087, "loss": 15.9127, "step": 96560 }, { "epoch": 0.14306537338018238, "grad_norm": 6.40625, "learning_rate": 0.0004762736031704012, "loss": 15.9495, "step": 96580 }, { "epoch": 0.14309499967411077, "grad_norm": 5.96875, "learning_rate": 0.0004762686642353936, "loss": 15.8956, "step": 96600 }, { "epoch": 0.14312462596803915, "grad_norm": 6.53125, "learning_rate": 0.00047626372530038604, "loss": 15.8788, "step": 96620 }, { "epoch": 0.14315425226196754, "grad_norm": 6.1875, "learning_rate": 0.00047625878636537843, "loss": 15.8981, "step": 96640 }, { "epoch": 0.14318387855589593, "grad_norm": 5.9375, "learning_rate": 0.00047625384743037093, "loss": 15.9403, "step": 96660 }, { "epoch": 0.1432135048498243, "grad_norm": 6.25, "learning_rate": 0.00047624890849536333, "loss": 15.8912, "step": 96680 }, { "epoch": 0.1432431311437527, "grad_norm": 6.9375, "learning_rate": 0.0004762439695603558, "loss": 15.9287, "step": 96700 }, { "epoch": 0.1432727574376811, "grad_norm": 6.0625, "learning_rate": 0.00047623903062534817, "loss": 15.9405, "step": 96720 }, { "epoch": 0.14330238373160947, "grad_norm": 6.59375, "learning_rate": 0.00047623409169034067, "loss": 15.9437, "step": 96740 }, { "epoch": 0.14333201002553786, "grad_norm": 6.53125, "learning_rate": 0.00047622915275533306, "loss": 15.9515, "step": 96760 }, { "epoch": 0.14336163631946625, "grad_norm": 6.53125, "learning_rate": 0.0004762242138203255, "loss": 15.9413, "step": 96780 }, { "epoch": 0.14339126261339463, "grad_norm": 6.5, "learning_rate": 0.00047621927488531796, "loss": 15.9623, "step": 96800 }, { "epoch": 0.14342088890732302, "grad_norm": 6.28125, "learning_rate": 0.0004762143359503104, "loss": 15.9309, "step": 96820 }, { "epoch": 0.1434505152012514, "grad_norm": 6.625, "learning_rate": 0.0004762093970153028, "loss": 15.8799, "step": 96840 }, { "epoch": 0.1434801414951798, "grad_norm": 6.1875, "learning_rate": 0.0004762044580802952, "loss": 15.9198, "step": 96860 }, { "epoch": 0.14350976778910818, "grad_norm": 5.84375, "learning_rate": 0.0004761995191452877, "loss": 15.9186, "step": 96880 }, { "epoch": 0.14353939408303656, "grad_norm": 6.8125, "learning_rate": 0.0004761945802102801, "loss": 15.8409, "step": 96900 }, { "epoch": 0.14356902037696495, "grad_norm": 6.65625, "learning_rate": 0.00047618964127527254, "loss": 15.9058, "step": 96920 }, { "epoch": 0.14359864667089337, "grad_norm": 6.71875, "learning_rate": 0.00047618470234026493, "loss": 15.9237, "step": 96940 }, { "epoch": 0.14362827296482175, "grad_norm": 5.90625, "learning_rate": 0.00047617976340525743, "loss": 15.9512, "step": 96960 }, { "epoch": 0.14365789925875014, "grad_norm": 6.0625, "learning_rate": 0.00047617482447024983, "loss": 15.967, "step": 96980 }, { "epoch": 0.14368752555267852, "grad_norm": 5.96875, "learning_rate": 0.0004761698855352423, "loss": 15.9211, "step": 97000 }, { "epoch": 0.1437171518466069, "grad_norm": 6.96875, "learning_rate": 0.00047616494660023467, "loss": 15.9396, "step": 97020 }, { "epoch": 0.1437467781405353, "grad_norm": 6.21875, "learning_rate": 0.00047616000766522717, "loss": 15.9094, "step": 97040 }, { "epoch": 0.14377640443446368, "grad_norm": 6.03125, "learning_rate": 0.00047615506873021957, "loss": 15.8821, "step": 97060 }, { "epoch": 0.14380603072839207, "grad_norm": 6.21875, "learning_rate": 0.000476150129795212, "loss": 15.9549, "step": 97080 }, { "epoch": 0.14383565702232046, "grad_norm": 7.0, "learning_rate": 0.00047614519086020446, "loss": 15.8942, "step": 97100 }, { "epoch": 0.14386528331624884, "grad_norm": 6.21875, "learning_rate": 0.0004761402519251969, "loss": 15.9032, "step": 97120 }, { "epoch": 0.14389490961017723, "grad_norm": 5.75, "learning_rate": 0.0004761353129901893, "loss": 15.8989, "step": 97140 }, { "epoch": 0.14392453590410562, "grad_norm": 7.28125, "learning_rate": 0.00047613037405518175, "loss": 15.9261, "step": 97160 }, { "epoch": 0.143954162198034, "grad_norm": 6.375, "learning_rate": 0.0004761254351201742, "loss": 15.9007, "step": 97180 }, { "epoch": 0.1439837884919624, "grad_norm": 9.3125, "learning_rate": 0.0004761204961851666, "loss": 15.9493, "step": 97200 }, { "epoch": 0.14401341478589078, "grad_norm": 6.96875, "learning_rate": 0.00047611555725015904, "loss": 15.9553, "step": 97220 }, { "epoch": 0.14404304107981916, "grad_norm": 6.84375, "learning_rate": 0.00047611061831515143, "loss": 15.9436, "step": 97240 }, { "epoch": 0.14407266737374755, "grad_norm": 7.1875, "learning_rate": 0.00047610567938014393, "loss": 15.936, "step": 97260 }, { "epoch": 0.14410229366767593, "grad_norm": 6.25, "learning_rate": 0.00047610074044513633, "loss": 15.9165, "step": 97280 }, { "epoch": 0.14413191996160432, "grad_norm": 6.75, "learning_rate": 0.0004760958015101288, "loss": 15.9197, "step": 97300 }, { "epoch": 0.1441615462555327, "grad_norm": 6.875, "learning_rate": 0.00047609086257512117, "loss": 15.9118, "step": 97320 }, { "epoch": 0.1441911725494611, "grad_norm": 6.96875, "learning_rate": 0.00047608592364011367, "loss": 15.9665, "step": 97340 }, { "epoch": 0.14422079884338948, "grad_norm": 6.5, "learning_rate": 0.00047608098470510607, "loss": 15.9324, "step": 97360 }, { "epoch": 0.14425042513731787, "grad_norm": 6.46875, "learning_rate": 0.0004760760457700985, "loss": 15.8946, "step": 97380 }, { "epoch": 0.14428005143124625, "grad_norm": 6.75, "learning_rate": 0.00047607110683509096, "loss": 15.9312, "step": 97400 }, { "epoch": 0.14430967772517464, "grad_norm": 6.625, "learning_rate": 0.0004760661679000834, "loss": 15.897, "step": 97420 }, { "epoch": 0.14433930401910303, "grad_norm": 5.84375, "learning_rate": 0.0004760612289650758, "loss": 15.8328, "step": 97440 }, { "epoch": 0.1443689303130314, "grad_norm": 7.1875, "learning_rate": 0.00047605629003006825, "loss": 15.923, "step": 97460 }, { "epoch": 0.1443985566069598, "grad_norm": 7.28125, "learning_rate": 0.0004760513510950607, "loss": 15.921, "step": 97480 }, { "epoch": 0.14442818290088819, "grad_norm": 6.375, "learning_rate": 0.00047604641216005315, "loss": 15.9104, "step": 97500 }, { "epoch": 0.14445780919481657, "grad_norm": 6.84375, "learning_rate": 0.00047604147322504554, "loss": 15.9116, "step": 97520 }, { "epoch": 0.14448743548874496, "grad_norm": 6.0, "learning_rate": 0.00047603653429003793, "loss": 15.862, "step": 97540 }, { "epoch": 0.14451706178267335, "grad_norm": 6.5625, "learning_rate": 0.00047603159535503044, "loss": 15.8753, "step": 97560 }, { "epoch": 0.14454668807660176, "grad_norm": 5.9375, "learning_rate": 0.00047602665642002283, "loss": 15.8975, "step": 97580 }, { "epoch": 0.14457631437053015, "grad_norm": 6.0625, "learning_rate": 0.0004760217174850153, "loss": 15.9515, "step": 97600 }, { "epoch": 0.14460594066445853, "grad_norm": 6.4375, "learning_rate": 0.00047601677855000767, "loss": 15.8932, "step": 97620 }, { "epoch": 0.14463556695838692, "grad_norm": 6.90625, "learning_rate": 0.00047601183961500017, "loss": 15.9027, "step": 97640 }, { "epoch": 0.1446651932523153, "grad_norm": 7.78125, "learning_rate": 0.00047600690067999257, "loss": 15.9253, "step": 97660 }, { "epoch": 0.1446948195462437, "grad_norm": 6.59375, "learning_rate": 0.000476001961744985, "loss": 15.9148, "step": 97680 }, { "epoch": 0.14472444584017208, "grad_norm": 5.6875, "learning_rate": 0.00047599702280997746, "loss": 15.9222, "step": 97700 }, { "epoch": 0.14475407213410046, "grad_norm": 7.1875, "learning_rate": 0.0004759920838749699, "loss": 15.9371, "step": 97720 }, { "epoch": 0.14478369842802885, "grad_norm": 7.03125, "learning_rate": 0.0004759871449399623, "loss": 15.8266, "step": 97740 }, { "epoch": 0.14481332472195724, "grad_norm": 6.53125, "learning_rate": 0.00047598220600495475, "loss": 15.8417, "step": 97760 }, { "epoch": 0.14484295101588562, "grad_norm": 6.5, "learning_rate": 0.0004759772670699472, "loss": 15.8389, "step": 97780 }, { "epoch": 0.144872577309814, "grad_norm": 7.09375, "learning_rate": 0.00047597232813493965, "loss": 15.9453, "step": 97800 }, { "epoch": 0.1449022036037424, "grad_norm": 6.78125, "learning_rate": 0.00047596738919993204, "loss": 15.9016, "step": 97820 }, { "epoch": 0.14493182989767078, "grad_norm": 6.25, "learning_rate": 0.0004759624502649245, "loss": 15.9767, "step": 97840 }, { "epoch": 0.14496145619159917, "grad_norm": 6.53125, "learning_rate": 0.00047595751132991694, "loss": 15.8949, "step": 97860 }, { "epoch": 0.14499108248552756, "grad_norm": 6.46875, "learning_rate": 0.00047595257239490933, "loss": 15.8645, "step": 97880 }, { "epoch": 0.14502070877945594, "grad_norm": 6.59375, "learning_rate": 0.0004759476334599018, "loss": 15.9169, "step": 97900 }, { "epoch": 0.14505033507338433, "grad_norm": 7.09375, "learning_rate": 0.00047594269452489417, "loss": 15.8729, "step": 97920 }, { "epoch": 0.14507996136731272, "grad_norm": 5.71875, "learning_rate": 0.00047593775558988667, "loss": 15.8148, "step": 97940 }, { "epoch": 0.1451095876612411, "grad_norm": 6.8125, "learning_rate": 0.00047593281665487907, "loss": 15.9166, "step": 97960 }, { "epoch": 0.1451392139551695, "grad_norm": 6.40625, "learning_rate": 0.0004759278777198715, "loss": 15.9122, "step": 97980 }, { "epoch": 0.14516884024909787, "grad_norm": 6.28125, "learning_rate": 0.00047592293878486396, "loss": 15.928, "step": 98000 }, { "epoch": 0.14519846654302626, "grad_norm": 6.125, "learning_rate": 0.0004759179998498564, "loss": 15.9109, "step": 98020 }, { "epoch": 0.14522809283695465, "grad_norm": 6.4375, "learning_rate": 0.0004759130609148488, "loss": 15.8922, "step": 98040 }, { "epoch": 0.14525771913088303, "grad_norm": 6.3125, "learning_rate": 0.00047590812197984125, "loss": 15.9058, "step": 98060 }, { "epoch": 0.14528734542481142, "grad_norm": 6.09375, "learning_rate": 0.0004759031830448337, "loss": 15.8909, "step": 98080 }, { "epoch": 0.1453169717187398, "grad_norm": 6.71875, "learning_rate": 0.00047589824410982615, "loss": 15.9371, "step": 98100 }, { "epoch": 0.1453465980126682, "grad_norm": 7.4375, "learning_rate": 0.00047589330517481854, "loss": 15.9048, "step": 98120 }, { "epoch": 0.14537622430659658, "grad_norm": 6.5625, "learning_rate": 0.000475888366239811, "loss": 15.9081, "step": 98140 }, { "epoch": 0.14540585060052497, "grad_norm": 6.28125, "learning_rate": 0.00047588342730480344, "loss": 15.9338, "step": 98160 }, { "epoch": 0.14543547689445335, "grad_norm": 7.03125, "learning_rate": 0.0004758784883697959, "loss": 15.9044, "step": 98180 }, { "epoch": 0.14546510318838174, "grad_norm": 6.625, "learning_rate": 0.0004758735494347883, "loss": 15.8738, "step": 98200 }, { "epoch": 0.14549472948231015, "grad_norm": 6.125, "learning_rate": 0.00047586861049978067, "loss": 15.8199, "step": 98220 }, { "epoch": 0.14552435577623854, "grad_norm": 6.34375, "learning_rate": 0.0004758636715647732, "loss": 15.9296, "step": 98240 }, { "epoch": 0.14555398207016693, "grad_norm": 7.25, "learning_rate": 0.00047585873262976557, "loss": 15.8733, "step": 98260 }, { "epoch": 0.1455836083640953, "grad_norm": 6.46875, "learning_rate": 0.000475853793694758, "loss": 15.8756, "step": 98280 }, { "epoch": 0.1456132346580237, "grad_norm": 6.5625, "learning_rate": 0.00047584885475975046, "loss": 15.9448, "step": 98300 }, { "epoch": 0.14564286095195209, "grad_norm": 6.65625, "learning_rate": 0.0004758439158247429, "loss": 15.9173, "step": 98320 }, { "epoch": 0.14567248724588047, "grad_norm": 6.5625, "learning_rate": 0.0004758389768897353, "loss": 15.8801, "step": 98340 }, { "epoch": 0.14570211353980886, "grad_norm": 6.46875, "learning_rate": 0.00047583403795472775, "loss": 15.8762, "step": 98360 }, { "epoch": 0.14573173983373724, "grad_norm": 7.125, "learning_rate": 0.0004758290990197202, "loss": 15.8996, "step": 98380 }, { "epoch": 0.14576136612766563, "grad_norm": 8.1875, "learning_rate": 0.00047582416008471265, "loss": 15.9118, "step": 98400 }, { "epoch": 0.14579099242159402, "grad_norm": 6.96875, "learning_rate": 0.00047581922114970504, "loss": 15.9532, "step": 98420 }, { "epoch": 0.1458206187155224, "grad_norm": 6.3125, "learning_rate": 0.0004758142822146975, "loss": 15.8629, "step": 98440 }, { "epoch": 0.1458502450094508, "grad_norm": 6.6875, "learning_rate": 0.00047580934327968994, "loss": 15.8175, "step": 98460 }, { "epoch": 0.14587987130337918, "grad_norm": 7.03125, "learning_rate": 0.0004758044043446824, "loss": 15.8885, "step": 98480 }, { "epoch": 0.14590949759730756, "grad_norm": 6.78125, "learning_rate": 0.0004757994654096748, "loss": 15.9055, "step": 98500 }, { "epoch": 0.14593912389123595, "grad_norm": 6.59375, "learning_rate": 0.0004757945264746672, "loss": 15.8714, "step": 98520 }, { "epoch": 0.14596875018516434, "grad_norm": 7.21875, "learning_rate": 0.0004757895875396597, "loss": 15.9081, "step": 98540 }, { "epoch": 0.14599837647909272, "grad_norm": 6.15625, "learning_rate": 0.00047578464860465207, "loss": 15.8911, "step": 98560 }, { "epoch": 0.1460280027730211, "grad_norm": 6.78125, "learning_rate": 0.0004757797096696445, "loss": 15.8988, "step": 98580 }, { "epoch": 0.1460576290669495, "grad_norm": 6.0, "learning_rate": 0.00047577477073463696, "loss": 15.8897, "step": 98600 }, { "epoch": 0.14608725536087788, "grad_norm": 6.125, "learning_rate": 0.0004757698317996294, "loss": 15.8422, "step": 98620 }, { "epoch": 0.14611688165480627, "grad_norm": 7.03125, "learning_rate": 0.0004757648928646218, "loss": 15.8942, "step": 98640 }, { "epoch": 0.14614650794873466, "grad_norm": 6.4375, "learning_rate": 0.00047575995392961425, "loss": 15.8585, "step": 98660 }, { "epoch": 0.14617613424266304, "grad_norm": 7.9375, "learning_rate": 0.0004757550149946067, "loss": 15.8967, "step": 98680 }, { "epoch": 0.14620576053659143, "grad_norm": 6.09375, "learning_rate": 0.00047575007605959915, "loss": 15.9617, "step": 98700 }, { "epoch": 0.14623538683051981, "grad_norm": 6.3125, "learning_rate": 0.00047574513712459154, "loss": 15.8791, "step": 98720 }, { "epoch": 0.1462650131244482, "grad_norm": 6.1875, "learning_rate": 0.000475740198189584, "loss": 15.8744, "step": 98740 }, { "epoch": 0.1462946394183766, "grad_norm": 6.15625, "learning_rate": 0.00047573525925457644, "loss": 15.8701, "step": 98760 }, { "epoch": 0.14632426571230497, "grad_norm": 6.25, "learning_rate": 0.0004757303203195689, "loss": 15.8425, "step": 98780 }, { "epoch": 0.14635389200623336, "grad_norm": 6.28125, "learning_rate": 0.0004757253813845613, "loss": 15.9387, "step": 98800 }, { "epoch": 0.14638351830016175, "grad_norm": 6.03125, "learning_rate": 0.0004757204424495537, "loss": 15.8201, "step": 98820 }, { "epoch": 0.14641314459409013, "grad_norm": 6.0, "learning_rate": 0.0004757155035145462, "loss": 15.8952, "step": 98840 }, { "epoch": 0.14644277088801855, "grad_norm": 8.375, "learning_rate": 0.0004757105645795386, "loss": 15.9362, "step": 98860 }, { "epoch": 0.14647239718194693, "grad_norm": 6.5, "learning_rate": 0.000475705625644531, "loss": 15.8947, "step": 98880 }, { "epoch": 0.14650202347587532, "grad_norm": 6.6875, "learning_rate": 0.00047570068670952346, "loss": 15.859, "step": 98900 }, { "epoch": 0.1465316497698037, "grad_norm": 6.03125, "learning_rate": 0.0004756957477745159, "loss": 15.8616, "step": 98920 }, { "epoch": 0.1465612760637321, "grad_norm": 7.3125, "learning_rate": 0.0004756908088395083, "loss": 15.8874, "step": 98940 }, { "epoch": 0.14659090235766048, "grad_norm": 6.3125, "learning_rate": 0.00047568586990450075, "loss": 15.9255, "step": 98960 }, { "epoch": 0.14662052865158887, "grad_norm": 6.65625, "learning_rate": 0.0004756809309694932, "loss": 15.9159, "step": 98980 }, { "epoch": 0.14665015494551725, "grad_norm": 6.9375, "learning_rate": 0.00047567599203448565, "loss": 15.8396, "step": 99000 }, { "epoch": 0.14667978123944564, "grad_norm": 6.75, "learning_rate": 0.00047567105309947804, "loss": 15.8544, "step": 99020 }, { "epoch": 0.14670940753337403, "grad_norm": 6.3125, "learning_rate": 0.0004756661141644705, "loss": 15.8167, "step": 99040 }, { "epoch": 0.1467390338273024, "grad_norm": 7.5, "learning_rate": 0.00047566117522946294, "loss": 15.8752, "step": 99060 }, { "epoch": 0.1467686601212308, "grad_norm": 6.0, "learning_rate": 0.0004756562362944554, "loss": 15.9426, "step": 99080 }, { "epoch": 0.14679828641515918, "grad_norm": 5.90625, "learning_rate": 0.0004756512973594478, "loss": 15.8358, "step": 99100 }, { "epoch": 0.14682791270908757, "grad_norm": 6.65625, "learning_rate": 0.0004756463584244402, "loss": 15.8739, "step": 99120 }, { "epoch": 0.14685753900301596, "grad_norm": 6.6875, "learning_rate": 0.0004756414194894327, "loss": 15.91, "step": 99140 }, { "epoch": 0.14688716529694434, "grad_norm": 7.0, "learning_rate": 0.0004756364805544251, "loss": 15.8444, "step": 99160 }, { "epoch": 0.14691679159087273, "grad_norm": 5.96875, "learning_rate": 0.0004756315416194175, "loss": 15.9179, "step": 99180 }, { "epoch": 0.14694641788480112, "grad_norm": 6.46875, "learning_rate": 0.00047562660268441, "loss": 15.9259, "step": 99200 }, { "epoch": 0.1469760441787295, "grad_norm": 7.4375, "learning_rate": 0.0004756216637494024, "loss": 15.8124, "step": 99220 }, { "epoch": 0.1470056704726579, "grad_norm": 6.625, "learning_rate": 0.0004756167248143948, "loss": 15.8876, "step": 99240 }, { "epoch": 0.14703529676658628, "grad_norm": 6.34375, "learning_rate": 0.00047561178587938725, "loss": 15.915, "step": 99260 }, { "epoch": 0.14706492306051466, "grad_norm": 6.625, "learning_rate": 0.0004756068469443797, "loss": 15.9366, "step": 99280 }, { "epoch": 0.14709454935444305, "grad_norm": 6.0625, "learning_rate": 0.00047560190800937215, "loss": 15.9305, "step": 99300 }, { "epoch": 0.14712417564837144, "grad_norm": 7.03125, "learning_rate": 0.00047559696907436454, "loss": 15.8623, "step": 99320 }, { "epoch": 0.14715380194229982, "grad_norm": 7.53125, "learning_rate": 0.000475592030139357, "loss": 15.9174, "step": 99340 }, { "epoch": 0.1471834282362282, "grad_norm": 6.375, "learning_rate": 0.00047558709120434944, "loss": 15.8917, "step": 99360 }, { "epoch": 0.1472130545301566, "grad_norm": 6.3125, "learning_rate": 0.0004755821522693419, "loss": 15.8335, "step": 99380 }, { "epoch": 0.14724268082408498, "grad_norm": 6.3125, "learning_rate": 0.0004755772133343343, "loss": 15.8639, "step": 99400 }, { "epoch": 0.14727230711801337, "grad_norm": 6.65625, "learning_rate": 0.0004755722743993267, "loss": 15.858, "step": 99420 }, { "epoch": 0.14730193341194175, "grad_norm": 6.375, "learning_rate": 0.0004755673354643192, "loss": 15.8239, "step": 99440 }, { "epoch": 0.14733155970587014, "grad_norm": 5.9375, "learning_rate": 0.0004755623965293116, "loss": 15.849, "step": 99460 }, { "epoch": 0.14736118599979853, "grad_norm": 6.0, "learning_rate": 0.000475557457594304, "loss": 15.8021, "step": 99480 }, { "epoch": 0.14739081229372694, "grad_norm": 7.0625, "learning_rate": 0.0004755525186592965, "loss": 15.8675, "step": 99500 }, { "epoch": 0.14742043858765533, "grad_norm": 6.375, "learning_rate": 0.0004755475797242889, "loss": 15.8739, "step": 99520 }, { "epoch": 0.14745006488158371, "grad_norm": 6.21875, "learning_rate": 0.00047554264078928136, "loss": 15.8996, "step": 99540 }, { "epoch": 0.1474796911755121, "grad_norm": 7.09375, "learning_rate": 0.00047553770185427375, "loss": 15.8776, "step": 99560 }, { "epoch": 0.1475093174694405, "grad_norm": 5.5625, "learning_rate": 0.0004755327629192662, "loss": 15.8426, "step": 99580 }, { "epoch": 0.14753894376336887, "grad_norm": 6.75, "learning_rate": 0.00047552782398425865, "loss": 15.8734, "step": 99600 }, { "epoch": 0.14756857005729726, "grad_norm": 6.8125, "learning_rate": 0.00047552288504925104, "loss": 15.8602, "step": 99620 }, { "epoch": 0.14759819635122565, "grad_norm": 5.90625, "learning_rate": 0.0004755179461142435, "loss": 15.9296, "step": 99640 }, { "epoch": 0.14762782264515403, "grad_norm": 6.34375, "learning_rate": 0.00047551300717923594, "loss": 15.8599, "step": 99660 }, { "epoch": 0.14765744893908242, "grad_norm": 5.9375, "learning_rate": 0.0004755080682442284, "loss": 15.9459, "step": 99680 }, { "epoch": 0.1476870752330108, "grad_norm": 7.1875, "learning_rate": 0.0004755031293092208, "loss": 15.8781, "step": 99700 }, { "epoch": 0.1477167015269392, "grad_norm": 6.28125, "learning_rate": 0.0004754981903742132, "loss": 15.8215, "step": 99720 }, { "epoch": 0.14774632782086758, "grad_norm": 6.0625, "learning_rate": 0.0004754932514392057, "loss": 15.8065, "step": 99740 }, { "epoch": 0.14777595411479597, "grad_norm": 6.53125, "learning_rate": 0.0004754883125041981, "loss": 15.8926, "step": 99760 }, { "epoch": 0.14780558040872435, "grad_norm": 7.34375, "learning_rate": 0.0004754833735691905, "loss": 15.824, "step": 99780 }, { "epoch": 0.14783520670265274, "grad_norm": 6.84375, "learning_rate": 0.000475478434634183, "loss": 15.8495, "step": 99800 }, { "epoch": 0.14786483299658112, "grad_norm": 7.375, "learning_rate": 0.0004754734956991754, "loss": 15.8227, "step": 99820 }, { "epoch": 0.1478944592905095, "grad_norm": 6.0625, "learning_rate": 0.00047546855676416786, "loss": 15.8278, "step": 99840 }, { "epoch": 0.1479240855844379, "grad_norm": 6.3125, "learning_rate": 0.00047546361782916025, "loss": 15.8409, "step": 99860 }, { "epoch": 0.14795371187836628, "grad_norm": 6.21875, "learning_rate": 0.00047545867889415275, "loss": 15.8754, "step": 99880 }, { "epoch": 0.14798333817229467, "grad_norm": 6.3125, "learning_rate": 0.00047545373995914515, "loss": 15.8956, "step": 99900 }, { "epoch": 0.14801296446622306, "grad_norm": 6.40625, "learning_rate": 0.00047544880102413754, "loss": 15.8705, "step": 99920 }, { "epoch": 0.14804259076015144, "grad_norm": 6.90625, "learning_rate": 0.00047544386208913, "loss": 15.8666, "step": 99940 }, { "epoch": 0.14807221705407983, "grad_norm": 8.0625, "learning_rate": 0.00047543892315412244, "loss": 15.8759, "step": 99960 }, { "epoch": 0.14810184334800822, "grad_norm": 6.15625, "learning_rate": 0.0004754339842191149, "loss": 15.8608, "step": 99980 }, { "epoch": 0.1481314696419366, "grad_norm": 6.15625, "learning_rate": 0.0004754290452841073, "loss": 15.8969, "step": 100000 }, { "epoch": 0.148161095935865, "grad_norm": 6.59375, "learning_rate": 0.0004754241063490997, "loss": 15.87, "step": 100020 }, { "epoch": 0.14819072222979338, "grad_norm": 8.0, "learning_rate": 0.0004754191674140922, "loss": 15.8457, "step": 100040 }, { "epoch": 0.14822034852372176, "grad_norm": 6.40625, "learning_rate": 0.0004754142284790846, "loss": 15.7922, "step": 100060 }, { "epoch": 0.14824997481765015, "grad_norm": 6.28125, "learning_rate": 0.000475409289544077, "loss": 15.8786, "step": 100080 }, { "epoch": 0.14827960111157854, "grad_norm": 7.03125, "learning_rate": 0.0004754043506090695, "loss": 15.8238, "step": 100100 }, { "epoch": 0.14830922740550695, "grad_norm": 6.875, "learning_rate": 0.0004753994116740619, "loss": 15.8827, "step": 100120 }, { "epoch": 0.14833885369943534, "grad_norm": 7.0, "learning_rate": 0.00047539447273905436, "loss": 15.8598, "step": 100140 }, { "epoch": 0.14836847999336372, "grad_norm": 7.4375, "learning_rate": 0.00047538953380404675, "loss": 15.8262, "step": 100160 }, { "epoch": 0.1483981062872921, "grad_norm": 6.4375, "learning_rate": 0.00047538459486903926, "loss": 15.8707, "step": 100180 }, { "epoch": 0.1484277325812205, "grad_norm": 6.75, "learning_rate": 0.00047537965593403165, "loss": 15.8333, "step": 100200 }, { "epoch": 0.14845735887514888, "grad_norm": 6.0625, "learning_rate": 0.0004753747169990241, "loss": 15.8554, "step": 100220 }, { "epoch": 0.14848698516907727, "grad_norm": 5.84375, "learning_rate": 0.0004753697780640165, "loss": 15.9166, "step": 100240 }, { "epoch": 0.14851661146300565, "grad_norm": 7.0625, "learning_rate": 0.00047536483912900894, "loss": 15.8203, "step": 100260 }, { "epoch": 0.14854623775693404, "grad_norm": 6.15625, "learning_rate": 0.0004753599001940014, "loss": 15.8473, "step": 100280 }, { "epoch": 0.14857586405086243, "grad_norm": 5.78125, "learning_rate": 0.0004753549612589938, "loss": 15.8812, "step": 100300 }, { "epoch": 0.1486054903447908, "grad_norm": 6.6875, "learning_rate": 0.00047535002232398623, "loss": 15.8895, "step": 100320 }, { "epoch": 0.1486351166387192, "grad_norm": 6.21875, "learning_rate": 0.0004753450833889787, "loss": 15.8178, "step": 100340 }, { "epoch": 0.1486647429326476, "grad_norm": 6.34375, "learning_rate": 0.0004753401444539711, "loss": 15.9332, "step": 100360 }, { "epoch": 0.14869436922657597, "grad_norm": 6.0, "learning_rate": 0.0004753352055189635, "loss": 15.8379, "step": 100380 }, { "epoch": 0.14872399552050436, "grad_norm": 7.28125, "learning_rate": 0.000475330266583956, "loss": 15.8962, "step": 100400 }, { "epoch": 0.14875362181443275, "grad_norm": 7.0, "learning_rate": 0.0004753253276489484, "loss": 15.8436, "step": 100420 }, { "epoch": 0.14878324810836113, "grad_norm": 6.90625, "learning_rate": 0.00047532038871394086, "loss": 15.8355, "step": 100440 }, { "epoch": 0.14881287440228952, "grad_norm": 7.03125, "learning_rate": 0.00047531544977893325, "loss": 15.8488, "step": 100460 }, { "epoch": 0.1488425006962179, "grad_norm": 6.25, "learning_rate": 0.00047531051084392576, "loss": 15.9364, "step": 100480 }, { "epoch": 0.1488721269901463, "grad_norm": 6.15625, "learning_rate": 0.00047530557190891815, "loss": 15.8913, "step": 100500 }, { "epoch": 0.14890175328407468, "grad_norm": 6.0, "learning_rate": 0.0004753006329739106, "loss": 15.8927, "step": 100520 }, { "epoch": 0.14893137957800306, "grad_norm": 6.125, "learning_rate": 0.000475295694038903, "loss": 15.8896, "step": 100540 }, { "epoch": 0.14896100587193145, "grad_norm": 6.96875, "learning_rate": 0.0004752907551038955, "loss": 15.8886, "step": 100560 }, { "epoch": 0.14899063216585984, "grad_norm": 6.375, "learning_rate": 0.0004752858161688879, "loss": 15.8431, "step": 100580 }, { "epoch": 0.14902025845978822, "grad_norm": 6.6875, "learning_rate": 0.0004752808772338803, "loss": 15.8411, "step": 100600 }, { "epoch": 0.1490498847537166, "grad_norm": 6.90625, "learning_rate": 0.00047527593829887273, "loss": 15.8837, "step": 100620 }, { "epoch": 0.149079511047645, "grad_norm": 7.3125, "learning_rate": 0.0004752709993638652, "loss": 15.8375, "step": 100640 }, { "epoch": 0.14910913734157338, "grad_norm": 6.46875, "learning_rate": 0.0004752660604288576, "loss": 15.8386, "step": 100660 }, { "epoch": 0.14913876363550177, "grad_norm": 6.75, "learning_rate": 0.00047526112149385, "loss": 15.9526, "step": 100680 }, { "epoch": 0.14916838992943016, "grad_norm": 6.46875, "learning_rate": 0.0004752561825588425, "loss": 15.9102, "step": 100700 }, { "epoch": 0.14919801622335854, "grad_norm": 7.1875, "learning_rate": 0.0004752512436238349, "loss": 15.7821, "step": 100720 }, { "epoch": 0.14922764251728693, "grad_norm": 5.84375, "learning_rate": 0.00047524630468882736, "loss": 15.835, "step": 100740 }, { "epoch": 0.14925726881121534, "grad_norm": 6.5625, "learning_rate": 0.00047524136575381975, "loss": 15.8995, "step": 100760 }, { "epoch": 0.14928689510514373, "grad_norm": 7.03125, "learning_rate": 0.00047523642681881226, "loss": 15.8228, "step": 100780 }, { "epoch": 0.14931652139907212, "grad_norm": 7.59375, "learning_rate": 0.00047523148788380465, "loss": 15.83, "step": 100800 }, { "epoch": 0.1493461476930005, "grad_norm": 7.25, "learning_rate": 0.0004752265489487971, "loss": 15.8967, "step": 100820 }, { "epoch": 0.1493757739869289, "grad_norm": 6.96875, "learning_rate": 0.0004752216100137895, "loss": 15.8432, "step": 100840 }, { "epoch": 0.14940540028085728, "grad_norm": 6.78125, "learning_rate": 0.000475216671078782, "loss": 15.8533, "step": 100860 }, { "epoch": 0.14943502657478566, "grad_norm": 6.71875, "learning_rate": 0.0004752117321437744, "loss": 15.8572, "step": 100880 }, { "epoch": 0.14946465286871405, "grad_norm": 7.125, "learning_rate": 0.00047520679320876683, "loss": 15.8622, "step": 100900 }, { "epoch": 0.14949427916264243, "grad_norm": 6.5, "learning_rate": 0.00047520185427375923, "loss": 15.8992, "step": 100920 }, { "epoch": 0.14952390545657082, "grad_norm": 6.21875, "learning_rate": 0.0004751969153387517, "loss": 15.8401, "step": 100940 }, { "epoch": 0.1495535317504992, "grad_norm": 6.34375, "learning_rate": 0.0004751919764037441, "loss": 15.7994, "step": 100960 }, { "epoch": 0.1495831580444276, "grad_norm": 6.46875, "learning_rate": 0.0004751870374687365, "loss": 15.8689, "step": 100980 }, { "epoch": 0.14961278433835598, "grad_norm": 7.03125, "learning_rate": 0.000475182098533729, "loss": 15.8686, "step": 101000 }, { "epoch": 0.14964241063228437, "grad_norm": 6.21875, "learning_rate": 0.0004751771595987214, "loss": 15.8408, "step": 101020 }, { "epoch": 0.14967203692621275, "grad_norm": 6.46875, "learning_rate": 0.00047517222066371386, "loss": 15.8667, "step": 101040 }, { "epoch": 0.14970166322014114, "grad_norm": 8.25, "learning_rate": 0.00047516728172870625, "loss": 15.8582, "step": 101060 }, { "epoch": 0.14973128951406953, "grad_norm": 6.78125, "learning_rate": 0.00047516234279369876, "loss": 15.8413, "step": 101080 }, { "epoch": 0.1497609158079979, "grad_norm": 6.84375, "learning_rate": 0.00047515740385869115, "loss": 15.837, "step": 101100 }, { "epoch": 0.1497905421019263, "grad_norm": 6.8125, "learning_rate": 0.0004751524649236836, "loss": 15.8153, "step": 101120 }, { "epoch": 0.14982016839585469, "grad_norm": 5.875, "learning_rate": 0.000475147525988676, "loss": 15.8841, "step": 101140 }, { "epoch": 0.14984979468978307, "grad_norm": 6.9375, "learning_rate": 0.0004751425870536685, "loss": 15.7932, "step": 101160 }, { "epoch": 0.14987942098371146, "grad_norm": 6.65625, "learning_rate": 0.0004751376481186609, "loss": 15.8238, "step": 101180 }, { "epoch": 0.14990904727763985, "grad_norm": 6.53125, "learning_rate": 0.00047513270918365333, "loss": 15.8007, "step": 101200 }, { "epoch": 0.14993867357156823, "grad_norm": 6.6875, "learning_rate": 0.00047512777024864573, "loss": 15.8114, "step": 101220 }, { "epoch": 0.14996829986549662, "grad_norm": 6.40625, "learning_rate": 0.00047512283131363823, "loss": 15.8998, "step": 101240 }, { "epoch": 0.149997926159425, "grad_norm": 7.28125, "learning_rate": 0.0004751178923786306, "loss": 15.7957, "step": 101260 }, { "epoch": 0.1500275524533534, "grad_norm": 6.9375, "learning_rate": 0.000475112953443623, "loss": 15.792, "step": 101280 }, { "epoch": 0.15005717874728178, "grad_norm": 6.90625, "learning_rate": 0.00047510801450861547, "loss": 15.858, "step": 101300 }, { "epoch": 0.15008680504121016, "grad_norm": 6.3125, "learning_rate": 0.0004751030755736079, "loss": 15.9071, "step": 101320 }, { "epoch": 0.15011643133513855, "grad_norm": 5.875, "learning_rate": 0.00047509813663860036, "loss": 15.8434, "step": 101340 }, { "epoch": 0.15014605762906694, "grad_norm": 6.84375, "learning_rate": 0.00047509319770359275, "loss": 15.8702, "step": 101360 }, { "epoch": 0.15017568392299532, "grad_norm": 6.96875, "learning_rate": 0.00047508825876858526, "loss": 15.8149, "step": 101380 }, { "epoch": 0.15020531021692374, "grad_norm": 6.78125, "learning_rate": 0.00047508331983357765, "loss": 15.8167, "step": 101400 }, { "epoch": 0.15023493651085212, "grad_norm": 5.71875, "learning_rate": 0.0004750783808985701, "loss": 15.8355, "step": 101420 }, { "epoch": 0.1502645628047805, "grad_norm": 6.125, "learning_rate": 0.0004750734419635625, "loss": 15.8727, "step": 101440 }, { "epoch": 0.1502941890987089, "grad_norm": 6.625, "learning_rate": 0.000475068503028555, "loss": 15.8786, "step": 101460 }, { "epoch": 0.15032381539263728, "grad_norm": 7.21875, "learning_rate": 0.0004750635640935474, "loss": 15.8171, "step": 101480 }, { "epoch": 0.15035344168656567, "grad_norm": 6.84375, "learning_rate": 0.00047505862515853983, "loss": 15.7871, "step": 101500 }, { "epoch": 0.15038306798049406, "grad_norm": 6.53125, "learning_rate": 0.00047505368622353223, "loss": 15.8054, "step": 101520 }, { "epoch": 0.15041269427442244, "grad_norm": 7.375, "learning_rate": 0.00047504874728852473, "loss": 15.834, "step": 101540 }, { "epoch": 0.15044232056835083, "grad_norm": 6.65625, "learning_rate": 0.0004750438083535171, "loss": 15.906, "step": 101560 }, { "epoch": 0.15047194686227922, "grad_norm": 6.59375, "learning_rate": 0.00047503886941850957, "loss": 15.8236, "step": 101580 }, { "epoch": 0.1505015731562076, "grad_norm": 7.875, "learning_rate": 0.00047503393048350197, "loss": 15.8215, "step": 101600 }, { "epoch": 0.150531199450136, "grad_norm": 6.625, "learning_rate": 0.0004750289915484944, "loss": 15.8343, "step": 101620 }, { "epoch": 0.15056082574406437, "grad_norm": 6.875, "learning_rate": 0.00047502405261348686, "loss": 15.7763, "step": 101640 }, { "epoch": 0.15059045203799276, "grad_norm": 7.0, "learning_rate": 0.00047501911367847925, "loss": 15.8537, "step": 101660 }, { "epoch": 0.15062007833192115, "grad_norm": 6.5, "learning_rate": 0.00047501417474347176, "loss": 15.902, "step": 101680 }, { "epoch": 0.15064970462584953, "grad_norm": 6.15625, "learning_rate": 0.00047500923580846415, "loss": 15.8103, "step": 101700 }, { "epoch": 0.15067933091977792, "grad_norm": 6.40625, "learning_rate": 0.0004750042968734566, "loss": 15.8109, "step": 101720 }, { "epoch": 0.1507089572137063, "grad_norm": 7.3125, "learning_rate": 0.000474999357938449, "loss": 15.8841, "step": 101740 }, { "epoch": 0.1507385835076347, "grad_norm": 6.375, "learning_rate": 0.0004749944190034415, "loss": 15.8348, "step": 101760 }, { "epoch": 0.15076820980156308, "grad_norm": 7.09375, "learning_rate": 0.0004749894800684339, "loss": 15.851, "step": 101780 }, { "epoch": 0.15079783609549147, "grad_norm": 6.59375, "learning_rate": 0.00047498454113342634, "loss": 15.822, "step": 101800 }, { "epoch": 0.15082746238941985, "grad_norm": 6.46875, "learning_rate": 0.00047497960219841873, "loss": 15.8174, "step": 101820 }, { "epoch": 0.15085708868334824, "grad_norm": 7.0625, "learning_rate": 0.00047497466326341123, "loss": 15.8871, "step": 101840 }, { "epoch": 0.15088671497727663, "grad_norm": 6.75, "learning_rate": 0.0004749697243284036, "loss": 15.8636, "step": 101860 }, { "epoch": 0.150916341271205, "grad_norm": 6.625, "learning_rate": 0.00047496478539339607, "loss": 15.8234, "step": 101880 }, { "epoch": 0.1509459675651334, "grad_norm": 7.78125, "learning_rate": 0.00047495984645838847, "loss": 15.8701, "step": 101900 }, { "epoch": 0.15097559385906179, "grad_norm": 6.25, "learning_rate": 0.00047495490752338097, "loss": 15.826, "step": 101920 }, { "epoch": 0.15100522015299017, "grad_norm": 6.84375, "learning_rate": 0.00047494996858837336, "loss": 15.8412, "step": 101940 }, { "epoch": 0.15103484644691856, "grad_norm": 6.21875, "learning_rate": 0.00047494502965336576, "loss": 15.831, "step": 101960 }, { "epoch": 0.15106447274084694, "grad_norm": 6.8125, "learning_rate": 0.00047494009071835826, "loss": 15.8454, "step": 101980 }, { "epoch": 0.15109409903477533, "grad_norm": 6.53125, "learning_rate": 0.00047493515178335065, "loss": 15.837, "step": 102000 } ], "logging_steps": 20, "max_steps": 2025228, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.499797585582883e+19, "train_batch_size": 48, "trial_name": null, "trial_params": null }