{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 20000, "global_step": 309400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03232062055591468, "grad_norm": 66.88756561279297, "learning_rate": 9.900000000000002e-06, "loss": 6.7553, "step": 100 }, { "epoch": 0.06464124111182935, "grad_norm": 289.3135986328125, "learning_rate": 1.9900000000000003e-05, "loss": 4.1267, "step": 200 }, { "epoch": 0.09696186166774402, "grad_norm": 45.74175262451172, "learning_rate": 2.9900000000000002e-05, "loss": 3.921, "step": 300 }, { "epoch": 0.1292824822236587, "grad_norm": 32.51044464111328, "learning_rate": 3.99e-05, "loss": 3.8577, "step": 400 }, { "epoch": 0.16160310277957338, "grad_norm": 2.4775547981262207, "learning_rate": 4.99e-05, "loss": 3.8039, "step": 500 }, { "epoch": 0.19392372333548805, "grad_norm": 3.538912057876587, "learning_rate": 5.9900000000000006e-05, "loss": 3.712, "step": 600 }, { "epoch": 0.22624434389140272, "grad_norm": 2.861309051513672, "learning_rate": 6.99e-05, "loss": 3.6726, "step": 700 }, { "epoch": 0.2585649644473174, "grad_norm": 3.0789966583251953, "learning_rate": 7.99e-05, "loss": 3.614, "step": 800 }, { "epoch": 0.2908855850032321, "grad_norm": 3.7614481449127197, "learning_rate": 8.989999999999999e-05, "loss": 3.5828, "step": 900 }, { "epoch": 0.32320620555914675, "grad_norm": 2.577141761779785, "learning_rate": 9.99e-05, "loss": 3.5587, "step": 1000 }, { "epoch": 0.3555268261150614, "grad_norm": 3.804441213607788, "learning_rate": 0.0001099, "loss": 3.5228, "step": 1100 }, { "epoch": 0.3878474466709761, "grad_norm": 2.961329221725464, "learning_rate": 0.00011990000000000001, "loss": 3.4824, "step": 1200 }, { "epoch": 0.42016806722689076, "grad_norm": 4.203826427459717, "learning_rate": 0.00012989999999999999, "loss": 3.4918, "step": 1300 }, { "epoch": 0.45248868778280543, "grad_norm": 2.4737327098846436, "learning_rate": 0.0001399, "loss": 3.4637, "step": 1400 }, { "epoch": 0.4848093083387201, "grad_norm": 2.1090731620788574, "learning_rate": 0.0001499, "loss": 3.443, "step": 1500 }, { "epoch": 0.5171299288946348, "grad_norm": 2.3649468421936035, "learning_rate": 0.00015989999999999998, "loss": 3.4346, "step": 1600 }, { "epoch": 0.5494505494505495, "grad_norm": 2.484947443008423, "learning_rate": 0.0001699, "loss": 3.4079, "step": 1700 }, { "epoch": 0.5817711700064642, "grad_norm": 2.3304977416992188, "learning_rate": 0.0001799, "loss": 3.4021, "step": 1800 }, { "epoch": 0.6140917905623788, "grad_norm": 4.561882019042969, "learning_rate": 0.0001899, "loss": 3.4102, "step": 1900 }, { "epoch": 0.6464124111182935, "grad_norm": 2.2382924556732178, "learning_rate": 0.0001999, "loss": 3.3626, "step": 2000 }, { "epoch": 0.6787330316742082, "grad_norm": 1.9975863695144653, "learning_rate": 0.0002099, "loss": 3.3456, "step": 2100 }, { "epoch": 0.7110536522301228, "grad_norm": 2.0771262645721436, "learning_rate": 0.0002199, "loss": 3.3514, "step": 2200 }, { "epoch": 0.7433742727860375, "grad_norm": 1.9176592826843262, "learning_rate": 0.0002299, "loss": 3.32, "step": 2300 }, { "epoch": 0.7756948933419522, "grad_norm": 1.5840671062469482, "learning_rate": 0.0002399, "loss": 3.3192, "step": 2400 }, { "epoch": 0.8080155138978669, "grad_norm": 1.8482654094696045, "learning_rate": 0.0002499, "loss": 3.3045, "step": 2500 }, { "epoch": 0.8403361344537815, "grad_norm": 1.8740278482437134, "learning_rate": 0.00025990000000000003, "loss": 3.3037, "step": 2600 }, { "epoch": 0.8726567550096962, "grad_norm": 1.7020177841186523, "learning_rate": 0.0002699, "loss": 3.3148, "step": 2700 }, { "epoch": 0.9049773755656109, "grad_norm": 2.070858955383301, "learning_rate": 0.0002799, "loss": 3.3109, "step": 2800 }, { "epoch": 0.9372979961215255, "grad_norm": 1.7364747524261475, "learning_rate": 0.0002899, "loss": 3.2752, "step": 2900 }, { "epoch": 0.9696186166774402, "grad_norm": 1.6613702774047852, "learning_rate": 0.0002999, "loss": 3.2583, "step": 3000 }, { "epoch": 1.0019392372333549, "grad_norm": 1.32826828956604, "learning_rate": 0.0003099, "loss": 3.2762, "step": 3100 }, { "epoch": 1.0342598577892697, "grad_norm": 1.132542610168457, "learning_rate": 0.0003199, "loss": 3.1884, "step": 3200 }, { "epoch": 1.0665804783451842, "grad_norm": 1.0772817134857178, "learning_rate": 0.00032990000000000005, "loss": 3.1768, "step": 3300 }, { "epoch": 1.098901098901099, "grad_norm": 1.0938591957092285, "learning_rate": 0.00033989999999999997, "loss": 3.1658, "step": 3400 }, { "epoch": 1.1312217194570136, "grad_norm": 0.8866330981254578, "learning_rate": 0.0003499, "loss": 3.1859, "step": 3500 }, { "epoch": 1.1635423400129283, "grad_norm": 1.2083250284194946, "learning_rate": 0.0003599, "loss": 3.1764, "step": 3600 }, { "epoch": 1.195862960568843, "grad_norm": 0.8650018572807312, "learning_rate": 0.0003699, "loss": 3.1657, "step": 3700 }, { "epoch": 1.2281835811247577, "grad_norm": 1.4492496252059937, "learning_rate": 0.0003799, "loss": 3.1614, "step": 3800 }, { "epoch": 1.2605042016806722, "grad_norm": 1.0163968801498413, "learning_rate": 0.00038990000000000004, "loss": 3.1643, "step": 3900 }, { "epoch": 1.292824822236587, "grad_norm": 1.2569615840911865, "learning_rate": 0.00039989999999999996, "loss": 3.1387, "step": 4000 }, { "epoch": 1.3251454427925016, "grad_norm": 0.9586646556854248, "learning_rate": 0.0004099, "loss": 3.1571, "step": 4100 }, { "epoch": 1.3574660633484164, "grad_norm": 1.0670658349990845, "learning_rate": 0.0004199, "loss": 3.158, "step": 4200 }, { "epoch": 1.389786683904331, "grad_norm": 0.8761248588562012, "learning_rate": 0.0004299, "loss": 3.1633, "step": 4300 }, { "epoch": 1.4221073044602457, "grad_norm": 0.8355223536491394, "learning_rate": 0.0004399, "loss": 3.152, "step": 4400 }, { "epoch": 1.4544279250161603, "grad_norm": 0.7409338355064392, "learning_rate": 0.00044990000000000004, "loss": 3.1438, "step": 4500 }, { "epoch": 1.486748545572075, "grad_norm": 0.89316326379776, "learning_rate": 0.0004599, "loss": 3.1399, "step": 4600 }, { "epoch": 1.5190691661279896, "grad_norm": 0.8241536617279053, "learning_rate": 0.0004699, "loss": 3.1498, "step": 4700 }, { "epoch": 1.5513897866839044, "grad_norm": 0.7510794997215271, "learning_rate": 0.0004799, "loss": 3.1499, "step": 4800 }, { "epoch": 1.5837104072398192, "grad_norm": 2.760122299194336, "learning_rate": 0.0004899, "loss": 3.1394, "step": 4900 }, { "epoch": 1.6160310277957337, "grad_norm": 1.0606189966201782, "learning_rate": 0.0004999000000000001, "loss": 3.1237, "step": 5000 }, { "epoch": 1.6483516483516483, "grad_norm": 0.9081011414527893, "learning_rate": 0.0005099, "loss": 3.1506, "step": 5100 }, { "epoch": 1.680672268907563, "grad_norm": 0.8172641396522522, "learning_rate": 0.0005199, "loss": 3.1137, "step": 5200 }, { "epoch": 1.7129928894634778, "grad_norm": 0.8468915224075317, "learning_rate": 0.0005299, "loss": 3.121, "step": 5300 }, { "epoch": 1.7453135100193924, "grad_norm": 1.045647144317627, "learning_rate": 0.0005399000000000001, "loss": 3.1216, "step": 5400 }, { "epoch": 1.777634130575307, "grad_norm": 0.9530847072601318, "learning_rate": 0.0005499000000000001, "loss": 3.1325, "step": 5500 }, { "epoch": 1.8099547511312217, "grad_norm": 0.8608438968658447, "learning_rate": 0.0005599, "loss": 3.1243, "step": 5600 }, { "epoch": 1.8422753716871365, "grad_norm": 0.9429317712783813, "learning_rate": 0.0005698999999999999, "loss": 3.1128, "step": 5700 }, { "epoch": 1.874595992243051, "grad_norm": 0.7844179272651672, "learning_rate": 0.0005799, "loss": 3.1021, "step": 5800 }, { "epoch": 1.9069166127989656, "grad_norm": 0.7004523873329163, "learning_rate": 0.0005899, "loss": 3.1218, "step": 5900 }, { "epoch": 1.9392372333548804, "grad_norm": 0.8081589937210083, "learning_rate": 0.0005999, "loss": 3.1188, "step": 6000 }, { "epoch": 1.9715578539107952, "grad_norm": 0.7867565155029297, "learning_rate": 0.0006099, "loss": 3.1269, "step": 6100 }, { "epoch": 2.0038784744667097, "grad_norm": 0.7220768928527832, "learning_rate": 0.0006199, "loss": 3.1239, "step": 6200 }, { "epoch": 2.0361990950226243, "grad_norm": 3.026498794555664, "learning_rate": 0.0006299000000000001, "loss": 2.9632, "step": 6300 }, { "epoch": 2.0685197155785393, "grad_norm": 0.9547273516654968, "learning_rate": 0.0006399, "loss": 2.9836, "step": 6400 }, { "epoch": 2.100840336134454, "grad_norm": 0.6885647177696228, "learning_rate": 0.0006499, "loss": 2.9829, "step": 6500 }, { "epoch": 2.1331609566903684, "grad_norm": 0.6459360718727112, "learning_rate": 0.0006599, "loss": 2.9664, "step": 6600 }, { "epoch": 2.165481577246283, "grad_norm": 0.7086136341094971, "learning_rate": 0.0006699000000000001, "loss": 3.006, "step": 6700 }, { "epoch": 2.197802197802198, "grad_norm": 0.7053001523017883, "learning_rate": 0.0006799, "loss": 3.0035, "step": 6800 }, { "epoch": 2.2301228183581125, "grad_norm": 0.7083503007888794, "learning_rate": 0.0006899, "loss": 3.0054, "step": 6900 }, { "epoch": 2.262443438914027, "grad_norm": 0.9023714661598206, "learning_rate": 0.0006998999999999999, "loss": 3.0125, "step": 7000 }, { "epoch": 2.2947640594699417, "grad_norm": 0.8567366600036621, "learning_rate": 0.0007099, "loss": 2.9999, "step": 7100 }, { "epoch": 2.3270846800258567, "grad_norm": 0.7361999154090881, "learning_rate": 0.0007199, "loss": 3.0057, "step": 7200 }, { "epoch": 2.3594053005817712, "grad_norm": 0.6811242699623108, "learning_rate": 0.0007299, "loss": 3.0164, "step": 7300 }, { "epoch": 2.391725921137686, "grad_norm": 0.7550032734870911, "learning_rate": 0.0007399, "loss": 3.0249, "step": 7400 }, { "epoch": 2.4240465416936003, "grad_norm": 0.7618332505226135, "learning_rate": 0.0007499000000000001, "loss": 3.0256, "step": 7500 }, { "epoch": 2.4563671622495153, "grad_norm": 0.7856324911117554, "learning_rate": 0.0007599, "loss": 3.0083, "step": 7600 }, { "epoch": 2.48868778280543, "grad_norm": 0.7583564519882202, "learning_rate": 0.0007699, "loss": 3.0091, "step": 7700 }, { "epoch": 2.5210084033613445, "grad_norm": 0.5609772801399231, "learning_rate": 0.0007799, "loss": 3.0246, "step": 7800 }, { "epoch": 2.553329023917259, "grad_norm": 0.978759229183197, "learning_rate": 0.0007899000000000001, "loss": 3.0241, "step": 7900 }, { "epoch": 2.585649644473174, "grad_norm": 0.782306969165802, "learning_rate": 0.0007999000000000001, "loss": 3.0268, "step": 8000 }, { "epoch": 2.6179702650290886, "grad_norm": 0.6647075414657593, "learning_rate": 0.0008099, "loss": 2.9166, "step": 8100 }, { "epoch": 2.650290885585003, "grad_norm": 0.7843105792999268, "learning_rate": 0.0008198999999999999, "loss": 2.9117, "step": 8200 }, { "epoch": 2.682611506140918, "grad_norm": 0.7441990375518799, "learning_rate": 0.0008299, "loss": 2.9023, "step": 8300 }, { "epoch": 2.7149321266968327, "grad_norm": 0.7716897130012512, "learning_rate": 0.0008399, "loss": 2.9142, "step": 8400 }, { "epoch": 2.7472527472527473, "grad_norm": 0.6922304034233093, "learning_rate": 0.0008499, "loss": 2.9073, "step": 8500 }, { "epoch": 2.779573367808662, "grad_norm": 0.5913103818893433, "learning_rate": 0.0008599, "loss": 2.9103, "step": 8600 }, { "epoch": 2.8118939883645764, "grad_norm": 0.8580074906349182, "learning_rate": 0.0008699000000000001, "loss": 2.9196, "step": 8700 }, { "epoch": 2.8442146089204914, "grad_norm": 0.6174400448799133, "learning_rate": 0.0008799000000000001, "loss": 2.9267, "step": 8800 }, { "epoch": 2.876535229476406, "grad_norm": 0.7376132607460022, "learning_rate": 0.0008899, "loss": 2.9468, "step": 8900 }, { "epoch": 2.9088558500323205, "grad_norm": 0.6166063547134399, "learning_rate": 0.0008999, "loss": 2.9474, "step": 9000 }, { "epoch": 2.9411764705882355, "grad_norm": 0.6004799604415894, "learning_rate": 0.0009099, "loss": 2.933, "step": 9100 }, { "epoch": 2.97349709114415, "grad_norm": 0.6095404028892517, "learning_rate": 0.0009199000000000001, "loss": 2.9256, "step": 9200 }, { "epoch": 3.0058177117000646, "grad_norm": 0.6382755637168884, "learning_rate": 0.0009299, "loss": 2.915, "step": 9300 }, { "epoch": 3.038138332255979, "grad_norm": 0.6753411889076233, "learning_rate": 0.0009399, "loss": 2.8479, "step": 9400 }, { "epoch": 3.070458952811894, "grad_norm": 0.6568487286567688, "learning_rate": 0.0009498999999999999, "loss": 2.8787, "step": 9500 }, { "epoch": 3.1027795733678087, "grad_norm": 0.6492568850517273, "learning_rate": 0.0009599, "loss": 2.8759, "step": 9600 }, { "epoch": 3.1351001939237233, "grad_norm": 0.6102732419967651, "learning_rate": 0.0009699, "loss": 2.8744, "step": 9700 }, { "epoch": 3.167420814479638, "grad_norm": 0.6784172654151917, "learning_rate": 0.0009799, "loss": 2.8858, "step": 9800 }, { "epoch": 3.199741435035553, "grad_norm": 0.6339209079742432, "learning_rate": 0.0009899, "loss": 2.905, "step": 9900 }, { "epoch": 3.2320620555914674, "grad_norm": 0.6749688982963562, "learning_rate": 0.0009999, "loss": 2.8886, "step": 10000 }, { "epoch": 3.264382676147382, "grad_norm": 0.6347559094429016, "learning_rate": 0.001, "loss": 2.8889, "step": 10100 }, { "epoch": 3.2967032967032965, "grad_norm": 0.6496750116348267, "learning_rate": 0.001, "loss": 2.8995, "step": 10200 }, { "epoch": 3.3290239172592115, "grad_norm": 0.6221312284469604, "learning_rate": 0.001, "loss": 2.8879, "step": 10300 }, { "epoch": 3.361344537815126, "grad_norm": 0.7453864216804504, "learning_rate": 0.001, "loss": 2.8963, "step": 10400 }, { "epoch": 3.3936651583710407, "grad_norm": 0.5936296582221985, "learning_rate": 0.001, "loss": 2.8824, "step": 10500 }, { "epoch": 3.425985778926955, "grad_norm": 0.5613277554512024, "learning_rate": 0.001, "loss": 2.8848, "step": 10600 }, { "epoch": 3.45830639948287, "grad_norm": 0.8417971134185791, "learning_rate": 0.001, "loss": 2.877, "step": 10700 }, { "epoch": 3.490627020038785, "grad_norm": 0.7448247671127319, "learning_rate": 0.001, "loss": 2.8981, "step": 10800 }, { "epoch": 3.5229476405946993, "grad_norm": 2.9669742584228516, "learning_rate": 0.001, "loss": 2.9163, "step": 10900 }, { "epoch": 3.555268261150614, "grad_norm": 0.6835345029830933, "learning_rate": 0.001, "loss": 2.8926, "step": 11000 }, { "epoch": 3.587588881706529, "grad_norm": 0.6762674450874329, "learning_rate": 0.001, "loss": 2.8979, "step": 11100 }, { "epoch": 3.6199095022624435, "grad_norm": 0.6161184906959534, "learning_rate": 0.001, "loss": 2.9069, "step": 11200 }, { "epoch": 3.652230122818358, "grad_norm": 0.6049681305885315, "learning_rate": 0.001, "loss": 2.8988, "step": 11300 }, { "epoch": 3.684550743374273, "grad_norm": 0.8423939943313599, "learning_rate": 0.001, "loss": 2.8965, "step": 11400 }, { "epoch": 3.7168713639301876, "grad_norm": 0.5460887551307678, "learning_rate": 0.001, "loss": 2.9071, "step": 11500 }, { "epoch": 3.749191984486102, "grad_norm": 0.5244189500808716, "learning_rate": 0.001, "loss": 2.9083, "step": 11600 }, { "epoch": 3.7815126050420167, "grad_norm": 0.7033939957618713, "learning_rate": 0.001, "loss": 2.9149, "step": 11700 }, { "epoch": 3.8138332255979313, "grad_norm": 0.7727781534194946, "learning_rate": 0.001, "loss": 2.8929, "step": 11800 }, { "epoch": 3.8461538461538463, "grad_norm": 0.6533515453338623, "learning_rate": 0.001, "loss": 2.8945, "step": 11900 }, { "epoch": 3.878474466709761, "grad_norm": 0.6650077104568481, "learning_rate": 0.001, "loss": 2.9128, "step": 12000 }, { "epoch": 3.9107950872656754, "grad_norm": 0.5781480669975281, "learning_rate": 0.001, "loss": 2.8918, "step": 12100 }, { "epoch": 3.9431157078215904, "grad_norm": 0.6937007308006287, "learning_rate": 0.001, "loss": 2.91, "step": 12200 }, { "epoch": 3.975436328377505, "grad_norm": 0.7640072703361511, "learning_rate": 0.001, "loss": 2.9112, "step": 12300 }, { "epoch": 4.0077569489334195, "grad_norm": 0.7321586012840271, "learning_rate": 0.001, "loss": 2.8646, "step": 12400 }, { "epoch": 4.040077569489334, "grad_norm": 0.5589576959609985, "learning_rate": 0.001, "loss": 2.6958, "step": 12500 }, { "epoch": 4.072398190045249, "grad_norm": 0.6566141843795776, "learning_rate": 0.001, "loss": 2.7134, "step": 12600 }, { "epoch": 4.104718810601163, "grad_norm": 0.6012281775474548, "learning_rate": 0.001, "loss": 2.7294, "step": 12700 }, { "epoch": 4.137039431157079, "grad_norm": 0.5390825867652893, "learning_rate": 0.001, "loss": 2.7277, "step": 12800 }, { "epoch": 4.169360051712993, "grad_norm": 0.6723217368125916, "learning_rate": 0.001, "loss": 2.7376, "step": 12900 }, { "epoch": 4.201680672268908, "grad_norm": 0.5671263933181763, "learning_rate": 0.001, "loss": 2.7317, "step": 13000 }, { "epoch": 4.234001292824822, "grad_norm": 0.7619785070419312, "learning_rate": 0.001, "loss": 2.7454, "step": 13100 }, { "epoch": 4.266321913380737, "grad_norm": 0.7301904559135437, "learning_rate": 0.001, "loss": 2.7405, "step": 13200 }, { "epoch": 4.298642533936651, "grad_norm": 0.8046207427978516, "learning_rate": 0.001, "loss": 2.7377, "step": 13300 }, { "epoch": 4.330963154492566, "grad_norm": 0.5398958325386047, "learning_rate": 0.001, "loss": 2.7597, "step": 13400 }, { "epoch": 4.3632837750484805, "grad_norm": 0.6474503874778748, "learning_rate": 0.001, "loss": 2.7611, "step": 13500 }, { "epoch": 4.395604395604396, "grad_norm": 0.6445947885513306, "learning_rate": 0.001, "loss": 2.7419, "step": 13600 }, { "epoch": 4.4279250161603105, "grad_norm": 0.8848703503608704, "learning_rate": 0.001, "loss": 2.7228, "step": 13700 }, { "epoch": 4.460245636716225, "grad_norm": 0.5973100066184998, "learning_rate": 0.001, "loss": 2.7548, "step": 13800 }, { "epoch": 4.49256625727214, "grad_norm": 0.5903606414794922, "learning_rate": 0.001, "loss": 2.7629, "step": 13900 }, { "epoch": 4.524886877828054, "grad_norm": 0.634925127029419, "learning_rate": 0.001, "loss": 2.7289, "step": 14000 }, { "epoch": 4.557207498383969, "grad_norm": 0.6438220739364624, "learning_rate": 0.001, "loss": 2.7498, "step": 14100 }, { "epoch": 4.589528118939883, "grad_norm": 0.7223869562149048, "learning_rate": 0.001, "loss": 2.7596, "step": 14200 }, { "epoch": 4.621848739495798, "grad_norm": 0.6791086196899414, "learning_rate": 0.001, "loss": 2.7606, "step": 14300 }, { "epoch": 4.654169360051713, "grad_norm": 0.5691003799438477, "learning_rate": 0.001, "loss": 2.7634, "step": 14400 }, { "epoch": 4.686489980607628, "grad_norm": 0.4981643557548523, "learning_rate": 0.001, "loss": 2.7673, "step": 14500 }, { "epoch": 4.7188106011635425, "grad_norm": 0.6301348209381104, "learning_rate": 0.001, "loss": 2.7437, "step": 14600 }, { "epoch": 4.751131221719457, "grad_norm": 0.7287594676017761, "learning_rate": 0.001, "loss": 2.752, "step": 14700 }, { "epoch": 4.783451842275372, "grad_norm": 0.5703333020210266, "learning_rate": 0.001, "loss": 2.7341, "step": 14800 }, { "epoch": 4.815772462831286, "grad_norm": 0.6989650130271912, "learning_rate": 0.001, "loss": 2.7428, "step": 14900 }, { "epoch": 4.848093083387201, "grad_norm": 0.6579810976982117, "learning_rate": 0.001, "loss": 2.7636, "step": 15000 }, { "epoch": 4.880413703943116, "grad_norm": 0.4922899603843689, "learning_rate": 0.001, "loss": 2.749, "step": 15100 }, { "epoch": 4.912734324499031, "grad_norm": 0.785276472568512, "learning_rate": 0.001, "loss": 2.755, "step": 15200 }, { "epoch": 4.945054945054945, "grad_norm": 0.5782487988471985, "learning_rate": 0.001, "loss": 2.7493, "step": 15300 }, { "epoch": 4.97737556561086, "grad_norm": 0.6123748421669006, "learning_rate": 0.001, "loss": 2.7664, "step": 15400 }, { "epoch": 5.009696186166774, "grad_norm": 0.7287214994430542, "learning_rate": 0.001, "loss": 2.7029, "step": 15500 }, { "epoch": 5.042016806722689, "grad_norm": 0.8925830721855164, "learning_rate": 0.001, "loss": 2.5254, "step": 15600 }, { "epoch": 5.0743374272786035, "grad_norm": 0.6593157052993774, "learning_rate": 0.001, "loss": 2.5639, "step": 15700 }, { "epoch": 5.106658047834518, "grad_norm": 0.9514279365539551, "learning_rate": 0.001, "loss": 2.5682, "step": 15800 }, { "epoch": 5.1389786683904335, "grad_norm": 0.6457402110099792, "learning_rate": 0.001, "loss": 2.5711, "step": 15900 }, { "epoch": 5.171299288946348, "grad_norm": 0.6891105771064758, "learning_rate": 0.001, "loss": 2.5954, "step": 16000 }, { "epoch": 5.203619909502263, "grad_norm": 0.7481428980827332, "learning_rate": 0.001, "loss": 2.5816, "step": 16100 }, { "epoch": 5.235940530058177, "grad_norm": 0.7517533898353577, "learning_rate": 0.001, "loss": 2.5863, "step": 16200 }, { "epoch": 5.268261150614092, "grad_norm": 0.6421850323677063, "learning_rate": 0.001, "loss": 2.5953, "step": 16300 }, { "epoch": 5.300581771170006, "grad_norm": 0.6543081998825073, "learning_rate": 0.001, "loss": 2.6048, "step": 16400 }, { "epoch": 5.332902391725921, "grad_norm": 0.6907905340194702, "learning_rate": 0.001, "loss": 2.6027, "step": 16500 }, { "epoch": 5.365223012281835, "grad_norm": 0.5717052221298218, "learning_rate": 0.001, "loss": 2.5986, "step": 16600 }, { "epoch": 5.397543632837751, "grad_norm": 0.692226767539978, "learning_rate": 0.001, "loss": 2.601, "step": 16700 }, { "epoch": 5.429864253393665, "grad_norm": 0.6696076989173889, "learning_rate": 0.001, "loss": 2.6176, "step": 16800 }, { "epoch": 5.46218487394958, "grad_norm": 0.7315012812614441, "learning_rate": 0.001, "loss": 2.6038, "step": 16900 }, { "epoch": 5.4945054945054945, "grad_norm": 0.6441245675086975, "learning_rate": 0.001, "loss": 2.6235, "step": 17000 }, { "epoch": 5.526826115061409, "grad_norm": 0.784841001033783, "learning_rate": 0.001, "loss": 2.611, "step": 17100 }, { "epoch": 5.559146735617324, "grad_norm": 0.8113099932670593, "learning_rate": 0.001, "loss": 2.6178, "step": 17200 }, { "epoch": 5.591467356173238, "grad_norm": 0.7180185317993164, "learning_rate": 0.001, "loss": 2.5989, "step": 17300 }, { "epoch": 5.623787976729153, "grad_norm": 0.625437319278717, "learning_rate": 0.001, "loss": 2.6175, "step": 17400 }, { "epoch": 5.656108597285068, "grad_norm": 0.6755355000495911, "learning_rate": 0.001, "loss": 2.6185, "step": 17500 }, { "epoch": 5.688429217840983, "grad_norm": 0.8148272037506104, "learning_rate": 0.001, "loss": 2.6467, "step": 17600 }, { "epoch": 5.720749838396897, "grad_norm": 0.7229486703872681, "learning_rate": 0.001, "loss": 2.6318, "step": 17700 }, { "epoch": 5.753070458952812, "grad_norm": 0.8580754399299622, "learning_rate": 0.001, "loss": 2.6383, "step": 17800 }, { "epoch": 5.785391079508726, "grad_norm": 0.7429691553115845, "learning_rate": 0.001, "loss": 2.6134, "step": 17900 }, { "epoch": 5.817711700064641, "grad_norm": 0.9500037431716919, "learning_rate": 0.001, "loss": 2.6358, "step": 18000 }, { "epoch": 5.850032320620556, "grad_norm": 0.7822642922401428, "learning_rate": 0.001, "loss": 2.6401, "step": 18100 }, { "epoch": 5.882352941176471, "grad_norm": 0.6479462385177612, "learning_rate": 0.001, "loss": 2.6358, "step": 18200 }, { "epoch": 5.914673561732386, "grad_norm": 0.7904376983642578, "learning_rate": 0.001, "loss": 2.6571, "step": 18300 }, { "epoch": 5.9469941822883, "grad_norm": 0.7136936783790588, "learning_rate": 0.001, "loss": 2.6362, "step": 18400 }, { "epoch": 5.979314802844215, "grad_norm": 0.8026592135429382, "learning_rate": 0.001, "loss": 2.6342, "step": 18500 }, { "epoch": 6.011635423400129, "grad_norm": 0.7487733960151672, "learning_rate": 0.001, "loss": 2.5643, "step": 18600 }, { "epoch": 6.043956043956044, "grad_norm": 0.7606178522109985, "learning_rate": 0.001, "loss": 2.4319, "step": 18700 }, { "epoch": 6.076276664511958, "grad_norm": 0.6428869962692261, "learning_rate": 0.001, "loss": 2.4242, "step": 18800 }, { "epoch": 6.108597285067873, "grad_norm": 1.0921810865402222, "learning_rate": 0.001, "loss": 2.4427, "step": 18900 }, { "epoch": 6.140917905623788, "grad_norm": 0.733107328414917, "learning_rate": 0.001, "loss": 2.468, "step": 19000 }, { "epoch": 6.173238526179703, "grad_norm": 0.8409562110900879, "learning_rate": 0.001, "loss": 2.4603, "step": 19100 }, { "epoch": 6.2055591467356175, "grad_norm": 0.7587376832962036, "learning_rate": 0.001, "loss": 2.4618, "step": 19200 }, { "epoch": 6.237879767291532, "grad_norm": 0.7010579109191895, "learning_rate": 0.001, "loss": 2.4815, "step": 19300 }, { "epoch": 6.270200387847447, "grad_norm": 1.59754478931427, "learning_rate": 0.001, "loss": 2.5032, "step": 19400 }, { "epoch": 6.302521008403361, "grad_norm": 0.8466838598251343, "learning_rate": 0.001, "loss": 2.4821, "step": 19500 }, { "epoch": 6.334841628959276, "grad_norm": 0.7049305438995361, "learning_rate": 0.001, "loss": 2.476, "step": 19600 }, { "epoch": 6.36716224951519, "grad_norm": 0.9942471981048584, "learning_rate": 0.001, "loss": 2.4938, "step": 19700 }, { "epoch": 6.399482870071106, "grad_norm": 0.619873046875, "learning_rate": 0.001, "loss": 2.5094, "step": 19800 }, { "epoch": 6.43180349062702, "grad_norm": 0.6633639931678772, "learning_rate": 0.001, "loss": 2.4949, "step": 19900 }, { "epoch": 6.464124111182935, "grad_norm": 0.7525016665458679, "learning_rate": 0.001, "loss": 2.4835, "step": 20000 }, { "epoch": 6.496444731738849, "grad_norm": 0.9855210185050964, "learning_rate": 0.001, "loss": 2.5049, "step": 20100 }, { "epoch": 6.528765352294764, "grad_norm": 1.0406348705291748, "learning_rate": 0.001, "loss": 2.5106, "step": 20200 }, { "epoch": 6.5610859728506785, "grad_norm": 0.7409505844116211, "learning_rate": 0.001, "loss": 2.5109, "step": 20300 }, { "epoch": 6.593406593406593, "grad_norm": 0.7548337578773499, "learning_rate": 0.001, "loss": 2.5175, "step": 20400 }, { "epoch": 6.625727213962508, "grad_norm": 0.728243887424469, "learning_rate": 0.001, "loss": 2.5138, "step": 20500 }, { "epoch": 6.658047834518423, "grad_norm": 0.7074353098869324, "learning_rate": 0.001, "loss": 2.5069, "step": 20600 }, { "epoch": 6.690368455074338, "grad_norm": 0.7344133257865906, "learning_rate": 0.001, "loss": 2.5084, "step": 20700 }, { "epoch": 6.722689075630252, "grad_norm": 0.6384263634681702, "learning_rate": 0.001, "loss": 2.5244, "step": 20800 }, { "epoch": 6.755009696186167, "grad_norm": 0.7917103171348572, "learning_rate": 0.001, "loss": 2.5391, "step": 20900 }, { "epoch": 6.787330316742081, "grad_norm": 0.8949106931686401, "learning_rate": 0.001, "loss": 2.5241, "step": 21000 }, { "epoch": 6.819650937297996, "grad_norm": 0.71200031042099, "learning_rate": 0.001, "loss": 2.5122, "step": 21100 }, { "epoch": 6.85197155785391, "grad_norm": 0.7527298331260681, "learning_rate": 0.001, "loss": 2.5351, "step": 21200 }, { "epoch": 6.884292178409826, "grad_norm": 0.9986135363578796, "learning_rate": 0.001, "loss": 2.5351, "step": 21300 }, { "epoch": 6.91661279896574, "grad_norm": 0.6079899072647095, "learning_rate": 0.001, "loss": 2.5611, "step": 21400 }, { "epoch": 6.948933419521655, "grad_norm": 0.5985284447669983, "learning_rate": 0.001, "loss": 2.5463, "step": 21500 }, { "epoch": 6.98125404007757, "grad_norm": 0.7148701548576355, "learning_rate": 0.001, "loss": 2.5257, "step": 21600 }, { "epoch": 7.013574660633484, "grad_norm": 0.8065994381904602, "learning_rate": 0.001, "loss": 2.4399, "step": 21700 }, { "epoch": 7.045895281189399, "grad_norm": 0.8313201665878296, "learning_rate": 0.001, "loss": 2.3344, "step": 21800 }, { "epoch": 7.078215901745313, "grad_norm": 1.226493239402771, "learning_rate": 0.001, "loss": 2.3308, "step": 21900 }, { "epoch": 7.110536522301228, "grad_norm": 0.8439836502075195, "learning_rate": 0.001, "loss": 2.3437, "step": 22000 }, { "epoch": 7.142857142857143, "grad_norm": 0.8471029996871948, "learning_rate": 0.001, "loss": 2.3484, "step": 22100 }, { "epoch": 7.175177763413058, "grad_norm": 1.0813688039779663, "learning_rate": 0.001, "loss": 2.3564, "step": 22200 }, { "epoch": 7.207498383968972, "grad_norm": 0.9241097569465637, "learning_rate": 0.001, "loss": 2.3673, "step": 22300 }, { "epoch": 7.239819004524887, "grad_norm": 1.023908019065857, "learning_rate": 0.001, "loss": 2.3764, "step": 22400 }, { "epoch": 7.2721396250808015, "grad_norm": 0.7787300944328308, "learning_rate": 0.001, "loss": 2.3865, "step": 22500 }, { "epoch": 7.304460245636716, "grad_norm": 0.8337021470069885, "learning_rate": 0.001, "loss": 2.3815, "step": 22600 }, { "epoch": 7.336780866192631, "grad_norm": 0.7736303210258484, "learning_rate": 0.001, "loss": 2.3807, "step": 22700 }, { "epoch": 7.369101486748546, "grad_norm": 0.7054851055145264, "learning_rate": 0.001, "loss": 2.4049, "step": 22800 }, { "epoch": 7.401422107304461, "grad_norm": 0.8541019558906555, "learning_rate": 0.001, "loss": 2.4045, "step": 22900 }, { "epoch": 7.433742727860375, "grad_norm": 0.7019346356391907, "learning_rate": 0.001, "loss": 2.4038, "step": 23000 }, { "epoch": 7.46606334841629, "grad_norm": 0.7218290567398071, "learning_rate": 0.001, "loss": 2.4048, "step": 23100 }, { "epoch": 7.498383968972204, "grad_norm": 1.0404819250106812, "learning_rate": 0.001, "loss": 2.4196, "step": 23200 }, { "epoch": 7.530704589528119, "grad_norm": 0.9479681849479675, "learning_rate": 0.001, "loss": 2.4037, "step": 23300 }, { "epoch": 7.563025210084033, "grad_norm": 0.9519902467727661, "learning_rate": 0.001, "loss": 2.4236, "step": 23400 }, { "epoch": 7.595345830639948, "grad_norm": 0.7967488765716553, "learning_rate": 0.001, "loss": 2.4048, "step": 23500 }, { "epoch": 7.6276664511958625, "grad_norm": 0.8376961350440979, "learning_rate": 0.001, "loss": 2.4146, "step": 23600 }, { "epoch": 7.659987071751778, "grad_norm": 0.7245869636535645, "learning_rate": 0.001, "loss": 2.413, "step": 23700 }, { "epoch": 7.6923076923076925, "grad_norm": 0.93974369764328, "learning_rate": 0.001, "loss": 2.4076, "step": 23800 }, { "epoch": 7.724628312863607, "grad_norm": 0.8594515919685364, "learning_rate": 0.001, "loss": 2.4125, "step": 23900 }, { "epoch": 7.756948933419522, "grad_norm": 0.742110013961792, "learning_rate": 0.001, "loss": 2.4153, "step": 24000 }, { "epoch": 7.789269553975436, "grad_norm": 0.9653095602989197, "learning_rate": 0.001, "loss": 2.4358, "step": 24100 }, { "epoch": 7.821590174531351, "grad_norm": 0.7756925225257874, "learning_rate": 0.001, "loss": 2.4335, "step": 24200 }, { "epoch": 7.853910795087265, "grad_norm": 0.7115217447280884, "learning_rate": 0.001, "loss": 2.4118, "step": 24300 }, { "epoch": 7.886231415643181, "grad_norm": 0.7968654632568359, "learning_rate": 0.001, "loss": 2.4233, "step": 24400 }, { "epoch": 7.918552036199095, "grad_norm": 0.7779571413993835, "learning_rate": 0.001, "loss": 2.425, "step": 24500 }, { "epoch": 7.95087265675501, "grad_norm": 0.8876954317092896, "learning_rate": 0.001, "loss": 2.464, "step": 24600 }, { "epoch": 7.983193277310924, "grad_norm": 0.7932461500167847, "learning_rate": 0.001, "loss": 2.4456, "step": 24700 }, { "epoch": 8.015513897866839, "grad_norm": 0.9637851119041443, "learning_rate": 0.001, "loss": 2.3134, "step": 24800 }, { "epoch": 8.047834518422754, "grad_norm": 0.9260105490684509, "learning_rate": 0.001, "loss": 2.2351, "step": 24900 }, { "epoch": 8.080155138978668, "grad_norm": 1.0348434448242188, "learning_rate": 0.001, "loss": 2.2492, "step": 25000 }, { "epoch": 8.112475759534583, "grad_norm": 1.0315574407577515, "learning_rate": 0.001, "loss": 2.2525, "step": 25100 }, { "epoch": 8.144796380090497, "grad_norm": 1.0305691957473755, "learning_rate": 0.001, "loss": 2.2773, "step": 25200 }, { "epoch": 8.177117000646412, "grad_norm": 1.050819754600525, "learning_rate": 0.001, "loss": 2.2531, "step": 25300 }, { "epoch": 8.209437621202326, "grad_norm": 0.9204294681549072, "learning_rate": 0.001, "loss": 2.2718, "step": 25400 }, { "epoch": 8.241758241758241, "grad_norm": 1.0493748188018799, "learning_rate": 0.001, "loss": 2.2707, "step": 25500 }, { "epoch": 8.274078862314157, "grad_norm": 1.0253117084503174, "learning_rate": 0.001, "loss": 2.2601, "step": 25600 }, { "epoch": 8.306399482870072, "grad_norm": 1.111863136291504, "learning_rate": 0.001, "loss": 2.2725, "step": 25700 }, { "epoch": 8.338720103425986, "grad_norm": 1.4464136362075806, "learning_rate": 0.001, "loss": 2.2943, "step": 25800 }, { "epoch": 8.371040723981901, "grad_norm": 0.8653415441513062, "learning_rate": 0.001, "loss": 2.2977, "step": 25900 }, { "epoch": 8.403361344537815, "grad_norm": 1.0481756925582886, "learning_rate": 0.001, "loss": 2.2875, "step": 26000 }, { "epoch": 8.43568196509373, "grad_norm": 1.0806539058685303, "learning_rate": 0.001, "loss": 2.2981, "step": 26100 }, { "epoch": 8.468002585649645, "grad_norm": 1.2387253046035767, "learning_rate": 0.001, "loss": 2.3222, "step": 26200 }, { "epoch": 8.50032320620556, "grad_norm": 1.090662956237793, "learning_rate": 0.001, "loss": 2.3181, "step": 26300 }, { "epoch": 8.532643826761474, "grad_norm": 1.035711646080017, "learning_rate": 0.001, "loss": 2.3134, "step": 26400 }, { "epoch": 8.564964447317388, "grad_norm": 1.017500400543213, "learning_rate": 0.001, "loss": 2.3257, "step": 26500 }, { "epoch": 8.597285067873303, "grad_norm": 1.197115182876587, "learning_rate": 0.001, "loss": 2.3233, "step": 26600 }, { "epoch": 8.629605688429217, "grad_norm": 0.9728832244873047, "learning_rate": 0.001, "loss": 2.3347, "step": 26700 }, { "epoch": 8.661926308985132, "grad_norm": 1.0433615446090698, "learning_rate": 0.001, "loss": 2.3082, "step": 26800 }, { "epoch": 8.694246929541046, "grad_norm": 1.156947374343872, "learning_rate": 0.001, "loss": 2.3294, "step": 26900 }, { "epoch": 8.726567550096961, "grad_norm": 0.8574950098991394, "learning_rate": 0.001, "loss": 2.3439, "step": 27000 }, { "epoch": 8.758888170652877, "grad_norm": 1.0127687454223633, "learning_rate": 0.001, "loss": 2.3335, "step": 27100 }, { "epoch": 8.791208791208792, "grad_norm": 0.8747397661209106, "learning_rate": 0.001, "loss": 2.3225, "step": 27200 }, { "epoch": 8.823529411764707, "grad_norm": 0.9062464833259583, "learning_rate": 0.001, "loss": 2.3381, "step": 27300 }, { "epoch": 8.855850032320621, "grad_norm": 1.384310245513916, "learning_rate": 0.001, "loss": 2.3595, "step": 27400 }, { "epoch": 8.888170652876536, "grad_norm": 1.2313902378082275, "learning_rate": 0.001, "loss": 2.3429, "step": 27500 }, { "epoch": 8.92049127343245, "grad_norm": 1.0385419130325317, "learning_rate": 0.001, "loss": 2.3322, "step": 27600 }, { "epoch": 8.952811893988365, "grad_norm": 0.8618333339691162, "learning_rate": 0.001, "loss": 2.3619, "step": 27700 }, { "epoch": 8.98513251454428, "grad_norm": 0.8095340728759766, "learning_rate": 0.001, "loss": 2.3515, "step": 27800 }, { "epoch": 9.017453135100194, "grad_norm": 0.8152446746826172, "learning_rate": 0.001, "loss": 2.2173, "step": 27900 }, { "epoch": 9.049773755656108, "grad_norm": 0.9104654788970947, "learning_rate": 0.001, "loss": 2.1334, "step": 28000 }, { "epoch": 9.082094376212023, "grad_norm": 0.9501104354858398, "learning_rate": 0.001, "loss": 2.1654, "step": 28100 }, { "epoch": 9.114414996767938, "grad_norm": 0.7561880350112915, "learning_rate": 0.001, "loss": 2.1617, "step": 28200 }, { "epoch": 9.146735617323852, "grad_norm": 0.7767878174781799, "learning_rate": 0.001, "loss": 2.1663, "step": 28300 }, { "epoch": 9.179056237879767, "grad_norm": 1.3334866762161255, "learning_rate": 0.001, "loss": 2.1753, "step": 28400 }, { "epoch": 9.211376858435681, "grad_norm": 0.9232680201530457, "learning_rate": 0.001, "loss": 2.1592, "step": 28500 }, { "epoch": 9.243697478991596, "grad_norm": 0.9162148237228394, "learning_rate": 0.001, "loss": 2.1784, "step": 28600 }, { "epoch": 9.276018099547512, "grad_norm": 0.9561941027641296, "learning_rate": 0.001, "loss": 2.1872, "step": 28700 }, { "epoch": 9.308338720103427, "grad_norm": 0.9245226979255676, "learning_rate": 0.001, "loss": 2.2256, "step": 28800 }, { "epoch": 9.340659340659341, "grad_norm": 0.8566681146621704, "learning_rate": 0.001, "loss": 2.232, "step": 28900 }, { "epoch": 9.372979961215256, "grad_norm": 0.7797905802726746, "learning_rate": 0.001, "loss": 2.2236, "step": 29000 }, { "epoch": 9.40530058177117, "grad_norm": 0.8803383708000183, "learning_rate": 0.001, "loss": 2.2305, "step": 29100 }, { "epoch": 9.437621202327085, "grad_norm": 0.9841068983078003, "learning_rate": 0.001, "loss": 2.2286, "step": 29200 }, { "epoch": 9.469941822883, "grad_norm": 1.1682223081588745, "learning_rate": 0.001, "loss": 2.2275, "step": 29300 }, { "epoch": 9.502262443438914, "grad_norm": 0.8928592205047607, "learning_rate": 0.001, "loss": 2.2312, "step": 29400 }, { "epoch": 9.534583063994829, "grad_norm": 0.8675641417503357, "learning_rate": 0.001, "loss": 2.2342, "step": 29500 }, { "epoch": 9.566903684550743, "grad_norm": 0.9928888082504272, "learning_rate": 0.001, "loss": 2.2156, "step": 29600 }, { "epoch": 9.599224305106658, "grad_norm": 1.1597424745559692, "learning_rate": 0.001, "loss": 2.2309, "step": 29700 }, { "epoch": 9.631544925662572, "grad_norm": 0.8660579323768616, "learning_rate": 0.001, "loss": 2.2665, "step": 29800 }, { "epoch": 9.663865546218487, "grad_norm": 0.8956477046012878, "learning_rate": 0.001, "loss": 2.2221, "step": 29900 }, { "epoch": 9.696186166774401, "grad_norm": 1.0077533721923828, "learning_rate": 0.001, "loss": 2.2294, "step": 30000 }, { "epoch": 9.728506787330316, "grad_norm": 0.9608396291732788, "learning_rate": 0.001, "loss": 2.2633, "step": 30100 }, { "epoch": 9.760827407886232, "grad_norm": 1.1400505304336548, "learning_rate": 0.001, "loss": 2.2494, "step": 30200 }, { "epoch": 9.793148028442147, "grad_norm": 0.9409213066101074, "learning_rate": 0.001, "loss": 2.2723, "step": 30300 }, { "epoch": 9.825468648998061, "grad_norm": 0.852608323097229, "learning_rate": 0.001, "loss": 2.2425, "step": 30400 }, { "epoch": 9.857789269553976, "grad_norm": 0.9838336110115051, "learning_rate": 0.001, "loss": 2.248, "step": 30500 }, { "epoch": 9.89010989010989, "grad_norm": 0.9911132454872131, "learning_rate": 0.001, "loss": 2.2382, "step": 30600 }, { "epoch": 9.922430510665805, "grad_norm": 0.7434262633323669, "learning_rate": 0.001, "loss": 2.2705, "step": 30700 }, { "epoch": 9.95475113122172, "grad_norm": 0.7670807242393494, "learning_rate": 0.001, "loss": 2.2694, "step": 30800 }, { "epoch": 9.987071751777634, "grad_norm": 0.9397686123847961, "learning_rate": 0.001, "loss": 2.2755, "step": 30900 }, { "epoch": 10.019392372333549, "grad_norm": 0.9051443338394165, "learning_rate": 0.001, "loss": 2.1607, "step": 31000 }, { "epoch": 10.051712992889463, "grad_norm": 1.093329906463623, "learning_rate": 0.001, "loss": 2.0741, "step": 31100 }, { "epoch": 10.084033613445378, "grad_norm": 1.1604770421981812, "learning_rate": 0.001, "loss": 2.0766, "step": 31200 }, { "epoch": 10.116354234001292, "grad_norm": 0.7785763740539551, "learning_rate": 0.001, "loss": 2.0835, "step": 31300 }, { "epoch": 10.148674854557207, "grad_norm": 1.1409004926681519, "learning_rate": 0.001, "loss": 2.0825, "step": 31400 }, { "epoch": 10.180995475113122, "grad_norm": 0.7650139927864075, "learning_rate": 0.001, "loss": 2.1077, "step": 31500 }, { "epoch": 10.213316095669036, "grad_norm": 0.7837153077125549, "learning_rate": 0.001, "loss": 2.1105, "step": 31600 }, { "epoch": 10.24563671622495, "grad_norm": 1.2077397108078003, "learning_rate": 0.001, "loss": 2.1089, "step": 31700 }, { "epoch": 10.277957336780867, "grad_norm": 0.9440487623214722, "learning_rate": 0.001, "loss": 2.109, "step": 31800 }, { "epoch": 10.310277957336782, "grad_norm": 0.7951603531837463, "learning_rate": 0.001, "loss": 2.1353, "step": 31900 }, { "epoch": 10.342598577892696, "grad_norm": 0.7156578302383423, "learning_rate": 0.001, "loss": 2.109, "step": 32000 }, { "epoch": 10.37491919844861, "grad_norm": 0.7799148559570312, "learning_rate": 0.001, "loss": 2.1223, "step": 32100 }, { "epoch": 10.407239819004525, "grad_norm": 0.9924362301826477, "learning_rate": 0.001, "loss": 2.1638, "step": 32200 }, { "epoch": 10.43956043956044, "grad_norm": 0.6534135341644287, "learning_rate": 0.001, "loss": 2.1347, "step": 32300 }, { "epoch": 10.471881060116354, "grad_norm": 1.0332063436508179, "learning_rate": 0.001, "loss": 2.1485, "step": 32400 }, { "epoch": 10.504201680672269, "grad_norm": 0.7265544533729553, "learning_rate": 0.001, "loss": 2.1351, "step": 32500 }, { "epoch": 10.536522301228183, "grad_norm": 0.7755329012870789, "learning_rate": 0.001, "loss": 2.1511, "step": 32600 }, { "epoch": 10.568842921784098, "grad_norm": 0.7896728515625, "learning_rate": 0.001, "loss": 2.1583, "step": 32700 }, { "epoch": 10.601163542340013, "grad_norm": 1.0628712177276611, "learning_rate": 0.001, "loss": 2.1577, "step": 32800 }, { "epoch": 10.633484162895927, "grad_norm": 0.9422286748886108, "learning_rate": 0.001, "loss": 2.1433, "step": 32900 }, { "epoch": 10.665804783451842, "grad_norm": 0.8647461533546448, "learning_rate": 0.001, "loss": 2.1747, "step": 33000 }, { "epoch": 10.698125404007756, "grad_norm": 0.7959375381469727, "learning_rate": 0.001, "loss": 2.164, "step": 33100 }, { "epoch": 10.73044602456367, "grad_norm": 0.9136986136436462, "learning_rate": 0.001, "loss": 2.1632, "step": 33200 }, { "epoch": 10.762766645119587, "grad_norm": 1.1918915510177612, "learning_rate": 0.001, "loss": 2.1715, "step": 33300 }, { "epoch": 10.795087265675502, "grad_norm": 0.9639080166816711, "learning_rate": 0.001, "loss": 2.1833, "step": 33400 }, { "epoch": 10.827407886231416, "grad_norm": 0.8730572462081909, "learning_rate": 0.001, "loss": 2.1853, "step": 33500 }, { "epoch": 10.85972850678733, "grad_norm": 0.8985272645950317, "learning_rate": 0.001, "loss": 2.1778, "step": 33600 }, { "epoch": 10.892049127343245, "grad_norm": 0.8060519695281982, "learning_rate": 0.001, "loss": 2.1798, "step": 33700 }, { "epoch": 10.92436974789916, "grad_norm": 0.797792911529541, "learning_rate": 0.001, "loss": 2.1823, "step": 33800 }, { "epoch": 10.956690368455074, "grad_norm": 0.8014101386070251, "learning_rate": 0.001, "loss": 2.1847, "step": 33900 }, { "epoch": 10.989010989010989, "grad_norm": 0.8443660140037537, "learning_rate": 0.001, "loss": 2.1811, "step": 34000 }, { "epoch": 11.021331609566904, "grad_norm": 1.1962101459503174, "learning_rate": 0.001, "loss": 2.073, "step": 34100 }, { "epoch": 11.053652230122818, "grad_norm": 0.8784822821617126, "learning_rate": 0.001, "loss": 1.965, "step": 34200 }, { "epoch": 11.085972850678733, "grad_norm": 0.9349249005317688, "learning_rate": 0.001, "loss": 1.9996, "step": 34300 }, { "epoch": 11.118293471234647, "grad_norm": 0.7623976469039917, "learning_rate": 0.001, "loss": 2.021, "step": 34400 }, { "epoch": 11.150614091790562, "grad_norm": 0.9323384165763855, "learning_rate": 0.001, "loss": 2.0062, "step": 34500 }, { "epoch": 11.182934712346476, "grad_norm": 0.6012263298034668, "learning_rate": 0.001, "loss": 2.02, "step": 34600 }, { "epoch": 11.215255332902391, "grad_norm": 0.804936945438385, "learning_rate": 0.001, "loss": 2.0324, "step": 34700 }, { "epoch": 11.247575953458306, "grad_norm": 0.762173056602478, "learning_rate": 0.001, "loss": 2.0231, "step": 34800 }, { "epoch": 11.279896574014222, "grad_norm": 0.7957737445831299, "learning_rate": 0.001, "loss": 2.0364, "step": 34900 }, { "epoch": 11.312217194570136, "grad_norm": 0.9475676417350769, "learning_rate": 0.001, "loss": 2.0411, "step": 35000 }, { "epoch": 11.344537815126051, "grad_norm": 1.0098015069961548, "learning_rate": 0.001, "loss": 2.0515, "step": 35100 }, { "epoch": 11.376858435681966, "grad_norm": 0.6368846297264099, "learning_rate": 0.001, "loss": 2.0557, "step": 35200 }, { "epoch": 11.40917905623788, "grad_norm": 0.8262781500816345, "learning_rate": 0.001, "loss": 2.0719, "step": 35300 }, { "epoch": 11.441499676793795, "grad_norm": 1.166345477104187, "learning_rate": 0.001, "loss": 2.082, "step": 35400 }, { "epoch": 11.47382029734971, "grad_norm": 0.8355542421340942, "learning_rate": 0.001, "loss": 2.0857, "step": 35500 }, { "epoch": 11.506140917905624, "grad_norm": 0.6808557510375977, "learning_rate": 0.001, "loss": 2.0747, "step": 35600 }, { "epoch": 11.538461538461538, "grad_norm": 0.9561796188354492, "learning_rate": 0.001, "loss": 2.0965, "step": 35700 }, { "epoch": 11.570782159017453, "grad_norm": 0.861172616481781, "learning_rate": 0.001, "loss": 2.0767, "step": 35800 }, { "epoch": 11.603102779573367, "grad_norm": 0.7714723348617554, "learning_rate": 0.001, "loss": 2.0886, "step": 35900 }, { "epoch": 11.635423400129282, "grad_norm": 1.0395039319992065, "learning_rate": 0.001, "loss": 2.0919, "step": 36000 }, { "epoch": 11.667744020685197, "grad_norm": 0.7050213813781738, "learning_rate": 0.001, "loss": 2.0982, "step": 36100 }, { "epoch": 11.700064641241111, "grad_norm": 0.8370696902275085, "learning_rate": 0.001, "loss": 2.1201, "step": 36200 }, { "epoch": 11.732385261797026, "grad_norm": 0.8443080186843872, "learning_rate": 0.001, "loss": 2.1086, "step": 36300 }, { "epoch": 11.764705882352942, "grad_norm": 0.8879348635673523, "learning_rate": 0.001, "loss": 2.1193, "step": 36400 }, { "epoch": 11.797026502908857, "grad_norm": 0.9272090792655945, "learning_rate": 0.001, "loss": 2.094, "step": 36500 }, { "epoch": 11.829347123464771, "grad_norm": 0.7015024423599243, "learning_rate": 0.001, "loss": 2.0999, "step": 36600 }, { "epoch": 11.861667744020686, "grad_norm": 0.7815345525741577, "learning_rate": 0.001, "loss": 2.1084, "step": 36700 }, { "epoch": 11.8939883645766, "grad_norm": 1.0485022068023682, "learning_rate": 0.001, "loss": 2.1083, "step": 36800 }, { "epoch": 11.926308985132515, "grad_norm": 0.797907292842865, "learning_rate": 0.001, "loss": 2.1304, "step": 36900 }, { "epoch": 11.95862960568843, "grad_norm": 1.0350991487503052, "learning_rate": 0.001, "loss": 2.1273, "step": 37000 }, { "epoch": 11.990950226244344, "grad_norm": 0.7197867631912231, "learning_rate": 0.001, "loss": 2.1191, "step": 37100 }, { "epoch": 12.023270846800258, "grad_norm": 0.8014159202575684, "learning_rate": 0.001, "loss": 1.9888, "step": 37200 }, { "epoch": 12.055591467356173, "grad_norm": 0.8005861043930054, "learning_rate": 0.001, "loss": 1.9242, "step": 37300 }, { "epoch": 12.087912087912088, "grad_norm": 0.9065251350402832, "learning_rate": 0.001, "loss": 1.9244, "step": 37400 }, { "epoch": 12.120232708468002, "grad_norm": 0.7590367197990417, "learning_rate": 0.001, "loss": 1.9261, "step": 37500 }, { "epoch": 12.152553329023917, "grad_norm": 0.8810202479362488, "learning_rate": 0.001, "loss": 1.9375, "step": 37600 }, { "epoch": 12.184873949579831, "grad_norm": 0.9053120613098145, "learning_rate": 0.001, "loss": 1.9506, "step": 37700 }, { "epoch": 12.217194570135746, "grad_norm": 0.758583128452301, "learning_rate": 0.001, "loss": 1.9714, "step": 37800 }, { "epoch": 12.24951519069166, "grad_norm": 0.6105663776397705, "learning_rate": 0.001, "loss": 1.9549, "step": 37900 }, { "epoch": 12.281835811247577, "grad_norm": 0.8235393166542053, "learning_rate": 0.001, "loss": 1.9612, "step": 38000 }, { "epoch": 12.314156431803491, "grad_norm": 0.9864819645881653, "learning_rate": 0.001, "loss": 1.9538, "step": 38100 }, { "epoch": 12.346477052359406, "grad_norm": 0.7817924618721008, "learning_rate": 0.001, "loss": 1.9949, "step": 38200 }, { "epoch": 12.37879767291532, "grad_norm": 0.8047240972518921, "learning_rate": 0.001, "loss": 1.9845, "step": 38300 }, { "epoch": 12.411118293471235, "grad_norm": 0.8879502415657043, "learning_rate": 0.001, "loss": 2.0003, "step": 38400 }, { "epoch": 12.44343891402715, "grad_norm": 0.774183452129364, "learning_rate": 0.001, "loss": 2.0056, "step": 38500 }, { "epoch": 12.475759534583064, "grad_norm": 1.0419282913208008, "learning_rate": 0.001, "loss": 2.008, "step": 38600 }, { "epoch": 12.508080155138979, "grad_norm": 0.9086660146713257, "learning_rate": 0.001, "loss": 1.9963, "step": 38700 }, { "epoch": 12.540400775694893, "grad_norm": 1.152785062789917, "learning_rate": 0.001, "loss": 2.0291, "step": 38800 }, { "epoch": 12.572721396250808, "grad_norm": 0.8627676367759705, "learning_rate": 0.001, "loss": 2.0201, "step": 38900 }, { "epoch": 12.605042016806722, "grad_norm": 1.0265108346939087, "learning_rate": 0.001, "loss": 2.0214, "step": 39000 }, { "epoch": 12.637362637362637, "grad_norm": 0.7832869291305542, "learning_rate": 0.001, "loss": 2.007, "step": 39100 }, { "epoch": 12.669683257918551, "grad_norm": 0.9335310459136963, "learning_rate": 0.001, "loss": 2.0157, "step": 39200 }, { "epoch": 12.702003878474466, "grad_norm": 0.8939380645751953, "learning_rate": 0.001, "loss": 2.0295, "step": 39300 }, { "epoch": 12.73432449903038, "grad_norm": 0.657747745513916, "learning_rate": 0.001, "loss": 2.0335, "step": 39400 }, { "epoch": 12.766645119586297, "grad_norm": 1.07808518409729, "learning_rate": 0.001, "loss": 2.024, "step": 39500 }, { "epoch": 12.798965740142211, "grad_norm": 0.6657872200012207, "learning_rate": 0.001, "loss": 2.0554, "step": 39600 }, { "epoch": 12.831286360698126, "grad_norm": 0.7777263522148132, "learning_rate": 0.001, "loss": 2.0502, "step": 39700 }, { "epoch": 12.86360698125404, "grad_norm": 1.1781259775161743, "learning_rate": 0.001, "loss": 2.0226, "step": 39800 }, { "epoch": 12.895927601809955, "grad_norm": 1.0324816703796387, "learning_rate": 0.001, "loss": 2.0319, "step": 39900 }, { "epoch": 12.92824822236587, "grad_norm": 0.9894204139709473, "learning_rate": 0.001, "loss": 2.0413, "step": 40000 }, { "epoch": 12.960568842921784, "grad_norm": 0.7464094161987305, "learning_rate": 0.001, "loss": 2.0269, "step": 40100 }, { "epoch": 12.992889463477699, "grad_norm": 0.6893013119697571, "learning_rate": 0.001, "loss": 2.0494, "step": 40200 }, { "epoch": 13.025210084033613, "grad_norm": 1.1119939088821411, "learning_rate": 0.001, "loss": 1.9035, "step": 40300 }, { "epoch": 13.057530704589528, "grad_norm": 0.847991943359375, "learning_rate": 0.001, "loss": 1.8552, "step": 40400 }, { "epoch": 13.089851325145442, "grad_norm": 1.1580784320831299, "learning_rate": 0.001, "loss": 1.8505, "step": 40500 }, { "epoch": 13.122171945701357, "grad_norm": 0.9855078458786011, "learning_rate": 0.001, "loss": 1.8534, "step": 40600 }, { "epoch": 13.154492566257272, "grad_norm": 0.7795642018318176, "learning_rate": 0.001, "loss": 1.8824, "step": 40700 }, { "epoch": 13.186813186813186, "grad_norm": 0.9514868259429932, "learning_rate": 0.001, "loss": 1.8769, "step": 40800 }, { "epoch": 13.2191338073691, "grad_norm": 0.946529746055603, "learning_rate": 0.001, "loss": 1.893, "step": 40900 }, { "epoch": 13.251454427925015, "grad_norm": 1.1682661771774292, "learning_rate": 0.001, "loss": 1.8791, "step": 41000 }, { "epoch": 13.283775048480932, "grad_norm": 0.9147705435752869, "learning_rate": 0.001, "loss": 1.9041, "step": 41100 }, { "epoch": 13.316095669036846, "grad_norm": 0.8599191904067993, "learning_rate": 0.001, "loss": 1.9083, "step": 41200 }, { "epoch": 13.34841628959276, "grad_norm": 1.046255111694336, "learning_rate": 0.001, "loss": 1.9071, "step": 41300 }, { "epoch": 13.380736910148675, "grad_norm": 0.7599090933799744, "learning_rate": 0.001, "loss": 1.9173, "step": 41400 }, { "epoch": 13.41305753070459, "grad_norm": 0.9124330878257751, "learning_rate": 0.001, "loss": 1.9049, "step": 41500 }, { "epoch": 13.445378151260504, "grad_norm": 1.1939584016799927, "learning_rate": 0.001, "loss": 1.9343, "step": 41600 }, { "epoch": 13.477698771816419, "grad_norm": 1.0007096529006958, "learning_rate": 0.001, "loss": 1.9261, "step": 41700 }, { "epoch": 13.510019392372334, "grad_norm": 0.8467878699302673, "learning_rate": 0.001, "loss": 1.956, "step": 41800 }, { "epoch": 13.542340012928248, "grad_norm": 0.8382981419563293, "learning_rate": 0.001, "loss": 1.9429, "step": 41900 }, { "epoch": 13.574660633484163, "grad_norm": 0.659842848777771, "learning_rate": 0.001, "loss": 1.9419, "step": 42000 }, { "epoch": 13.606981254040077, "grad_norm": 0.9384772777557373, "learning_rate": 0.001, "loss": 1.9763, "step": 42100 }, { "epoch": 13.639301874595992, "grad_norm": 1.0314030647277832, "learning_rate": 0.001, "loss": 1.9494, "step": 42200 }, { "epoch": 13.671622495151906, "grad_norm": 1.0490249395370483, "learning_rate": 0.001, "loss": 1.9576, "step": 42300 }, { "epoch": 13.70394311570782, "grad_norm": 0.9144356846809387, "learning_rate": 0.001, "loss": 1.9573, "step": 42400 }, { "epoch": 13.736263736263737, "grad_norm": 0.8135116696357727, "learning_rate": 0.001, "loss": 1.97, "step": 42500 }, { "epoch": 13.768584356819652, "grad_norm": 0.8302633166313171, "learning_rate": 0.001, "loss": 1.9725, "step": 42600 }, { "epoch": 13.800904977375566, "grad_norm": 1.0226770639419556, "learning_rate": 0.001, "loss": 1.972, "step": 42700 }, { "epoch": 13.83322559793148, "grad_norm": 0.9724535942077637, "learning_rate": 0.001, "loss": 1.9849, "step": 42800 }, { "epoch": 13.865546218487395, "grad_norm": 1.0346013307571411, "learning_rate": 0.001, "loss": 1.9931, "step": 42900 }, { "epoch": 13.89786683904331, "grad_norm": 0.8794422745704651, "learning_rate": 0.001, "loss": 1.9613, "step": 43000 }, { "epoch": 13.930187459599225, "grad_norm": 1.1985619068145752, "learning_rate": 0.001, "loss": 1.9906, "step": 43100 }, { "epoch": 13.96250808015514, "grad_norm": 0.802087128162384, "learning_rate": 0.001, "loss": 1.9834, "step": 43200 }, { "epoch": 13.994828700711054, "grad_norm": 0.957065999507904, "learning_rate": 0.001, "loss": 1.966, "step": 43300 }, { "epoch": 14.027149321266968, "grad_norm": 0.9812272191047668, "learning_rate": 0.001, "loss": 1.8188, "step": 43400 }, { "epoch": 14.059469941822883, "grad_norm": 0.9734141826629639, "learning_rate": 0.001, "loss": 1.8044, "step": 43500 }, { "epoch": 14.091790562378797, "grad_norm": 1.0251330137252808, "learning_rate": 0.001, "loss": 1.8169, "step": 43600 }, { "epoch": 14.124111182934712, "grad_norm": 0.9613792300224304, "learning_rate": 0.001, "loss": 1.7929, "step": 43700 }, { "epoch": 14.156431803490626, "grad_norm": 1.0136278867721558, "learning_rate": 0.001, "loss": 1.8124, "step": 43800 }, { "epoch": 14.188752424046541, "grad_norm": 0.9243314862251282, "learning_rate": 0.001, "loss": 1.8384, "step": 43900 }, { "epoch": 14.221073044602456, "grad_norm": 0.8909540176391602, "learning_rate": 0.001, "loss": 1.8317, "step": 44000 }, { "epoch": 14.25339366515837, "grad_norm": 1.246249794960022, "learning_rate": 0.001, "loss": 1.8051, "step": 44100 }, { "epoch": 14.285714285714286, "grad_norm": 1.0866832733154297, "learning_rate": 0.001, "loss": 1.8238, "step": 44200 }, { "epoch": 14.318034906270201, "grad_norm": 0.8544468283653259, "learning_rate": 0.001, "loss": 1.8277, "step": 44300 }, { "epoch": 14.350355526826116, "grad_norm": 1.095165491104126, "learning_rate": 0.001, "loss": 1.8405, "step": 44400 }, { "epoch": 14.38267614738203, "grad_norm": 0.9691571593284607, "learning_rate": 0.001, "loss": 1.8616, "step": 44500 }, { "epoch": 14.414996767937945, "grad_norm": 1.226742148399353, "learning_rate": 0.001, "loss": 1.8676, "step": 44600 }, { "epoch": 14.44731738849386, "grad_norm": 0.8271725177764893, "learning_rate": 0.001, "loss": 1.8638, "step": 44700 }, { "epoch": 14.479638009049774, "grad_norm": 0.9551851153373718, "learning_rate": 0.001, "loss": 1.8768, "step": 44800 }, { "epoch": 14.511958629605688, "grad_norm": 0.9541354775428772, "learning_rate": 0.001, "loss": 1.8649, "step": 44900 }, { "epoch": 14.544279250161603, "grad_norm": 1.1981089115142822, "learning_rate": 0.001, "loss": 1.8751, "step": 45000 }, { "epoch": 14.576599870717518, "grad_norm": 0.9535338282585144, "learning_rate": 0.001, "loss": 1.8841, "step": 45100 }, { "epoch": 14.608920491273432, "grad_norm": 1.0231659412384033, "learning_rate": 0.001, "loss": 1.8815, "step": 45200 }, { "epoch": 14.641241111829347, "grad_norm": 0.8634649515151978, "learning_rate": 0.001, "loss": 1.8955, "step": 45300 }, { "epoch": 14.673561732385261, "grad_norm": 0.999064028263092, "learning_rate": 0.001, "loss": 1.9051, "step": 45400 }, { "epoch": 14.705882352941176, "grad_norm": 0.8997722864151001, "learning_rate": 0.001, "loss": 1.9082, "step": 45500 }, { "epoch": 14.738202973497092, "grad_norm": 1.321564793586731, "learning_rate": 0.001, "loss": 1.9196, "step": 45600 }, { "epoch": 14.770523594053007, "grad_norm": 1.0230721235275269, "learning_rate": 0.001, "loss": 1.8889, "step": 45700 }, { "epoch": 14.802844214608921, "grad_norm": 0.8777180314064026, "learning_rate": 0.001, "loss": 1.9141, "step": 45800 }, { "epoch": 14.835164835164836, "grad_norm": 1.0284687280654907, "learning_rate": 0.001, "loss": 1.8936, "step": 45900 }, { "epoch": 14.86748545572075, "grad_norm": 0.9637032151222229, "learning_rate": 0.001, "loss": 1.9116, "step": 46000 }, { "epoch": 14.899806076276665, "grad_norm": 0.9907274842262268, "learning_rate": 0.001, "loss": 1.9031, "step": 46100 }, { "epoch": 14.93212669683258, "grad_norm": 1.183750867843628, "learning_rate": 0.001, "loss": 1.9342, "step": 46200 }, { "epoch": 14.964447317388494, "grad_norm": 0.753564715385437, "learning_rate": 0.001, "loss": 1.9406, "step": 46300 }, { "epoch": 14.996767937944409, "grad_norm": 0.9098049402236938, "learning_rate": 0.001, "loss": 1.9155, "step": 46400 }, { "epoch": 15.029088558500323, "grad_norm": 1.0562727451324463, "learning_rate": 0.001, "loss": 1.7354, "step": 46500 }, { "epoch": 15.061409179056238, "grad_norm": 1.2021610736846924, "learning_rate": 0.001, "loss": 1.7266, "step": 46600 }, { "epoch": 15.093729799612152, "grad_norm": 0.9725460410118103, "learning_rate": 0.001, "loss": 1.7453, "step": 46700 }, { "epoch": 15.126050420168067, "grad_norm": 1.1837831735610962, "learning_rate": 0.001, "loss": 1.739, "step": 46800 }, { "epoch": 15.158371040723981, "grad_norm": 1.251788854598999, "learning_rate": 0.001, "loss": 1.7539, "step": 46900 }, { "epoch": 15.190691661279896, "grad_norm": 1.0251481533050537, "learning_rate": 0.001, "loss": 1.7423, "step": 47000 }, { "epoch": 15.22301228183581, "grad_norm": 0.9209014773368835, "learning_rate": 0.001, "loss": 1.763, "step": 47100 }, { "epoch": 15.255332902391725, "grad_norm": 1.0937693119049072, "learning_rate": 0.001, "loss": 1.7498, "step": 47200 }, { "epoch": 15.287653522947641, "grad_norm": 1.2322750091552734, "learning_rate": 0.001, "loss": 1.7696, "step": 47300 }, { "epoch": 15.319974143503556, "grad_norm": 1.1660560369491577, "learning_rate": 0.001, "loss": 1.7826, "step": 47400 }, { "epoch": 15.35229476405947, "grad_norm": 0.831125020980835, "learning_rate": 0.001, "loss": 1.7861, "step": 47500 }, { "epoch": 15.384615384615385, "grad_norm": 1.0888745784759521, "learning_rate": 0.001, "loss": 1.7752, "step": 47600 }, { "epoch": 15.4169360051713, "grad_norm": 1.26369309425354, "learning_rate": 0.001, "loss": 1.8101, "step": 47700 }, { "epoch": 15.449256625727214, "grad_norm": 1.07938551902771, "learning_rate": 0.001, "loss": 1.7942, "step": 47800 }, { "epoch": 15.481577246283129, "grad_norm": 0.981393039226532, "learning_rate": 0.001, "loss": 1.8024, "step": 47900 }, { "epoch": 15.513897866839043, "grad_norm": 1.0893641710281372, "learning_rate": 0.001, "loss": 1.8371, "step": 48000 }, { "epoch": 15.546218487394958, "grad_norm": 1.029589295387268, "learning_rate": 0.001, "loss": 1.8242, "step": 48100 }, { "epoch": 15.578539107950872, "grad_norm": 1.3051846027374268, "learning_rate": 0.001, "loss": 1.8231, "step": 48200 }, { "epoch": 15.610859728506787, "grad_norm": 1.2779626846313477, "learning_rate": 0.001, "loss": 1.833, "step": 48300 }, { "epoch": 15.643180349062701, "grad_norm": 0.993664562702179, "learning_rate": 0.001, "loss": 1.822, "step": 48400 }, { "epoch": 15.675500969618616, "grad_norm": 1.0728681087493896, "learning_rate": 0.001, "loss": 1.8283, "step": 48500 }, { "epoch": 15.70782159017453, "grad_norm": 1.019649624824524, "learning_rate": 0.001, "loss": 1.8281, "step": 48600 }, { "epoch": 15.740142210730447, "grad_norm": 1.0167169570922852, "learning_rate": 0.001, "loss": 1.837, "step": 48700 }, { "epoch": 15.772462831286362, "grad_norm": 0.8728282451629639, "learning_rate": 0.001, "loss": 1.8401, "step": 48800 }, { "epoch": 15.804783451842276, "grad_norm": 1.1664496660232544, "learning_rate": 0.001, "loss": 1.8387, "step": 48900 }, { "epoch": 15.83710407239819, "grad_norm": 0.9092980027198792, "learning_rate": 0.001, "loss": 1.8311, "step": 49000 }, { "epoch": 15.869424692954105, "grad_norm": 0.9564887285232544, "learning_rate": 0.001, "loss": 1.8584, "step": 49100 }, { "epoch": 15.90174531351002, "grad_norm": 1.341081142425537, "learning_rate": 0.001, "loss": 1.8589, "step": 49200 }, { "epoch": 15.934065934065934, "grad_norm": 1.1868051290512085, "learning_rate": 0.001, "loss": 1.8652, "step": 49300 }, { "epoch": 15.966386554621849, "grad_norm": 0.9599682092666626, "learning_rate": 0.001, "loss": 1.8639, "step": 49400 }, { "epoch": 15.998707175177763, "grad_norm": 1.0819648504257202, "learning_rate": 0.001, "loss": 1.8739, "step": 49500 }, { "epoch": 16.031027795733678, "grad_norm": 1.191168189048767, "learning_rate": 0.001, "loss": 1.6666, "step": 49600 }, { "epoch": 16.063348416289593, "grad_norm": 1.4504457712173462, "learning_rate": 0.001, "loss": 1.6821, "step": 49700 }, { "epoch": 16.095669036845507, "grad_norm": 1.1250367164611816, "learning_rate": 0.001, "loss": 1.6814, "step": 49800 }, { "epoch": 16.12798965740142, "grad_norm": 1.3092635869979858, "learning_rate": 0.001, "loss": 1.6677, "step": 49900 }, { "epoch": 16.160310277957336, "grad_norm": 1.4502923488616943, "learning_rate": 0.001, "loss": 1.7021, "step": 50000 }, { "epoch": 16.19263089851325, "grad_norm": 1.0239949226379395, "learning_rate": 0.001, "loss": 1.7089, "step": 50100 }, { "epoch": 16.224951519069165, "grad_norm": 1.3678889274597168, "learning_rate": 0.001, "loss": 1.731, "step": 50200 }, { "epoch": 16.25727213962508, "grad_norm": 1.2372901439666748, "learning_rate": 0.001, "loss": 1.713, "step": 50300 }, { "epoch": 16.289592760180994, "grad_norm": 1.2210361957550049, "learning_rate": 0.001, "loss": 1.7228, "step": 50400 }, { "epoch": 16.32191338073691, "grad_norm": 1.0007407665252686, "learning_rate": 0.001, "loss": 1.719, "step": 50500 }, { "epoch": 16.354234001292824, "grad_norm": 1.2971030473709106, "learning_rate": 0.001, "loss": 1.7299, "step": 50600 }, { "epoch": 16.386554621848738, "grad_norm": 1.2815371751785278, "learning_rate": 0.001, "loss": 1.7459, "step": 50700 }, { "epoch": 16.418875242404653, "grad_norm": 1.6864794492721558, "learning_rate": 0.001, "loss": 1.7343, "step": 50800 }, { "epoch": 16.451195862960567, "grad_norm": 1.0352317094802856, "learning_rate": 0.001, "loss": 1.7447, "step": 50900 }, { "epoch": 16.483516483516482, "grad_norm": 1.1855565309524536, "learning_rate": 0.001, "loss": 1.7381, "step": 51000 }, { "epoch": 16.5158371040724, "grad_norm": 1.26261568069458, "learning_rate": 0.001, "loss": 1.763, "step": 51100 }, { "epoch": 16.548157724628314, "grad_norm": 1.307020902633667, "learning_rate": 0.001, "loss": 1.757, "step": 51200 }, { "epoch": 16.58047834518423, "grad_norm": 1.8064260482788086, "learning_rate": 0.001, "loss": 1.7732, "step": 51300 }, { "epoch": 16.612798965740144, "grad_norm": 1.3192243576049805, "learning_rate": 0.001, "loss": 1.7662, "step": 51400 }, { "epoch": 16.645119586296058, "grad_norm": 1.3806297779083252, "learning_rate": 0.001, "loss": 1.7792, "step": 51500 }, { "epoch": 16.677440206851973, "grad_norm": 1.2907260656356812, "learning_rate": 0.001, "loss": 1.7802, "step": 51600 }, { "epoch": 16.709760827407887, "grad_norm": 1.0979167222976685, "learning_rate": 0.001, "loss": 1.7888, "step": 51700 }, { "epoch": 16.742081447963802, "grad_norm": 1.104905128479004, "learning_rate": 0.001, "loss": 1.7731, "step": 51800 }, { "epoch": 16.774402068519716, "grad_norm": 1.462869644165039, "learning_rate": 0.001, "loss": 1.7893, "step": 51900 }, { "epoch": 16.80672268907563, "grad_norm": 1.0832281112670898, "learning_rate": 0.001, "loss": 1.7766, "step": 52000 }, { "epoch": 16.839043309631545, "grad_norm": 1.4008649587631226, "learning_rate": 0.001, "loss": 1.7986, "step": 52100 }, { "epoch": 16.87136393018746, "grad_norm": 1.0097993612289429, "learning_rate": 0.001, "loss": 1.7932, "step": 52200 }, { "epoch": 16.903684550743375, "grad_norm": 0.9743539690971375, "learning_rate": 0.001, "loss": 1.8006, "step": 52300 }, { "epoch": 16.93600517129929, "grad_norm": 1.2008857727050781, "learning_rate": 0.001, "loss": 1.8052, "step": 52400 }, { "epoch": 16.968325791855204, "grad_norm": 1.1717451810836792, "learning_rate": 0.001, "loss": 1.791, "step": 52500 }, { "epoch": 17.00064641241112, "grad_norm": 1.366879940032959, "learning_rate": 0.001, "loss": 1.7872, "step": 52600 }, { "epoch": 17.032967032967033, "grad_norm": 1.0977575778961182, "learning_rate": 0.001, "loss": 1.5917, "step": 52700 }, { "epoch": 17.065287653522947, "grad_norm": 1.2198309898376465, "learning_rate": 0.001, "loss": 1.6125, "step": 52800 }, { "epoch": 17.097608274078862, "grad_norm": 1.2161996364593506, "learning_rate": 0.001, "loss": 1.6158, "step": 52900 }, { "epoch": 17.129928894634777, "grad_norm": 1.1537986993789673, "learning_rate": 0.001, "loss": 1.6346, "step": 53000 }, { "epoch": 17.16224951519069, "grad_norm": 1.0179994106292725, "learning_rate": 0.001, "loss": 1.6249, "step": 53100 }, { "epoch": 17.194570135746606, "grad_norm": 1.2588387727737427, "learning_rate": 0.001, "loss": 1.6321, "step": 53200 }, { "epoch": 17.22689075630252, "grad_norm": 1.39448881149292, "learning_rate": 0.001, "loss": 1.6427, "step": 53300 }, { "epoch": 17.259211376858435, "grad_norm": 1.043461799621582, "learning_rate": 0.001, "loss": 1.663, "step": 53400 }, { "epoch": 17.29153199741435, "grad_norm": 1.4092097282409668, "learning_rate": 0.001, "loss": 1.6445, "step": 53500 }, { "epoch": 17.323852617970264, "grad_norm": 1.1003872156143188, "learning_rate": 0.001, "loss": 1.6803, "step": 53600 }, { "epoch": 17.35617323852618, "grad_norm": 1.3128610849380493, "learning_rate": 0.001, "loss": 1.6558, "step": 53700 }, { "epoch": 17.388493859082093, "grad_norm": 1.4222233295440674, "learning_rate": 0.001, "loss": 1.6718, "step": 53800 }, { "epoch": 17.420814479638008, "grad_norm": 1.4770063161849976, "learning_rate": 0.001, "loss": 1.6894, "step": 53900 }, { "epoch": 17.453135100193922, "grad_norm": 1.2568024396896362, "learning_rate": 0.001, "loss": 1.7062, "step": 54000 }, { "epoch": 17.485455720749837, "grad_norm": 1.3366326093673706, "learning_rate": 0.001, "loss": 1.7102, "step": 54100 }, { "epoch": 17.517776341305755, "grad_norm": 1.065588355064392, "learning_rate": 0.001, "loss": 1.7056, "step": 54200 }, { "epoch": 17.55009696186167, "grad_norm": 1.1240742206573486, "learning_rate": 0.001, "loss": 1.6804, "step": 54300 }, { "epoch": 17.582417582417584, "grad_norm": 1.1816436052322388, "learning_rate": 0.001, "loss": 1.6866, "step": 54400 }, { "epoch": 17.6147382029735, "grad_norm": 1.0550204515457153, "learning_rate": 0.001, "loss": 1.7337, "step": 54500 }, { "epoch": 17.647058823529413, "grad_norm": 1.513448715209961, "learning_rate": 0.001, "loss": 1.717, "step": 54600 }, { "epoch": 17.679379444085328, "grad_norm": 0.954359769821167, "learning_rate": 0.001, "loss": 1.7318, "step": 54700 }, { "epoch": 17.711700064641242, "grad_norm": 1.1431012153625488, "learning_rate": 0.001, "loss": 1.7361, "step": 54800 }, { "epoch": 17.744020685197157, "grad_norm": 1.1148468255996704, "learning_rate": 0.001, "loss": 1.7472, "step": 54900 }, { "epoch": 17.77634130575307, "grad_norm": 1.0310159921646118, "learning_rate": 0.001, "loss": 1.7284, "step": 55000 }, { "epoch": 17.808661926308986, "grad_norm": 1.2773805856704712, "learning_rate": 0.001, "loss": 1.7407, "step": 55100 }, { "epoch": 17.8409825468649, "grad_norm": 1.1706979274749756, "learning_rate": 0.001, "loss": 1.727, "step": 55200 }, { "epoch": 17.873303167420815, "grad_norm": 1.0627738237380981, "learning_rate": 0.001, "loss": 1.7581, "step": 55300 }, { "epoch": 17.90562378797673, "grad_norm": 1.3061949014663696, "learning_rate": 0.001, "loss": 1.7453, "step": 55400 }, { "epoch": 17.937944408532644, "grad_norm": 0.9596644043922424, "learning_rate": 0.001, "loss": 1.7545, "step": 55500 }, { "epoch": 17.97026502908856, "grad_norm": 1.1811267137527466, "learning_rate": 0.001, "loss": 1.7687, "step": 55600 }, { "epoch": 18.002585649644473, "grad_norm": 1.2386430501937866, "learning_rate": 0.001, "loss": 1.7574, "step": 55700 }, { "epoch": 18.034906270200388, "grad_norm": 1.0408971309661865, "learning_rate": 0.001, "loss": 1.5295, "step": 55800 }, { "epoch": 18.067226890756302, "grad_norm": 1.1850616931915283, "learning_rate": 0.001, "loss": 1.5551, "step": 55900 }, { "epoch": 18.099547511312217, "grad_norm": 1.2921085357666016, "learning_rate": 0.001, "loss": 1.557, "step": 56000 }, { "epoch": 18.13186813186813, "grad_norm": 0.8810726404190063, "learning_rate": 0.001, "loss": 1.5878, "step": 56100 }, { "epoch": 18.164188752424046, "grad_norm": 1.1503159999847412, "learning_rate": 0.001, "loss": 1.5883, "step": 56200 }, { "epoch": 18.19650937297996, "grad_norm": 1.309756875038147, "learning_rate": 0.001, "loss": 1.591, "step": 56300 }, { "epoch": 18.228829993535875, "grad_norm": 1.2218385934829712, "learning_rate": 0.001, "loss": 1.6116, "step": 56400 }, { "epoch": 18.26115061409179, "grad_norm": 1.1329491138458252, "learning_rate": 0.001, "loss": 1.5911, "step": 56500 }, { "epoch": 18.293471234647704, "grad_norm": 0.9875311255455017, "learning_rate": 0.001, "loss": 1.617, "step": 56600 }, { "epoch": 18.32579185520362, "grad_norm": 0.9733879566192627, "learning_rate": 0.001, "loss": 1.6325, "step": 56700 }, { "epoch": 18.358112475759533, "grad_norm": 0.9910351037979126, "learning_rate": 0.001, "loss": 1.605, "step": 56800 }, { "epoch": 18.390433096315448, "grad_norm": 1.025378942489624, "learning_rate": 0.001, "loss": 1.6284, "step": 56900 }, { "epoch": 18.422753716871362, "grad_norm": 0.9419228434562683, "learning_rate": 0.001, "loss": 1.6417, "step": 57000 }, { "epoch": 18.455074337427277, "grad_norm": 0.9942294955253601, "learning_rate": 0.001, "loss": 1.6434, "step": 57100 }, { "epoch": 18.48739495798319, "grad_norm": 1.1818641424179077, "learning_rate": 0.001, "loss": 1.651, "step": 57200 }, { "epoch": 18.51971557853911, "grad_norm": 1.368897795677185, "learning_rate": 0.001, "loss": 1.649, "step": 57300 }, { "epoch": 18.552036199095024, "grad_norm": 0.9063993692398071, "learning_rate": 0.001, "loss": 1.6485, "step": 57400 }, { "epoch": 18.58435681965094, "grad_norm": 1.0962148904800415, "learning_rate": 0.001, "loss": 1.6482, "step": 57500 }, { "epoch": 18.616677440206853, "grad_norm": 1.2267396450042725, "learning_rate": 0.001, "loss": 1.6524, "step": 57600 }, { "epoch": 18.648998060762768, "grad_norm": 1.106046199798584, "learning_rate": 0.001, "loss": 1.6619, "step": 57700 }, { "epoch": 18.681318681318682, "grad_norm": 1.151220679283142, "learning_rate": 0.001, "loss": 1.6654, "step": 57800 }, { "epoch": 18.713639301874597, "grad_norm": 1.0850752592086792, "learning_rate": 0.001, "loss": 1.6842, "step": 57900 }, { "epoch": 18.74595992243051, "grad_norm": 0.9457443952560425, "learning_rate": 0.001, "loss": 1.6871, "step": 58000 }, { "epoch": 18.778280542986426, "grad_norm": 1.0624374151229858, "learning_rate": 0.001, "loss": 1.6758, "step": 58100 }, { "epoch": 18.81060116354234, "grad_norm": 0.9737662076950073, "learning_rate": 0.001, "loss": 1.6667, "step": 58200 }, { "epoch": 18.842921784098255, "grad_norm": 1.0922777652740479, "learning_rate": 0.001, "loss": 1.6761, "step": 58300 }, { "epoch": 18.87524240465417, "grad_norm": 1.1318670511245728, "learning_rate": 0.001, "loss": 1.6959, "step": 58400 }, { "epoch": 18.907563025210084, "grad_norm": 1.0507032871246338, "learning_rate": 0.001, "loss": 1.7219, "step": 58500 }, { "epoch": 18.939883645766, "grad_norm": 1.085754632949829, "learning_rate": 0.001, "loss": 1.7005, "step": 58600 }, { "epoch": 18.972204266321913, "grad_norm": 1.124943733215332, "learning_rate": 0.001, "loss": 1.6993, "step": 58700 }, { "epoch": 19.004524886877828, "grad_norm": 0.8196804523468018, "learning_rate": 0.001, "loss": 1.685, "step": 58800 }, { "epoch": 19.036845507433743, "grad_norm": 1.1527674198150635, "learning_rate": 0.001, "loss": 1.4873, "step": 58900 }, { "epoch": 19.069166127989657, "grad_norm": 1.1686272621154785, "learning_rate": 0.001, "loss": 1.4985, "step": 59000 }, { "epoch": 19.10148674854557, "grad_norm": 1.1501728296279907, "learning_rate": 0.001, "loss": 1.5108, "step": 59100 }, { "epoch": 19.133807369101486, "grad_norm": 0.7028889656066895, "learning_rate": 0.001, "loss": 1.5277, "step": 59200 }, { "epoch": 19.1661279896574, "grad_norm": 1.0898555517196655, "learning_rate": 0.001, "loss": 1.5244, "step": 59300 }, { "epoch": 19.198448610213315, "grad_norm": 0.9707253575325012, "learning_rate": 0.001, "loss": 1.5416, "step": 59400 }, { "epoch": 19.23076923076923, "grad_norm": 0.9205765724182129, "learning_rate": 0.001, "loss": 1.5484, "step": 59500 }, { "epoch": 19.263089851325145, "grad_norm": 0.9760745167732239, "learning_rate": 0.001, "loss": 1.5623, "step": 59600 }, { "epoch": 19.29541047188106, "grad_norm": 1.1636168956756592, "learning_rate": 0.001, "loss": 1.5584, "step": 59700 }, { "epoch": 19.327731092436974, "grad_norm": 1.148553729057312, "learning_rate": 0.001, "loss": 1.5642, "step": 59800 }, { "epoch": 19.360051712992888, "grad_norm": 1.0770126581192017, "learning_rate": 0.001, "loss": 1.5786, "step": 59900 }, { "epoch": 19.392372333548803, "grad_norm": 1.0418332815170288, "learning_rate": 0.001, "loss": 1.5852, "step": 60000 }, { "epoch": 19.424692954104717, "grad_norm": 1.116357445716858, "learning_rate": 0.001, "loss": 1.5837, "step": 60100 }, { "epoch": 19.457013574660632, "grad_norm": 1.0376067161560059, "learning_rate": 0.001, "loss": 1.5745, "step": 60200 }, { "epoch": 19.489334195216546, "grad_norm": 1.1527152061462402, "learning_rate": 0.001, "loss": 1.5984, "step": 60300 }, { "epoch": 19.521654815772465, "grad_norm": 1.1605216264724731, "learning_rate": 0.001, "loss": 1.5935, "step": 60400 }, { "epoch": 19.55397543632838, "grad_norm": 0.7423362135887146, "learning_rate": 0.001, "loss": 1.5946, "step": 60500 }, { "epoch": 19.586296056884294, "grad_norm": 0.9806872010231018, "learning_rate": 0.001, "loss": 1.6018, "step": 60600 }, { "epoch": 19.618616677440208, "grad_norm": 1.0289793014526367, "learning_rate": 0.001, "loss": 1.6093, "step": 60700 }, { "epoch": 19.650937297996123, "grad_norm": 1.1556607484817505, "learning_rate": 0.001, "loss": 1.6189, "step": 60800 }, { "epoch": 19.683257918552037, "grad_norm": 1.3741674423217773, "learning_rate": 0.001, "loss": 1.6013, "step": 60900 }, { "epoch": 19.715578539107952, "grad_norm": 0.9011361002922058, "learning_rate": 0.001, "loss": 1.6185, "step": 61000 }, { "epoch": 19.747899159663866, "grad_norm": 1.1335152387619019, "learning_rate": 0.001, "loss": 1.6295, "step": 61100 }, { "epoch": 19.78021978021978, "grad_norm": 0.8284692764282227, "learning_rate": 0.001, "loss": 1.629, "step": 61200 }, { "epoch": 19.812540400775696, "grad_norm": 0.9114537239074707, "learning_rate": 0.001, "loss": 1.6228, "step": 61300 }, { "epoch": 19.84486102133161, "grad_norm": 1.3349568843841553, "learning_rate": 0.001, "loss": 1.6382, "step": 61400 }, { "epoch": 19.877181641887525, "grad_norm": 1.230270504951477, "learning_rate": 0.001, "loss": 1.6351, "step": 61500 }, { "epoch": 19.90950226244344, "grad_norm": 1.2369942665100098, "learning_rate": 0.001, "loss": 1.6392, "step": 61600 }, { "epoch": 19.941822882999354, "grad_norm": 0.8151253461837769, "learning_rate": 0.001, "loss": 1.6426, "step": 61700 }, { "epoch": 19.97414350355527, "grad_norm": 1.1438778638839722, "learning_rate": 0.001, "loss": 1.6541, "step": 61800 }, { "epoch": 20.006464124111183, "grad_norm": 0.9913794994354248, "learning_rate": 0.001, "loss": 1.6122, "step": 61900 }, { "epoch": 20.038784744667097, "grad_norm": 0.9743097424507141, "learning_rate": 0.001, "loss": 1.4537, "step": 62000 }, { "epoch": 20.071105365223012, "grad_norm": 1.1288503408432007, "learning_rate": 0.001, "loss": 1.472, "step": 62100 }, { "epoch": 20.103425985778927, "grad_norm": 1.294502854347229, "learning_rate": 0.001, "loss": 1.4587, "step": 62200 }, { "epoch": 20.13574660633484, "grad_norm": 1.037326693534851, "learning_rate": 0.001, "loss": 1.4856, "step": 62300 }, { "epoch": 20.168067226890756, "grad_norm": 1.0581369400024414, "learning_rate": 0.001, "loss": 1.4908, "step": 62400 }, { "epoch": 20.20038784744667, "grad_norm": 0.8377583026885986, "learning_rate": 0.001, "loss": 1.4862, "step": 62500 }, { "epoch": 20.232708468002585, "grad_norm": 1.0047152042388916, "learning_rate": 0.001, "loss": 1.4899, "step": 62600 }, { "epoch": 20.2650290885585, "grad_norm": 1.0214612483978271, "learning_rate": 0.001, "loss": 1.4951, "step": 62700 }, { "epoch": 20.297349709114414, "grad_norm": 1.0336310863494873, "learning_rate": 0.001, "loss": 1.518, "step": 62800 }, { "epoch": 20.32967032967033, "grad_norm": 0.9026684761047363, "learning_rate": 0.001, "loss": 1.5128, "step": 62900 }, { "epoch": 20.361990950226243, "grad_norm": 0.772000253200531, "learning_rate": 0.001, "loss": 1.5363, "step": 63000 }, { "epoch": 20.394311570782158, "grad_norm": 0.8923009634017944, "learning_rate": 0.001, "loss": 1.5094, "step": 63100 }, { "epoch": 20.426632191338072, "grad_norm": 1.060086727142334, "learning_rate": 0.001, "loss": 1.522, "step": 63200 }, { "epoch": 20.458952811893987, "grad_norm": 0.9212659597396851, "learning_rate": 0.001, "loss": 1.5227, "step": 63300 }, { "epoch": 20.4912734324499, "grad_norm": 1.1138205528259277, "learning_rate": 0.001, "loss": 1.5358, "step": 63400 }, { "epoch": 20.52359405300582, "grad_norm": 1.1013556718826294, "learning_rate": 0.001, "loss": 1.5289, "step": 63500 }, { "epoch": 20.555914673561734, "grad_norm": 0.9058528542518616, "learning_rate": 0.001, "loss": 1.5551, "step": 63600 }, { "epoch": 20.58823529411765, "grad_norm": 0.9466219544410706, "learning_rate": 0.001, "loss": 1.5533, "step": 63700 }, { "epoch": 20.620555914673563, "grad_norm": 1.0883358716964722, "learning_rate": 0.001, "loss": 1.552, "step": 63800 }, { "epoch": 20.652876535229478, "grad_norm": 1.0810259580612183, "learning_rate": 0.001, "loss": 1.5487, "step": 63900 }, { "epoch": 20.685197155785392, "grad_norm": 1.0904487371444702, "learning_rate": 0.001, "loss": 1.5562, "step": 64000 }, { "epoch": 20.717517776341307, "grad_norm": 0.9347714185714722, "learning_rate": 0.001, "loss": 1.5684, "step": 64100 }, { "epoch": 20.74983839689722, "grad_norm": 0.9312880635261536, "learning_rate": 0.001, "loss": 1.5794, "step": 64200 }, { "epoch": 20.782159017453136, "grad_norm": 0.9336447715759277, "learning_rate": 0.001, "loss": 1.569, "step": 64300 }, { "epoch": 20.81447963800905, "grad_norm": 1.2219997644424438, "learning_rate": 0.001, "loss": 1.5931, "step": 64400 }, { "epoch": 20.846800258564965, "grad_norm": 1.119971513748169, "learning_rate": 0.001, "loss": 1.5824, "step": 64500 }, { "epoch": 20.87912087912088, "grad_norm": 1.2351752519607544, "learning_rate": 0.001, "loss": 1.6108, "step": 64600 }, { "epoch": 20.911441499676794, "grad_norm": 1.048165202140808, "learning_rate": 0.001, "loss": 1.5908, "step": 64700 }, { "epoch": 20.94376212023271, "grad_norm": 1.0197266340255737, "learning_rate": 0.001, "loss": 1.6088, "step": 64800 }, { "epoch": 20.976082740788623, "grad_norm": 0.805881679058075, "learning_rate": 0.001, "loss": 1.6057, "step": 64900 }, { "epoch": 21.008403361344538, "grad_norm": 0.8987480401992798, "learning_rate": 0.001, "loss": 1.543, "step": 65000 }, { "epoch": 21.040723981900452, "grad_norm": 0.8974378108978271, "learning_rate": 0.001, "loss": 1.4066, "step": 65100 }, { "epoch": 21.073044602456367, "grad_norm": 1.323530912399292, "learning_rate": 0.001, "loss": 1.4129, "step": 65200 }, { "epoch": 21.10536522301228, "grad_norm": 1.1739469766616821, "learning_rate": 0.001, "loss": 1.4246, "step": 65300 }, { "epoch": 21.137685843568196, "grad_norm": 1.1534544229507446, "learning_rate": 0.001, "loss": 1.4379, "step": 65400 }, { "epoch": 21.17000646412411, "grad_norm": 1.2204139232635498, "learning_rate": 0.001, "loss": 1.4343, "step": 65500 }, { "epoch": 21.202327084680025, "grad_norm": 0.8088862299919128, "learning_rate": 0.001, "loss": 1.441, "step": 65600 }, { "epoch": 21.23464770523594, "grad_norm": 1.09054696559906, "learning_rate": 0.001, "loss": 1.4427, "step": 65700 }, { "epoch": 21.266968325791854, "grad_norm": 1.189874291419983, "learning_rate": 0.001, "loss": 1.4503, "step": 65800 }, { "epoch": 21.29928894634777, "grad_norm": 1.1406506299972534, "learning_rate": 0.001, "loss": 1.4645, "step": 65900 }, { "epoch": 21.331609566903683, "grad_norm": 0.966766893863678, "learning_rate": 0.001, "loss": 1.4816, "step": 66000 }, { "epoch": 21.363930187459598, "grad_norm": 1.0039381980895996, "learning_rate": 0.001, "loss": 1.4548, "step": 66100 }, { "epoch": 21.396250808015512, "grad_norm": 1.017235279083252, "learning_rate": 0.001, "loss": 1.4674, "step": 66200 }, { "epoch": 21.428571428571427, "grad_norm": 1.0016789436340332, "learning_rate": 0.001, "loss": 1.4841, "step": 66300 }, { "epoch": 21.46089204912734, "grad_norm": 1.1150920391082764, "learning_rate": 0.001, "loss": 1.4959, "step": 66400 }, { "epoch": 21.49321266968326, "grad_norm": 0.9548745155334473, "learning_rate": 0.001, "loss": 1.4753, "step": 66500 }, { "epoch": 21.525533290239174, "grad_norm": 1.000315546989441, "learning_rate": 0.001, "loss": 1.4674, "step": 66600 }, { "epoch": 21.55785391079509, "grad_norm": 1.0356563329696655, "learning_rate": 0.001, "loss": 1.5119, "step": 66700 }, { "epoch": 21.590174531351003, "grad_norm": 1.0894801616668701, "learning_rate": 0.001, "loss": 1.512, "step": 66800 }, { "epoch": 21.622495151906918, "grad_norm": 0.9347051978111267, "learning_rate": 0.001, "loss": 1.5113, "step": 66900 }, { "epoch": 21.654815772462833, "grad_norm": 0.8430983424186707, "learning_rate": 0.001, "loss": 1.5212, "step": 67000 }, { "epoch": 21.687136393018747, "grad_norm": 1.1729098558425903, "learning_rate": 0.001, "loss": 1.5288, "step": 67100 }, { "epoch": 21.71945701357466, "grad_norm": 0.9759130477905273, "learning_rate": 0.001, "loss": 1.539, "step": 67200 }, { "epoch": 21.751777634130576, "grad_norm": 1.1313591003417969, "learning_rate": 0.001, "loss": 1.5387, "step": 67300 }, { "epoch": 21.78409825468649, "grad_norm": 0.970131516456604, "learning_rate": 0.001, "loss": 1.5614, "step": 67400 }, { "epoch": 21.816418875242405, "grad_norm": 1.094679355621338, "learning_rate": 0.001, "loss": 1.5561, "step": 67500 }, { "epoch": 21.84873949579832, "grad_norm": 1.0493223667144775, "learning_rate": 0.001, "loss": 1.5455, "step": 67600 }, { "epoch": 21.881060116354234, "grad_norm": 0.9850541353225708, "learning_rate": 0.001, "loss": 1.5538, "step": 67700 }, { "epoch": 21.91338073691015, "grad_norm": 1.133143663406372, "learning_rate": 0.001, "loss": 1.5361, "step": 67800 }, { "epoch": 21.945701357466064, "grad_norm": 1.1763890981674194, "learning_rate": 0.001, "loss": 1.5432, "step": 67900 }, { "epoch": 21.978021978021978, "grad_norm": 0.9275557398796082, "learning_rate": 0.001, "loss": 1.5586, "step": 68000 }, { "epoch": 22.010342598577893, "grad_norm": 1.040199637413025, "learning_rate": 0.001, "loss": 1.485, "step": 68100 }, { "epoch": 22.042663219133807, "grad_norm": 1.283536434173584, "learning_rate": 0.001, "loss": 1.3573, "step": 68200 }, { "epoch": 22.07498383968972, "grad_norm": 1.013226866722107, "learning_rate": 0.001, "loss": 1.3566, "step": 68300 }, { "epoch": 22.107304460245636, "grad_norm": 1.2437382936477661, "learning_rate": 0.001, "loss": 1.376, "step": 68400 }, { "epoch": 22.13962508080155, "grad_norm": 1.019288420677185, "learning_rate": 0.001, "loss": 1.3824, "step": 68500 }, { "epoch": 22.171945701357465, "grad_norm": 1.2516907453536987, "learning_rate": 0.001, "loss": 1.3895, "step": 68600 }, { "epoch": 22.20426632191338, "grad_norm": 1.0436148643493652, "learning_rate": 0.001, "loss": 1.4035, "step": 68700 }, { "epoch": 22.236586942469295, "grad_norm": 1.0722150802612305, "learning_rate": 0.001, "loss": 1.4027, "step": 68800 }, { "epoch": 22.26890756302521, "grad_norm": 1.1059958934783936, "learning_rate": 0.001, "loss": 1.3992, "step": 68900 }, { "epoch": 22.301228183581124, "grad_norm": 0.9927516579627991, "learning_rate": 0.001, "loss": 1.4179, "step": 69000 }, { "epoch": 22.33354880413704, "grad_norm": 1.0695496797561646, "learning_rate": 0.001, "loss": 1.4082, "step": 69100 }, { "epoch": 22.365869424692953, "grad_norm": 0.916350781917572, "learning_rate": 0.001, "loss": 1.4451, "step": 69200 }, { "epoch": 22.398190045248867, "grad_norm": 1.093136191368103, "learning_rate": 0.001, "loss": 1.4456, "step": 69300 }, { "epoch": 22.430510665804782, "grad_norm": 1.0535207986831665, "learning_rate": 0.001, "loss": 1.4389, "step": 69400 }, { "epoch": 22.462831286360696, "grad_norm": 1.1754355430603027, "learning_rate": 0.001, "loss": 1.4387, "step": 69500 }, { "epoch": 22.49515190691661, "grad_norm": 1.1777504682540894, "learning_rate": 0.001, "loss": 1.4633, "step": 69600 }, { "epoch": 22.52747252747253, "grad_norm": 1.2048346996307373, "learning_rate": 0.001, "loss": 1.4496, "step": 69700 }, { "epoch": 22.559793148028444, "grad_norm": 1.249259114265442, "learning_rate": 0.001, "loss": 1.457, "step": 69800 }, { "epoch": 22.59211376858436, "grad_norm": 1.569291591644287, "learning_rate": 0.001, "loss": 1.4649, "step": 69900 }, { "epoch": 22.624434389140273, "grad_norm": 0.9100170731544495, "learning_rate": 0.001, "loss": 1.4706, "step": 70000 }, { "epoch": 22.656755009696187, "grad_norm": 1.29209566116333, "learning_rate": 0.001, "loss": 1.4663, "step": 70100 }, { "epoch": 22.689075630252102, "grad_norm": 1.2646560668945312, "learning_rate": 0.001, "loss": 1.4923, "step": 70200 }, { "epoch": 22.721396250808017, "grad_norm": 1.1903401613235474, "learning_rate": 0.001, "loss": 1.4915, "step": 70300 }, { "epoch": 22.75371687136393, "grad_norm": 1.4104832410812378, "learning_rate": 0.001, "loss": 1.481, "step": 70400 }, { "epoch": 22.786037491919846, "grad_norm": 1.1116533279418945, "learning_rate": 0.001, "loss": 1.4883, "step": 70500 }, { "epoch": 22.81835811247576, "grad_norm": 1.0137779712677002, "learning_rate": 0.001, "loss": 1.4909, "step": 70600 }, { "epoch": 22.850678733031675, "grad_norm": 1.223270058631897, "learning_rate": 0.001, "loss": 1.4846, "step": 70700 }, { "epoch": 22.88299935358759, "grad_norm": 1.0463052988052368, "learning_rate": 0.001, "loss": 1.5081, "step": 70800 }, { "epoch": 22.915319974143504, "grad_norm": 1.1443082094192505, "learning_rate": 0.001, "loss": 1.4983, "step": 70900 }, { "epoch": 22.94764059469942, "grad_norm": 1.2371140718460083, "learning_rate": 0.001, "loss": 1.4977, "step": 71000 }, { "epoch": 22.979961215255333, "grad_norm": 1.2574529647827148, "learning_rate": 0.001, "loss": 1.5084, "step": 71100 }, { "epoch": 23.012281835811248, "grad_norm": 1.1230933666229248, "learning_rate": 0.001, "loss": 1.4055, "step": 71200 }, { "epoch": 23.044602456367162, "grad_norm": 1.3886984586715698, "learning_rate": 0.001, "loss": 1.3259, "step": 71300 }, { "epoch": 23.076923076923077, "grad_norm": 1.0537104606628418, "learning_rate": 0.001, "loss": 1.315, "step": 71400 }, { "epoch": 23.10924369747899, "grad_norm": 1.4168047904968262, "learning_rate": 0.001, "loss": 1.3237, "step": 71500 }, { "epoch": 23.141564318034906, "grad_norm": 1.1438844203948975, "learning_rate": 0.001, "loss": 1.3395, "step": 71600 }, { "epoch": 23.17388493859082, "grad_norm": 1.0299094915390015, "learning_rate": 0.001, "loss": 1.343, "step": 71700 }, { "epoch": 23.206205559146735, "grad_norm": 1.0527406930923462, "learning_rate": 0.001, "loss": 1.3572, "step": 71800 }, { "epoch": 23.23852617970265, "grad_norm": 1.1058151721954346, "learning_rate": 0.001, "loss": 1.362, "step": 71900 }, { "epoch": 23.270846800258564, "grad_norm": 1.684592366218567, "learning_rate": 0.001, "loss": 1.3578, "step": 72000 }, { "epoch": 23.30316742081448, "grad_norm": 1.046174168586731, "learning_rate": 0.001, "loss": 1.371, "step": 72100 }, { "epoch": 23.335488041370393, "grad_norm": 1.2183332443237305, "learning_rate": 0.001, "loss": 1.3695, "step": 72200 }, { "epoch": 23.367808661926308, "grad_norm": 1.4460830688476562, "learning_rate": 0.001, "loss": 1.3809, "step": 72300 }, { "epoch": 23.400129282482222, "grad_norm": 1.3572590351104736, "learning_rate": 0.001, "loss": 1.3904, "step": 72400 }, { "epoch": 23.432449903038137, "grad_norm": 1.1535612344741821, "learning_rate": 0.001, "loss": 1.3792, "step": 72500 }, { "epoch": 23.46477052359405, "grad_norm": 1.208449125289917, "learning_rate": 0.001, "loss": 1.3839, "step": 72600 }, { "epoch": 23.49709114414997, "grad_norm": 1.3959027528762817, "learning_rate": 0.001, "loss": 1.3958, "step": 72700 }, { "epoch": 23.529411764705884, "grad_norm": 1.1388272047042847, "learning_rate": 0.001, "loss": 1.414, "step": 72800 }, { "epoch": 23.5617323852618, "grad_norm": 1.0771448612213135, "learning_rate": 0.001, "loss": 1.4082, "step": 72900 }, { "epoch": 23.594053005817713, "grad_norm": 1.0695377588272095, "learning_rate": 0.001, "loss": 1.4204, "step": 73000 }, { "epoch": 23.626373626373628, "grad_norm": 1.3739023208618164, "learning_rate": 0.001, "loss": 1.4197, "step": 73100 }, { "epoch": 23.658694246929542, "grad_norm": 1.3017525672912598, "learning_rate": 0.001, "loss": 1.4388, "step": 73200 }, { "epoch": 23.691014867485457, "grad_norm": 1.0027174949645996, "learning_rate": 0.001, "loss": 1.4295, "step": 73300 }, { "epoch": 23.72333548804137, "grad_norm": 1.24492347240448, "learning_rate": 0.001, "loss": 1.4506, "step": 73400 }, { "epoch": 23.755656108597286, "grad_norm": 1.2767709493637085, "learning_rate": 0.001, "loss": 1.4411, "step": 73500 }, { "epoch": 23.7879767291532, "grad_norm": 1.067986011505127, "learning_rate": 0.001, "loss": 1.4627, "step": 73600 }, { "epoch": 23.820297349709115, "grad_norm": 1.2810823917388916, "learning_rate": 0.001, "loss": 1.4388, "step": 73700 }, { "epoch": 23.85261797026503, "grad_norm": 1.2160966396331787, "learning_rate": 0.001, "loss": 1.45, "step": 73800 }, { "epoch": 23.884938590820944, "grad_norm": 1.150084376335144, "learning_rate": 0.001, "loss": 1.4455, "step": 73900 }, { "epoch": 23.91725921137686, "grad_norm": 1.461829423904419, "learning_rate": 0.001, "loss": 1.477, "step": 74000 }, { "epoch": 23.949579831932773, "grad_norm": 1.2498176097869873, "learning_rate": 0.001, "loss": 1.4618, "step": 74100 }, { "epoch": 23.981900452488688, "grad_norm": 1.094996452331543, "learning_rate": 0.001, "loss": 1.4653, "step": 74200 }, { "epoch": 24.014221073044602, "grad_norm": 1.0680311918258667, "learning_rate": 0.001, "loss": 1.374, "step": 74300 }, { "epoch": 24.046541693600517, "grad_norm": 1.501842975616455, "learning_rate": 0.001, "loss": 1.2727, "step": 74400 }, { "epoch": 24.07886231415643, "grad_norm": 1.3221544027328491, "learning_rate": 0.001, "loss": 1.2783, "step": 74500 }, { "epoch": 24.111182934712346, "grad_norm": 1.5800127983093262, "learning_rate": 0.001, "loss": 1.2852, "step": 74600 }, { "epoch": 24.14350355526826, "grad_norm": 1.010425090789795, "learning_rate": 0.001, "loss": 1.3149, "step": 74700 }, { "epoch": 24.175824175824175, "grad_norm": 1.1475272178649902, "learning_rate": 0.001, "loss": 1.3325, "step": 74800 }, { "epoch": 24.20814479638009, "grad_norm": 1.318882703781128, "learning_rate": 0.001, "loss": 1.302, "step": 74900 }, { "epoch": 24.240465416936004, "grad_norm": 1.1558727025985718, "learning_rate": 0.001, "loss": 1.3216, "step": 75000 }, { "epoch": 24.27278603749192, "grad_norm": 1.2066766023635864, "learning_rate": 0.001, "loss": 1.3327, "step": 75100 }, { "epoch": 24.305106658047833, "grad_norm": 1.556397795677185, "learning_rate": 0.001, "loss": 1.3357, "step": 75200 }, { "epoch": 24.337427278603748, "grad_norm": 1.1016446352005005, "learning_rate": 0.001, "loss": 1.3487, "step": 75300 }, { "epoch": 24.369747899159663, "grad_norm": 1.2599446773529053, "learning_rate": 0.001, "loss": 1.3405, "step": 75400 }, { "epoch": 24.402068519715577, "grad_norm": 1.5940327644348145, "learning_rate": 0.001, "loss": 1.3376, "step": 75500 }, { "epoch": 24.43438914027149, "grad_norm": 1.4542889595031738, "learning_rate": 0.001, "loss": 1.3444, "step": 75600 }, { "epoch": 24.466709760827406, "grad_norm": 1.3479986190795898, "learning_rate": 0.001, "loss": 1.3434, "step": 75700 }, { "epoch": 24.49903038138332, "grad_norm": 1.2094930410385132, "learning_rate": 0.001, "loss": 1.3654, "step": 75800 }, { "epoch": 24.53135100193924, "grad_norm": 1.3410935401916504, "learning_rate": 0.001, "loss": 1.3561, "step": 75900 }, { "epoch": 24.563671622495153, "grad_norm": 1.5417860746383667, "learning_rate": 0.001, "loss": 1.3866, "step": 76000 }, { "epoch": 24.595992243051068, "grad_norm": 1.5708967447280884, "learning_rate": 0.001, "loss": 1.3718, "step": 76100 }, { "epoch": 24.628312863606983, "grad_norm": 1.4419132471084595, "learning_rate": 0.001, "loss": 1.3725, "step": 76200 }, { "epoch": 24.660633484162897, "grad_norm": 1.2467317581176758, "learning_rate": 0.001, "loss": 1.3797, "step": 76300 }, { "epoch": 24.69295410471881, "grad_norm": 1.2097139358520508, "learning_rate": 0.001, "loss": 1.3985, "step": 76400 }, { "epoch": 24.725274725274726, "grad_norm": 1.7639493942260742, "learning_rate": 0.001, "loss": 1.3969, "step": 76500 }, { "epoch": 24.75759534583064, "grad_norm": 1.297530174255371, "learning_rate": 0.001, "loss": 1.4112, "step": 76600 }, { "epoch": 24.789915966386555, "grad_norm": 1.3182817697525024, "learning_rate": 0.001, "loss": 1.3942, "step": 76700 }, { "epoch": 24.82223658694247, "grad_norm": 1.274584412574768, "learning_rate": 0.001, "loss": 1.4026, "step": 76800 }, { "epoch": 24.854557207498384, "grad_norm": 1.0307163000106812, "learning_rate": 0.001, "loss": 1.4126, "step": 76900 }, { "epoch": 24.8868778280543, "grad_norm": 1.301167607307434, "learning_rate": 0.001, "loss": 1.4317, "step": 77000 }, { "epoch": 24.919198448610214, "grad_norm": 1.2866164445877075, "learning_rate": 0.001, "loss": 1.4193, "step": 77100 }, { "epoch": 24.951519069166128, "grad_norm": 1.2805465459823608, "learning_rate": 0.001, "loss": 1.4201, "step": 77200 }, { "epoch": 24.983839689722043, "grad_norm": 1.0741902589797974, "learning_rate": 0.001, "loss": 1.4212, "step": 77300 }, { "epoch": 25.016160310277957, "grad_norm": 1.5920143127441406, "learning_rate": 0.001, "loss": 1.2859, "step": 77400 }, { "epoch": 25.048480930833872, "grad_norm": 1.637182354927063, "learning_rate": 0.001, "loss": 1.263, "step": 77500 }, { "epoch": 25.080801551389786, "grad_norm": 1.4814125299453735, "learning_rate": 0.001, "loss": 1.2354, "step": 77600 }, { "epoch": 25.1131221719457, "grad_norm": 1.445595145225525, "learning_rate": 0.001, "loss": 1.2472, "step": 77700 }, { "epoch": 25.145442792501616, "grad_norm": 1.5496681928634644, "learning_rate": 0.001, "loss": 1.2577, "step": 77800 }, { "epoch": 25.17776341305753, "grad_norm": 1.2278058528900146, "learning_rate": 0.001, "loss": 1.2876, "step": 77900 }, { "epoch": 25.210084033613445, "grad_norm": 1.454778790473938, "learning_rate": 0.001, "loss": 1.2877, "step": 78000 }, { "epoch": 25.24240465416936, "grad_norm": 1.7783571481704712, "learning_rate": 0.001, "loss": 1.2856, "step": 78100 }, { "epoch": 25.274725274725274, "grad_norm": 1.9721797704696655, "learning_rate": 0.001, "loss": 1.2745, "step": 78200 }, { "epoch": 25.30704589528119, "grad_norm": 1.4700068235397339, "learning_rate": 0.001, "loss": 1.3014, "step": 78300 }, { "epoch": 25.339366515837103, "grad_norm": 1.4729541540145874, "learning_rate": 0.001, "loss": 1.2972, "step": 78400 }, { "epoch": 25.371687136393017, "grad_norm": 1.648840069770813, "learning_rate": 0.001, "loss": 1.3019, "step": 78500 }, { "epoch": 25.404007756948932, "grad_norm": 1.7799139022827148, "learning_rate": 0.001, "loss": 1.3123, "step": 78600 }, { "epoch": 25.436328377504847, "grad_norm": 1.3367239236831665, "learning_rate": 0.001, "loss": 1.3021, "step": 78700 }, { "epoch": 25.46864899806076, "grad_norm": 1.8208343982696533, "learning_rate": 0.001, "loss": 1.3211, "step": 78800 }, { "epoch": 25.50096961861668, "grad_norm": 1.535638689994812, "learning_rate": 0.001, "loss": 1.304, "step": 78900 }, { "epoch": 25.533290239172594, "grad_norm": 1.6062589883804321, "learning_rate": 0.001, "loss": 1.3474, "step": 79000 }, { "epoch": 25.56561085972851, "grad_norm": 1.499072790145874, "learning_rate": 0.001, "loss": 1.3299, "step": 79100 }, { "epoch": 25.597931480284423, "grad_norm": 1.9271340370178223, "learning_rate": 0.001, "loss": 1.3519, "step": 79200 }, { "epoch": 25.630252100840337, "grad_norm": 1.5534205436706543, "learning_rate": 0.001, "loss": 1.3256, "step": 79300 }, { "epoch": 25.662572721396252, "grad_norm": 1.6242579221725464, "learning_rate": 0.001, "loss": 1.3502, "step": 79400 }, { "epoch": 25.694893341952167, "grad_norm": 1.366665244102478, "learning_rate": 0.001, "loss": 1.3583, "step": 79500 }, { "epoch": 25.72721396250808, "grad_norm": 1.6597201824188232, "learning_rate": 0.001, "loss": 1.3511, "step": 79600 }, { "epoch": 25.759534583063996, "grad_norm": 1.3746975660324097, "learning_rate": 0.001, "loss": 1.3368, "step": 79700 }, { "epoch": 25.79185520361991, "grad_norm": 1.496278166770935, "learning_rate": 0.001, "loss": 1.3648, "step": 79800 }, { "epoch": 25.824175824175825, "grad_norm": 1.7412163019180298, "learning_rate": 0.001, "loss": 1.3511, "step": 79900 }, { "epoch": 25.85649644473174, "grad_norm": 1.6861299276351929, "learning_rate": 0.001, "loss": 1.3799, "step": 80000 }, { "epoch": 25.888817065287654, "grad_norm": 1.5833033323287964, "learning_rate": 0.001, "loss": 1.3645, "step": 80100 }, { "epoch": 25.92113768584357, "grad_norm": 1.5015044212341309, "learning_rate": 0.001, "loss": 1.3668, "step": 80200 }, { "epoch": 25.953458306399483, "grad_norm": 1.9954609870910645, "learning_rate": 0.001, "loss": 1.3749, "step": 80300 }, { "epoch": 25.985778926955398, "grad_norm": 1.4081565141677856, "learning_rate": 0.001, "loss": 1.3732, "step": 80400 }, { "epoch": 26.018099547511312, "grad_norm": 1.4699125289916992, "learning_rate": 0.001, "loss": 1.2887, "step": 80500 }, { "epoch": 26.050420168067227, "grad_norm": 0.9101908206939697, "learning_rate": 0.001, "loss": 1.1946, "step": 80600 }, { "epoch": 26.08274078862314, "grad_norm": 1.125428557395935, "learning_rate": 0.001, "loss": 1.208, "step": 80700 }, { "epoch": 26.115061409179056, "grad_norm": 1.20472252368927, "learning_rate": 0.001, "loss": 1.2274, "step": 80800 }, { "epoch": 26.14738202973497, "grad_norm": 1.2856500148773193, "learning_rate": 0.001, "loss": 1.2139, "step": 80900 }, { "epoch": 26.179702650290885, "grad_norm": 1.4371981620788574, "learning_rate": 0.001, "loss": 1.2145, "step": 81000 }, { "epoch": 26.2120232708468, "grad_norm": 1.0588725805282593, "learning_rate": 0.001, "loss": 1.2267, "step": 81100 }, { "epoch": 26.244343891402714, "grad_norm": 1.1403706073760986, "learning_rate": 0.001, "loss": 1.2378, "step": 81200 }, { "epoch": 26.27666451195863, "grad_norm": 1.1909862756729126, "learning_rate": 0.001, "loss": 1.2542, "step": 81300 }, { "epoch": 26.308985132514543, "grad_norm": 1.1355961561203003, "learning_rate": 0.001, "loss": 1.2556, "step": 81400 }, { "epoch": 26.341305753070458, "grad_norm": 1.253331184387207, "learning_rate": 0.001, "loss": 1.2709, "step": 81500 }, { "epoch": 26.373626373626372, "grad_norm": 1.317488431930542, "learning_rate": 0.001, "loss": 1.2582, "step": 81600 }, { "epoch": 26.405946994182287, "grad_norm": 1.0958579778671265, "learning_rate": 0.001, "loss": 1.2751, "step": 81700 }, { "epoch": 26.4382676147382, "grad_norm": 1.001457929611206, "learning_rate": 0.001, "loss": 1.2582, "step": 81800 }, { "epoch": 26.470588235294116, "grad_norm": 1.0963808298110962, "learning_rate": 0.001, "loss": 1.2701, "step": 81900 }, { "epoch": 26.50290885585003, "grad_norm": 1.261762022972107, "learning_rate": 0.001, "loss": 1.2868, "step": 82000 }, { "epoch": 26.53522947640595, "grad_norm": 1.094172477722168, "learning_rate": 0.001, "loss": 1.2781, "step": 82100 }, { "epoch": 26.567550096961863, "grad_norm": 1.1191786527633667, "learning_rate": 0.001, "loss": 1.2885, "step": 82200 }, { "epoch": 26.599870717517778, "grad_norm": 1.3089439868927002, "learning_rate": 0.001, "loss": 1.2962, "step": 82300 }, { "epoch": 26.632191338073692, "grad_norm": 0.9621067047119141, "learning_rate": 0.001, "loss": 1.3008, "step": 82400 }, { "epoch": 26.664511958629607, "grad_norm": 1.2809053659439087, "learning_rate": 0.001, "loss": 1.3121, "step": 82500 }, { "epoch": 26.69683257918552, "grad_norm": 1.125797986984253, "learning_rate": 0.001, "loss": 1.2999, "step": 82600 }, { "epoch": 26.729153199741436, "grad_norm": 1.3780908584594727, "learning_rate": 0.001, "loss": 1.3192, "step": 82700 }, { "epoch": 26.76147382029735, "grad_norm": 1.2585889101028442, "learning_rate": 0.001, "loss": 1.3113, "step": 82800 }, { "epoch": 26.793794440853265, "grad_norm": 0.9433250427246094, "learning_rate": 0.001, "loss": 1.32, "step": 82900 }, { "epoch": 26.82611506140918, "grad_norm": 1.3953701257705688, "learning_rate": 0.001, "loss": 1.3285, "step": 83000 }, { "epoch": 26.858435681965094, "grad_norm": 1.5540825128555298, "learning_rate": 0.001, "loss": 1.3615, "step": 83100 }, { "epoch": 26.89075630252101, "grad_norm": 1.2253857851028442, "learning_rate": 0.001, "loss": 1.3356, "step": 83200 }, { "epoch": 26.923076923076923, "grad_norm": 1.203088402748108, "learning_rate": 0.001, "loss": 1.3272, "step": 83300 }, { "epoch": 26.955397543632838, "grad_norm": 1.4294911623001099, "learning_rate": 0.001, "loss": 1.3356, "step": 83400 }, { "epoch": 26.987718164188752, "grad_norm": 1.2564371824264526, "learning_rate": 0.001, "loss": 1.3468, "step": 83500 }, { "epoch": 27.020038784744667, "grad_norm": 1.0057709217071533, "learning_rate": 0.001, "loss": 1.2465, "step": 83600 }, { "epoch": 27.05235940530058, "grad_norm": 1.23238205909729, "learning_rate": 0.001, "loss": 1.1518, "step": 83700 }, { "epoch": 27.084680025856496, "grad_norm": 1.1697194576263428, "learning_rate": 0.001, "loss": 1.1605, "step": 83800 }, { "epoch": 27.11700064641241, "grad_norm": 1.1245832443237305, "learning_rate": 0.001, "loss": 1.1897, "step": 83900 }, { "epoch": 27.149321266968325, "grad_norm": 1.0499457120895386, "learning_rate": 0.001, "loss": 1.1816, "step": 84000 }, { "epoch": 27.18164188752424, "grad_norm": 1.1450672149658203, "learning_rate": 0.001, "loss": 1.1806, "step": 84100 }, { "epoch": 27.213962508080154, "grad_norm": 1.2955071926116943, "learning_rate": 0.001, "loss": 1.2078, "step": 84200 }, { "epoch": 27.24628312863607, "grad_norm": 1.0418306589126587, "learning_rate": 0.001, "loss": 1.1855, "step": 84300 }, { "epoch": 27.278603749191983, "grad_norm": 1.3569614887237549, "learning_rate": 0.001, "loss": 1.2211, "step": 84400 }, { "epoch": 27.310924369747898, "grad_norm": 1.3948146104812622, "learning_rate": 0.001, "loss": 1.2193, "step": 84500 }, { "epoch": 27.343244990303813, "grad_norm": 1.1107206344604492, "learning_rate": 0.001, "loss": 1.2285, "step": 84600 }, { "epoch": 27.375565610859727, "grad_norm": 1.2725037336349487, "learning_rate": 0.001, "loss": 1.2285, "step": 84700 }, { "epoch": 27.40788623141564, "grad_norm": 1.153855562210083, "learning_rate": 0.001, "loss": 1.2255, "step": 84800 }, { "epoch": 27.440206851971556, "grad_norm": 0.8474412560462952, "learning_rate": 0.001, "loss": 1.2386, "step": 84900 }, { "epoch": 27.47252747252747, "grad_norm": 1.1903868913650513, "learning_rate": 0.001, "loss": 1.2472, "step": 85000 }, { "epoch": 27.50484809308339, "grad_norm": 1.0249782800674438, "learning_rate": 0.001, "loss": 1.2467, "step": 85100 }, { "epoch": 27.537168713639304, "grad_norm": 1.3111518621444702, "learning_rate": 0.001, "loss": 1.2523, "step": 85200 }, { "epoch": 27.569489334195218, "grad_norm": 0.8078916072845459, "learning_rate": 0.001, "loss": 1.26, "step": 85300 }, { "epoch": 27.601809954751133, "grad_norm": 1.0639981031417847, "learning_rate": 0.001, "loss": 1.2497, "step": 85400 }, { "epoch": 27.634130575307047, "grad_norm": 1.1080636978149414, "learning_rate": 0.001, "loss": 1.2663, "step": 85500 }, { "epoch": 27.66645119586296, "grad_norm": 0.9247094988822937, "learning_rate": 0.001, "loss": 1.2719, "step": 85600 }, { "epoch": 27.698771816418876, "grad_norm": 1.3015022277832031, "learning_rate": 0.001, "loss": 1.268, "step": 85700 }, { "epoch": 27.73109243697479, "grad_norm": 1.2387443780899048, "learning_rate": 0.001, "loss": 1.2641, "step": 85800 }, { "epoch": 27.763413057530705, "grad_norm": 1.0121554136276245, "learning_rate": 0.001, "loss": 1.2793, "step": 85900 }, { "epoch": 27.79573367808662, "grad_norm": 1.4396352767944336, "learning_rate": 0.001, "loss": 1.26, "step": 86000 }, { "epoch": 27.828054298642535, "grad_norm": 1.3812124729156494, "learning_rate": 0.001, "loss": 1.2812, "step": 86100 }, { "epoch": 27.86037491919845, "grad_norm": 1.072251796722412, "learning_rate": 0.001, "loss": 1.2902, "step": 86200 }, { "epoch": 27.892695539754364, "grad_norm": 1.041167140007019, "learning_rate": 0.001, "loss": 1.2868, "step": 86300 }, { "epoch": 27.92501616031028, "grad_norm": 1.3763902187347412, "learning_rate": 0.001, "loss": 1.2942, "step": 86400 }, { "epoch": 27.957336780866193, "grad_norm": 0.8273499011993408, "learning_rate": 0.001, "loss": 1.3093, "step": 86500 }, { "epoch": 27.989657401422107, "grad_norm": 0.8569942712783813, "learning_rate": 0.001, "loss": 1.3082, "step": 86600 }, { "epoch": 28.021978021978022, "grad_norm": 1.6358920335769653, "learning_rate": 0.001, "loss": 1.1964, "step": 86700 }, { "epoch": 28.054298642533936, "grad_norm": 1.0004743337631226, "learning_rate": 0.001, "loss": 1.1128, "step": 86800 }, { "epoch": 28.08661926308985, "grad_norm": 1.163028597831726, "learning_rate": 0.001, "loss": 1.1301, "step": 86900 }, { "epoch": 28.118939883645766, "grad_norm": 0.991905927658081, "learning_rate": 0.001, "loss": 1.1481, "step": 87000 }, { "epoch": 28.15126050420168, "grad_norm": 0.9166576266288757, "learning_rate": 0.001, "loss": 1.1396, "step": 87100 }, { "epoch": 28.183581124757595, "grad_norm": 1.2474740743637085, "learning_rate": 0.001, "loss": 1.1617, "step": 87200 }, { "epoch": 28.21590174531351, "grad_norm": 1.2966070175170898, "learning_rate": 0.001, "loss": 1.1586, "step": 87300 }, { "epoch": 28.248222365869424, "grad_norm": 1.1202948093414307, "learning_rate": 0.001, "loss": 1.1632, "step": 87400 }, { "epoch": 28.28054298642534, "grad_norm": 1.090155839920044, "learning_rate": 0.001, "loss": 1.1846, "step": 87500 }, { "epoch": 28.312863606981253, "grad_norm": 1.269773244857788, "learning_rate": 0.001, "loss": 1.1873, "step": 87600 }, { "epoch": 28.345184227537167, "grad_norm": 1.0590721368789673, "learning_rate": 0.001, "loss": 1.1649, "step": 87700 }, { "epoch": 28.377504848093082, "grad_norm": 1.083297610282898, "learning_rate": 0.001, "loss": 1.192, "step": 87800 }, { "epoch": 28.409825468648997, "grad_norm": 0.9660324454307556, "learning_rate": 0.001, "loss": 1.2088, "step": 87900 }, { "epoch": 28.44214608920491, "grad_norm": 0.9054110646247864, "learning_rate": 0.001, "loss": 1.1976, "step": 88000 }, { "epoch": 28.474466709760826, "grad_norm": 1.1105581521987915, "learning_rate": 0.001, "loss": 1.1832, "step": 88100 }, { "epoch": 28.50678733031674, "grad_norm": 1.3275364637374878, "learning_rate": 0.001, "loss": 1.2088, "step": 88200 }, { "epoch": 28.53910795087266, "grad_norm": 1.0542171001434326, "learning_rate": 0.001, "loss": 1.2133, "step": 88300 }, { "epoch": 28.571428571428573, "grad_norm": 0.87533038854599, "learning_rate": 0.001, "loss": 1.2057, "step": 88400 }, { "epoch": 28.603749191984488, "grad_norm": 0.9360105395317078, "learning_rate": 0.001, "loss": 1.2249, "step": 88500 }, { "epoch": 28.636069812540402, "grad_norm": 0.9831392765045166, "learning_rate": 0.001, "loss": 1.2151, "step": 88600 }, { "epoch": 28.668390433096317, "grad_norm": 1.0925713777542114, "learning_rate": 0.001, "loss": 1.2448, "step": 88700 }, { "epoch": 28.70071105365223, "grad_norm": 0.879048228263855, "learning_rate": 0.001, "loss": 1.216, "step": 88800 }, { "epoch": 28.733031674208146, "grad_norm": 1.4416413307189941, "learning_rate": 0.001, "loss": 1.2268, "step": 88900 }, { "epoch": 28.76535229476406, "grad_norm": 1.619722843170166, "learning_rate": 0.001, "loss": 1.2504, "step": 89000 }, { "epoch": 28.797672915319975, "grad_norm": 1.1090532541275024, "learning_rate": 0.001, "loss": 1.2477, "step": 89100 }, { "epoch": 28.82999353587589, "grad_norm": 1.1145672798156738, "learning_rate": 0.001, "loss": 1.2572, "step": 89200 }, { "epoch": 28.862314156431804, "grad_norm": 0.971638560295105, "learning_rate": 0.001, "loss": 1.2676, "step": 89300 }, { "epoch": 28.89463477698772, "grad_norm": 1.1659706830978394, "learning_rate": 0.001, "loss": 1.2602, "step": 89400 }, { "epoch": 28.926955397543633, "grad_norm": 0.9213696122169495, "learning_rate": 0.001, "loss": 1.2647, "step": 89500 }, { "epoch": 28.959276018099548, "grad_norm": 1.0233408212661743, "learning_rate": 0.001, "loss": 1.2674, "step": 89600 }, { "epoch": 28.991596638655462, "grad_norm": 1.0114099979400635, "learning_rate": 0.001, "loss": 1.2738, "step": 89700 }, { "epoch": 29.023917259211377, "grad_norm": 1.039438009262085, "learning_rate": 0.001, "loss": 1.1347, "step": 89800 }, { "epoch": 29.05623787976729, "grad_norm": 1.383270502090454, "learning_rate": 0.001, "loss": 1.0848, "step": 89900 }, { "epoch": 29.088558500323206, "grad_norm": 1.0938317775726318, "learning_rate": 0.001, "loss": 1.0924, "step": 90000 }, { "epoch": 29.12087912087912, "grad_norm": 1.2125070095062256, "learning_rate": 0.001, "loss": 1.0983, "step": 90100 }, { "epoch": 29.153199741435035, "grad_norm": 1.1463993787765503, "learning_rate": 0.001, "loss": 1.1264, "step": 90200 }, { "epoch": 29.18552036199095, "grad_norm": 1.4031280279159546, "learning_rate": 0.001, "loss": 1.1252, "step": 90300 }, { "epoch": 29.217840982546864, "grad_norm": 0.9598605036735535, "learning_rate": 0.001, "loss": 1.1183, "step": 90400 }, { "epoch": 29.25016160310278, "grad_norm": 1.2645177841186523, "learning_rate": 0.001, "loss": 1.1496, "step": 90500 }, { "epoch": 29.282482223658693, "grad_norm": 1.0181360244750977, "learning_rate": 0.001, "loss": 1.1413, "step": 90600 }, { "epoch": 29.314802844214608, "grad_norm": 1.2696707248687744, "learning_rate": 0.001, "loss": 1.1346, "step": 90700 }, { "epoch": 29.347123464770522, "grad_norm": 1.0124163627624512, "learning_rate": 0.001, "loss": 1.1493, "step": 90800 }, { "epoch": 29.379444085326437, "grad_norm": 1.348593831062317, "learning_rate": 0.001, "loss": 1.1467, "step": 90900 }, { "epoch": 29.41176470588235, "grad_norm": 1.2244763374328613, "learning_rate": 0.001, "loss": 1.1379, "step": 91000 }, { "epoch": 29.444085326438266, "grad_norm": 1.229876160621643, "learning_rate": 0.001, "loss": 1.1581, "step": 91100 }, { "epoch": 29.47640594699418, "grad_norm": 1.0367881059646606, "learning_rate": 0.001, "loss": 1.1661, "step": 91200 }, { "epoch": 29.5087265675501, "grad_norm": 1.4910993576049805, "learning_rate": 0.001, "loss": 1.1748, "step": 91300 }, { "epoch": 29.541047188106013, "grad_norm": 1.1240816116333008, "learning_rate": 0.001, "loss": 1.1898, "step": 91400 }, { "epoch": 29.573367808661928, "grad_norm": 1.0969679355621338, "learning_rate": 0.001, "loss": 1.1821, "step": 91500 }, { "epoch": 29.605688429217842, "grad_norm": 1.3014343976974487, "learning_rate": 0.001, "loss": 1.1861, "step": 91600 }, { "epoch": 29.638009049773757, "grad_norm": 0.852351725101471, "learning_rate": 0.001, "loss": 1.1915, "step": 91700 }, { "epoch": 29.67032967032967, "grad_norm": 0.8870136737823486, "learning_rate": 0.001, "loss": 1.1972, "step": 91800 }, { "epoch": 29.702650290885586, "grad_norm": 1.129637598991394, "learning_rate": 0.001, "loss": 1.2077, "step": 91900 }, { "epoch": 29.7349709114415, "grad_norm": 1.210863471031189, "learning_rate": 0.001, "loss": 1.1948, "step": 92000 }, { "epoch": 29.767291531997415, "grad_norm": 1.10159432888031, "learning_rate": 0.001, "loss": 1.2118, "step": 92100 }, { "epoch": 29.79961215255333, "grad_norm": 1.000765323638916, "learning_rate": 0.001, "loss": 1.2065, "step": 92200 }, { "epoch": 29.831932773109244, "grad_norm": 1.2256118059158325, "learning_rate": 0.001, "loss": 1.2094, "step": 92300 }, { "epoch": 29.86425339366516, "grad_norm": 1.0303242206573486, "learning_rate": 0.001, "loss": 1.2251, "step": 92400 }, { "epoch": 29.896574014221073, "grad_norm": 1.5883982181549072, "learning_rate": 0.001, "loss": 1.2179, "step": 92500 }, { "epoch": 29.928894634776988, "grad_norm": 1.179963231086731, "learning_rate": 0.001, "loss": 1.2288, "step": 92600 }, { "epoch": 29.961215255332903, "grad_norm": 0.95389723777771, "learning_rate": 0.001, "loss": 1.2296, "step": 92700 }, { "epoch": 29.993535875888817, "grad_norm": 1.052783489227295, "learning_rate": 0.001, "loss": 1.2375, "step": 92800 }, { "epoch": 30.02585649644473, "grad_norm": 1.0525052547454834, "learning_rate": 0.001, "loss": 1.0845, "step": 92900 }, { "epoch": 30.058177117000646, "grad_norm": 1.197313904762268, "learning_rate": 0.001, "loss": 1.0555, "step": 93000 }, { "epoch": 30.09049773755656, "grad_norm": 0.9721173048019409, "learning_rate": 0.001, "loss": 1.0587, "step": 93100 }, { "epoch": 30.122818358112475, "grad_norm": 1.1624667644500732, "learning_rate": 0.001, "loss": 1.07, "step": 93200 }, { "epoch": 30.15513897866839, "grad_norm": 1.4760102033615112, "learning_rate": 0.001, "loss": 1.086, "step": 93300 }, { "epoch": 30.187459599224304, "grad_norm": 0.9805911779403687, "learning_rate": 0.001, "loss": 1.0824, "step": 93400 }, { "epoch": 30.21978021978022, "grad_norm": 1.0840893983840942, "learning_rate": 0.001, "loss": 1.0937, "step": 93500 }, { "epoch": 30.252100840336134, "grad_norm": 1.2991894483566284, "learning_rate": 0.001, "loss": 1.0905, "step": 93600 }, { "epoch": 30.284421460892048, "grad_norm": 1.1039001941680908, "learning_rate": 0.001, "loss": 1.1143, "step": 93700 }, { "epoch": 30.316742081447963, "grad_norm": 0.9290472269058228, "learning_rate": 0.001, "loss": 1.1017, "step": 93800 }, { "epoch": 30.349062702003877, "grad_norm": 1.234883189201355, "learning_rate": 0.001, "loss": 1.1118, "step": 93900 }, { "epoch": 30.381383322559792, "grad_norm": 1.0684950351715088, "learning_rate": 0.001, "loss": 1.1207, "step": 94000 }, { "epoch": 30.413703943115706, "grad_norm": 1.065771460533142, "learning_rate": 0.001, "loss": 1.1167, "step": 94100 }, { "epoch": 30.44602456367162, "grad_norm": 1.1300389766693115, "learning_rate": 0.001, "loss": 1.1386, "step": 94200 }, { "epoch": 30.478345184227535, "grad_norm": 0.9842827916145325, "learning_rate": 0.001, "loss": 1.1402, "step": 94300 }, { "epoch": 30.51066580478345, "grad_norm": 1.1812926530838013, "learning_rate": 0.001, "loss": 1.149, "step": 94400 }, { "epoch": 30.542986425339368, "grad_norm": 1.3949733972549438, "learning_rate": 0.001, "loss": 1.1454, "step": 94500 }, { "epoch": 30.575307045895283, "grad_norm": 1.2021069526672363, "learning_rate": 0.001, "loss": 1.1596, "step": 94600 }, { "epoch": 30.607627666451197, "grad_norm": 1.3708795309066772, "learning_rate": 0.001, "loss": 1.1419, "step": 94700 }, { "epoch": 30.639948287007112, "grad_norm": 1.3510147333145142, "learning_rate": 0.001, "loss": 1.1555, "step": 94800 }, { "epoch": 30.672268907563026, "grad_norm": 1.262899398803711, "learning_rate": 0.001, "loss": 1.1711, "step": 94900 }, { "epoch": 30.70458952811894, "grad_norm": 1.3025704622268677, "learning_rate": 0.001, "loss": 1.1607, "step": 95000 }, { "epoch": 30.736910148674855, "grad_norm": 1.0963159799575806, "learning_rate": 0.001, "loss": 1.1625, "step": 95100 }, { "epoch": 30.76923076923077, "grad_norm": 1.3791433572769165, "learning_rate": 0.001, "loss": 1.1821, "step": 95200 }, { "epoch": 30.801551389786685, "grad_norm": 1.234837293624878, "learning_rate": 0.001, "loss": 1.172, "step": 95300 }, { "epoch": 30.8338720103426, "grad_norm": 1.2549554109573364, "learning_rate": 0.001, "loss": 1.1748, "step": 95400 }, { "epoch": 30.866192630898514, "grad_norm": 1.2774074077606201, "learning_rate": 0.001, "loss": 1.1882, "step": 95500 }, { "epoch": 30.89851325145443, "grad_norm": 1.0059778690338135, "learning_rate": 0.001, "loss": 1.1618, "step": 95600 }, { "epoch": 30.930833872010343, "grad_norm": 1.0495989322662354, "learning_rate": 0.001, "loss": 1.1937, "step": 95700 }, { "epoch": 30.963154492566257, "grad_norm": 1.2212027311325073, "learning_rate": 0.001, "loss": 1.1753, "step": 95800 }, { "epoch": 30.995475113122172, "grad_norm": 1.4093235731124878, "learning_rate": 0.001, "loss": 1.1818, "step": 95900 }, { "epoch": 31.027795733678087, "grad_norm": 1.3897080421447754, "learning_rate": 0.001, "loss": 1.063, "step": 96000 }, { "epoch": 31.060116354234, "grad_norm": 1.1094200611114502, "learning_rate": 0.001, "loss": 1.0117, "step": 96100 }, { "epoch": 31.092436974789916, "grad_norm": 1.5133776664733887, "learning_rate": 0.001, "loss": 1.0223, "step": 96200 }, { "epoch": 31.12475759534583, "grad_norm": 1.0746722221374512, "learning_rate": 0.001, "loss": 1.0374, "step": 96300 }, { "epoch": 31.157078215901745, "grad_norm": 1.0891294479370117, "learning_rate": 0.001, "loss": 1.0397, "step": 96400 }, { "epoch": 31.18939883645766, "grad_norm": 1.2259798049926758, "learning_rate": 0.001, "loss": 1.0539, "step": 96500 }, { "epoch": 31.221719457013574, "grad_norm": 1.1960943937301636, "learning_rate": 0.001, "loss": 1.0502, "step": 96600 }, { "epoch": 31.25404007756949, "grad_norm": 1.1311591863632202, "learning_rate": 0.001, "loss": 1.0632, "step": 96700 }, { "epoch": 31.286360698125403, "grad_norm": 1.1033596992492676, "learning_rate": 0.001, "loss": 1.0726, "step": 96800 }, { "epoch": 31.318681318681318, "grad_norm": 1.1648850440979004, "learning_rate": 0.001, "loss": 1.0888, "step": 96900 }, { "epoch": 31.351001939237232, "grad_norm": 1.3344072103500366, "learning_rate": 0.001, "loss": 1.0892, "step": 97000 }, { "epoch": 31.383322559793147, "grad_norm": 1.0022096633911133, "learning_rate": 0.001, "loss": 1.0916, "step": 97100 }, { "epoch": 31.41564318034906, "grad_norm": 1.1831748485565186, "learning_rate": 0.001, "loss": 1.0789, "step": 97200 }, { "epoch": 31.447963800904976, "grad_norm": 0.9473108053207397, "learning_rate": 0.001, "loss": 1.1024, "step": 97300 }, { "epoch": 31.48028442146089, "grad_norm": 1.2230292558670044, "learning_rate": 0.001, "loss": 1.1002, "step": 97400 }, { "epoch": 31.51260504201681, "grad_norm": 1.2893807888031006, "learning_rate": 0.001, "loss": 1.1161, "step": 97500 }, { "epoch": 31.544925662572723, "grad_norm": 1.1164630651474, "learning_rate": 0.001, "loss": 1.1116, "step": 97600 }, { "epoch": 31.577246283128638, "grad_norm": 1.2065080404281616, "learning_rate": 0.001, "loss": 1.1109, "step": 97700 }, { "epoch": 31.609566903684552, "grad_norm": 0.9655341506004333, "learning_rate": 0.001, "loss": 1.126, "step": 97800 }, { "epoch": 31.641887524240467, "grad_norm": 1.22157883644104, "learning_rate": 0.001, "loss": 1.1174, "step": 97900 }, { "epoch": 31.67420814479638, "grad_norm": 1.0583332777023315, "learning_rate": 0.001, "loss": 1.1371, "step": 98000 }, { "epoch": 31.706528765352296, "grad_norm": 1.1224162578582764, "learning_rate": 0.001, "loss": 1.1251, "step": 98100 }, { "epoch": 31.73884938590821, "grad_norm": 1.3786228895187378, "learning_rate": 0.001, "loss": 1.1385, "step": 98200 }, { "epoch": 31.771170006464125, "grad_norm": 1.0258780717849731, "learning_rate": 0.001, "loss": 1.1371, "step": 98300 }, { "epoch": 31.80349062702004, "grad_norm": 1.2368652820587158, "learning_rate": 0.001, "loss": 1.1487, "step": 98400 }, { "epoch": 31.835811247575954, "grad_norm": 1.310425877571106, "learning_rate": 0.001, "loss": 1.1435, "step": 98500 }, { "epoch": 31.86813186813187, "grad_norm": 1.394752025604248, "learning_rate": 0.001, "loss": 1.1653, "step": 98600 }, { "epoch": 31.900452488687783, "grad_norm": 1.1370538473129272, "learning_rate": 0.001, "loss": 1.1491, "step": 98700 }, { "epoch": 31.932773109243698, "grad_norm": 1.4837526082992554, "learning_rate": 0.001, "loss": 1.1581, "step": 98800 }, { "epoch": 31.965093729799612, "grad_norm": 1.0629994869232178, "learning_rate": 0.001, "loss": 1.1481, "step": 98900 }, { "epoch": 31.997414350355527, "grad_norm": 1.4832051992416382, "learning_rate": 0.001, "loss": 1.1519, "step": 99000 }, { "epoch": 32.02973497091144, "grad_norm": 1.11814546585083, "learning_rate": 0.001, "loss": 1.0001, "step": 99100 }, { "epoch": 32.062055591467356, "grad_norm": 1.2712990045547485, "learning_rate": 0.001, "loss": 0.9871, "step": 99200 }, { "epoch": 32.09437621202327, "grad_norm": 1.2008378505706787, "learning_rate": 0.001, "loss": 1.014, "step": 99300 }, { "epoch": 32.126696832579185, "grad_norm": 1.1301990747451782, "learning_rate": 0.001, "loss": 1.0068, "step": 99400 }, { "epoch": 32.1590174531351, "grad_norm": 1.0728790760040283, "learning_rate": 0.001, "loss": 1.0166, "step": 99500 }, { "epoch": 32.191338073691014, "grad_norm": 1.0921303033828735, "learning_rate": 0.001, "loss": 1.0306, "step": 99600 }, { "epoch": 32.22365869424693, "grad_norm": 1.2775609493255615, "learning_rate": 0.001, "loss": 1.0246, "step": 99700 }, { "epoch": 32.25597931480284, "grad_norm": 1.38579523563385, "learning_rate": 0.001, "loss": 1.028, "step": 99800 }, { "epoch": 32.28829993535876, "grad_norm": 1.011831283569336, "learning_rate": 0.001, "loss": 1.037, "step": 99900 }, { "epoch": 32.32062055591467, "grad_norm": 1.125707745552063, "learning_rate": 0.001, "loss": 1.0336, "step": 100000 }, { "epoch": 32.35294117647059, "grad_norm": 1.22984778881073, "learning_rate": 0.001, "loss": 1.0517, "step": 100100 }, { "epoch": 32.3852617970265, "grad_norm": 1.1265634298324585, "learning_rate": 0.001, "loss": 1.0506, "step": 100200 }, { "epoch": 32.417582417582416, "grad_norm": 1.173170566558838, "learning_rate": 0.001, "loss": 1.0649, "step": 100300 }, { "epoch": 32.44990303813833, "grad_norm": 1.3880183696746826, "learning_rate": 0.001, "loss": 1.0609, "step": 100400 }, { "epoch": 32.482223658694245, "grad_norm": 1.3186455965042114, "learning_rate": 0.001, "loss": 1.0727, "step": 100500 }, { "epoch": 32.51454427925016, "grad_norm": 1.3318346738815308, "learning_rate": 0.001, "loss": 1.0773, "step": 100600 }, { "epoch": 32.546864899806074, "grad_norm": 1.4159512519836426, "learning_rate": 0.001, "loss": 1.063, "step": 100700 }, { "epoch": 32.57918552036199, "grad_norm": 1.1840486526489258, "learning_rate": 0.001, "loss": 1.0905, "step": 100800 }, { "epoch": 32.6115061409179, "grad_norm": 1.3723328113555908, "learning_rate": 0.001, "loss": 1.0947, "step": 100900 }, { "epoch": 32.64382676147382, "grad_norm": 1.252594232559204, "learning_rate": 0.001, "loss": 1.1027, "step": 101000 }, { "epoch": 32.67614738202973, "grad_norm": 1.3631998300552368, "learning_rate": 0.001, "loss": 1.0952, "step": 101100 }, { "epoch": 32.70846800258565, "grad_norm": 1.2055110931396484, "learning_rate": 0.001, "loss": 1.1028, "step": 101200 }, { "epoch": 32.74078862314156, "grad_norm": 1.1945734024047852, "learning_rate": 0.001, "loss": 1.1086, "step": 101300 }, { "epoch": 32.773109243697476, "grad_norm": 1.0929081439971924, "learning_rate": 0.001, "loss": 1.1045, "step": 101400 }, { "epoch": 32.80542986425339, "grad_norm": 1.2250012159347534, "learning_rate": 0.001, "loss": 1.111, "step": 101500 }, { "epoch": 32.837750484809305, "grad_norm": 1.1387169361114502, "learning_rate": 0.001, "loss": 1.1224, "step": 101600 }, { "epoch": 32.87007110536522, "grad_norm": 1.329113245010376, "learning_rate": 0.001, "loss": 1.1198, "step": 101700 }, { "epoch": 32.902391725921134, "grad_norm": 1.1801583766937256, "learning_rate": 0.001, "loss": 1.1186, "step": 101800 }, { "epoch": 32.93471234647705, "grad_norm": 1.4228872060775757, "learning_rate": 0.001, "loss": 1.1176, "step": 101900 }, { "epoch": 32.967032967032964, "grad_norm": 1.2361241579055786, "learning_rate": 0.001, "loss": 1.1195, "step": 102000 }, { "epoch": 32.999353587588885, "grad_norm": 1.18082594871521, "learning_rate": 0.001, "loss": 1.1224, "step": 102100 }, { "epoch": 33.0316742081448, "grad_norm": 1.5248245000839233, "learning_rate": 0.001, "loss": 0.9501, "step": 102200 }, { "epoch": 33.063994828700714, "grad_norm": 1.400156855583191, "learning_rate": 0.001, "loss": 0.9621, "step": 102300 }, { "epoch": 33.09631544925663, "grad_norm": 1.1114094257354736, "learning_rate": 0.001, "loss": 0.9721, "step": 102400 }, { "epoch": 33.12863606981254, "grad_norm": 1.5030548572540283, "learning_rate": 0.001, "loss": 0.9872, "step": 102500 }, { "epoch": 33.16095669036846, "grad_norm": 1.2758039236068726, "learning_rate": 0.001, "loss": 0.9961, "step": 102600 }, { "epoch": 33.19327731092437, "grad_norm": 1.3599331378936768, "learning_rate": 0.001, "loss": 0.9971, "step": 102700 }, { "epoch": 33.22559793148029, "grad_norm": 1.206902265548706, "learning_rate": 0.001, "loss": 0.9982, "step": 102800 }, { "epoch": 33.2579185520362, "grad_norm": 1.2418851852416992, "learning_rate": 0.001, "loss": 0.9859, "step": 102900 }, { "epoch": 33.290239172592116, "grad_norm": 1.3933645486831665, "learning_rate": 0.001, "loss": 1.0115, "step": 103000 }, { "epoch": 33.32255979314803, "grad_norm": 1.371748685836792, "learning_rate": 0.001, "loss": 1.0016, "step": 103100 }, { "epoch": 33.354880413703945, "grad_norm": 1.4530311822891235, "learning_rate": 0.001, "loss": 1.0195, "step": 103200 }, { "epoch": 33.38720103425986, "grad_norm": 1.4381709098815918, "learning_rate": 0.001, "loss": 1.0253, "step": 103300 }, { "epoch": 33.419521654815775, "grad_norm": 1.274289846420288, "learning_rate": 0.001, "loss": 1.0193, "step": 103400 }, { "epoch": 33.45184227537169, "grad_norm": 1.3395988941192627, "learning_rate": 0.001, "loss": 1.0287, "step": 103500 }, { "epoch": 33.484162895927604, "grad_norm": 1.6195616722106934, "learning_rate": 0.001, "loss": 1.0194, "step": 103600 }, { "epoch": 33.51648351648352, "grad_norm": 1.5956976413726807, "learning_rate": 0.001, "loss": 1.0482, "step": 103700 }, { "epoch": 33.54880413703943, "grad_norm": 1.384737491607666, "learning_rate": 0.001, "loss": 1.0506, "step": 103800 }, { "epoch": 33.58112475759535, "grad_norm": 1.3139123916625977, "learning_rate": 0.001, "loss": 1.0527, "step": 103900 }, { "epoch": 33.61344537815126, "grad_norm": 1.3733874559402466, "learning_rate": 0.001, "loss": 1.0569, "step": 104000 }, { "epoch": 33.645765998707176, "grad_norm": 1.4283957481384277, "learning_rate": 0.001, "loss": 1.0697, "step": 104100 }, { "epoch": 33.67808661926309, "grad_norm": 1.1641042232513428, "learning_rate": 0.001, "loss": 1.0708, "step": 104200 }, { "epoch": 33.710407239819006, "grad_norm": 1.6381504535675049, "learning_rate": 0.001, "loss": 1.06, "step": 104300 }, { "epoch": 33.74272786037492, "grad_norm": 1.3026893138885498, "learning_rate": 0.001, "loss": 1.0676, "step": 104400 }, { "epoch": 33.775048480930835, "grad_norm": 1.3498927354812622, "learning_rate": 0.001, "loss": 1.0872, "step": 104500 }, { "epoch": 33.80736910148675, "grad_norm": 1.3915456533432007, "learning_rate": 0.001, "loss": 1.0869, "step": 104600 }, { "epoch": 33.839689722042664, "grad_norm": 1.4343451261520386, "learning_rate": 0.001, "loss": 1.0943, "step": 104700 }, { "epoch": 33.87201034259858, "grad_norm": 1.5240042209625244, "learning_rate": 0.001, "loss": 1.0904, "step": 104800 }, { "epoch": 33.90433096315449, "grad_norm": 1.1961963176727295, "learning_rate": 0.001, "loss": 1.1052, "step": 104900 }, { "epoch": 33.93665158371041, "grad_norm": 1.4134830236434937, "learning_rate": 0.001, "loss": 1.0978, "step": 105000 }, { "epoch": 33.96897220426632, "grad_norm": 1.376602292060852, "learning_rate": 0.001, "loss": 1.0954, "step": 105100 }, { "epoch": 34.00129282482224, "grad_norm": 1.0264028310775757, "learning_rate": 0.001, "loss": 1.0842, "step": 105200 }, { "epoch": 34.03361344537815, "grad_norm": 1.1075730323791504, "learning_rate": 0.001, "loss": 0.9345, "step": 105300 }, { "epoch": 34.065934065934066, "grad_norm": 1.3032543659210205, "learning_rate": 0.001, "loss": 0.9431, "step": 105400 }, { "epoch": 34.09825468648998, "grad_norm": 1.2016291618347168, "learning_rate": 0.001, "loss": 0.9469, "step": 105500 }, { "epoch": 34.130575307045895, "grad_norm": 1.2190241813659668, "learning_rate": 0.001, "loss": 0.9471, "step": 105600 }, { "epoch": 34.16289592760181, "grad_norm": 1.3855435848236084, "learning_rate": 0.001, "loss": 0.9571, "step": 105700 }, { "epoch": 34.195216548157724, "grad_norm": 1.2441275119781494, "learning_rate": 0.001, "loss": 0.9507, "step": 105800 }, { "epoch": 34.22753716871364, "grad_norm": 1.170262098312378, "learning_rate": 0.001, "loss": 0.9649, "step": 105900 }, { "epoch": 34.25985778926955, "grad_norm": 1.1690294742584229, "learning_rate": 0.001, "loss": 0.9819, "step": 106000 }, { "epoch": 34.29217840982547, "grad_norm": 1.3831874132156372, "learning_rate": 0.001, "loss": 0.9721, "step": 106100 }, { "epoch": 34.32449903038138, "grad_norm": 1.336683988571167, "learning_rate": 0.001, "loss": 0.9852, "step": 106200 }, { "epoch": 34.3568196509373, "grad_norm": 1.281592845916748, "learning_rate": 0.001, "loss": 1.0011, "step": 106300 }, { "epoch": 34.38914027149321, "grad_norm": 1.105830430984497, "learning_rate": 0.001, "loss": 0.993, "step": 106400 }, { "epoch": 34.421460892049126, "grad_norm": 1.1887004375457764, "learning_rate": 0.001, "loss": 1.0018, "step": 106500 }, { "epoch": 34.45378151260504, "grad_norm": 1.2523738145828247, "learning_rate": 0.001, "loss": 1.012, "step": 106600 }, { "epoch": 34.486102133160955, "grad_norm": 1.4884276390075684, "learning_rate": 0.001, "loss": 1.0152, "step": 106700 }, { "epoch": 34.51842275371687, "grad_norm": 1.0027998685836792, "learning_rate": 0.001, "loss": 1.0031, "step": 106800 }, { "epoch": 34.550743374272784, "grad_norm": 1.082810878753662, "learning_rate": 0.001, "loss": 1.021, "step": 106900 }, { "epoch": 34.5830639948287, "grad_norm": 0.9935569167137146, "learning_rate": 0.001, "loss": 1.0117, "step": 107000 }, { "epoch": 34.61538461538461, "grad_norm": 1.2386692762374878, "learning_rate": 0.001, "loss": 1.0267, "step": 107100 }, { "epoch": 34.64770523594053, "grad_norm": 1.1848431825637817, "learning_rate": 0.001, "loss": 1.0393, "step": 107200 }, { "epoch": 34.68002585649644, "grad_norm": 1.3582178354263306, "learning_rate": 0.001, "loss": 1.0382, "step": 107300 }, { "epoch": 34.71234647705236, "grad_norm": 1.109234094619751, "learning_rate": 0.001, "loss": 1.0459, "step": 107400 }, { "epoch": 34.74466709760827, "grad_norm": 1.3687392473220825, "learning_rate": 0.001, "loss": 1.0563, "step": 107500 }, { "epoch": 34.776987718164186, "grad_norm": 1.0531941652297974, "learning_rate": 0.001, "loss": 1.0501, "step": 107600 }, { "epoch": 34.8093083387201, "grad_norm": 1.332018494606018, "learning_rate": 0.001, "loss": 1.06, "step": 107700 }, { "epoch": 34.841628959276015, "grad_norm": 1.750568151473999, "learning_rate": 0.001, "loss": 1.0573, "step": 107800 }, { "epoch": 34.87394957983193, "grad_norm": 1.3103808164596558, "learning_rate": 0.001, "loss": 1.0557, "step": 107900 }, { "epoch": 34.906270200387844, "grad_norm": 1.301449179649353, "learning_rate": 0.001, "loss": 1.0603, "step": 108000 }, { "epoch": 34.93859082094376, "grad_norm": 1.2391163110733032, "learning_rate": 0.001, "loss": 1.0614, "step": 108100 }, { "epoch": 34.97091144149967, "grad_norm": 1.3433541059494019, "learning_rate": 0.001, "loss": 1.0633, "step": 108200 }, { "epoch": 35.003232062055595, "grad_norm": 1.3684951066970825, "learning_rate": 0.001, "loss": 1.0462, "step": 108300 }, { "epoch": 35.03555268261151, "grad_norm": 1.006211280822754, "learning_rate": 0.001, "loss": 0.9089, "step": 108400 }, { "epoch": 35.067873303167424, "grad_norm": 1.4141626358032227, "learning_rate": 0.001, "loss": 0.9201, "step": 108500 }, { "epoch": 35.10019392372334, "grad_norm": 1.0503674745559692, "learning_rate": 0.001, "loss": 0.905, "step": 108600 }, { "epoch": 35.13251454427925, "grad_norm": 1.2198293209075928, "learning_rate": 0.001, "loss": 0.914, "step": 108700 }, { "epoch": 35.16483516483517, "grad_norm": 1.1083400249481201, "learning_rate": 0.001, "loss": 0.945, "step": 108800 }, { "epoch": 35.19715578539108, "grad_norm": 1.2310866117477417, "learning_rate": 0.001, "loss": 0.9449, "step": 108900 }, { "epoch": 35.229476405947, "grad_norm": 1.3814125061035156, "learning_rate": 0.001, "loss": 0.9245, "step": 109000 }, { "epoch": 35.26179702650291, "grad_norm": 1.1721749305725098, "learning_rate": 0.001, "loss": 0.934, "step": 109100 }, { "epoch": 35.294117647058826, "grad_norm": 1.3420509099960327, "learning_rate": 0.001, "loss": 0.9458, "step": 109200 }, { "epoch": 35.32643826761474, "grad_norm": 1.0824869871139526, "learning_rate": 0.001, "loss": 0.9729, "step": 109300 }, { "epoch": 35.358758888170655, "grad_norm": 1.1203672885894775, "learning_rate": 0.001, "loss": 0.9752, "step": 109400 }, { "epoch": 35.39107950872657, "grad_norm": 1.0973600149154663, "learning_rate": 0.001, "loss": 0.9803, "step": 109500 }, { "epoch": 35.423400129282484, "grad_norm": 1.017338514328003, "learning_rate": 0.001, "loss": 0.9822, "step": 109600 }, { "epoch": 35.4557207498384, "grad_norm": 1.2218737602233887, "learning_rate": 0.001, "loss": 0.9879, "step": 109700 }, { "epoch": 35.48804137039431, "grad_norm": 0.9754787087440491, "learning_rate": 0.001, "loss": 0.9901, "step": 109800 }, { "epoch": 35.52036199095023, "grad_norm": 1.4641193151474, "learning_rate": 0.001, "loss": 0.9782, "step": 109900 }, { "epoch": 35.55268261150614, "grad_norm": 1.2894172668457031, "learning_rate": 0.001, "loss": 0.9825, "step": 110000 }, { "epoch": 35.58500323206206, "grad_norm": 1.1388012170791626, "learning_rate": 0.001, "loss": 0.9922, "step": 110100 }, { "epoch": 35.61732385261797, "grad_norm": 1.0259464979171753, "learning_rate": 0.001, "loss": 1.0041, "step": 110200 }, { "epoch": 35.649644473173886, "grad_norm": 1.0238407850265503, "learning_rate": 0.001, "loss": 1.002, "step": 110300 }, { "epoch": 35.6819650937298, "grad_norm": 1.423805832862854, "learning_rate": 0.001, "loss": 1.0068, "step": 110400 }, { "epoch": 35.714285714285715, "grad_norm": 1.193523645401001, "learning_rate": 0.001, "loss": 1.0022, "step": 110500 }, { "epoch": 35.74660633484163, "grad_norm": 1.086155891418457, "learning_rate": 0.001, "loss": 1.0245, "step": 110600 }, { "epoch": 35.778926955397544, "grad_norm": 1.0271650552749634, "learning_rate": 0.001, "loss": 1.0132, "step": 110700 }, { "epoch": 35.81124757595346, "grad_norm": 1.0137742757797241, "learning_rate": 0.001, "loss": 1.0176, "step": 110800 }, { "epoch": 35.84356819650937, "grad_norm": 1.2097448110580444, "learning_rate": 0.001, "loss": 1.0214, "step": 110900 }, { "epoch": 35.87588881706529, "grad_norm": 1.1151520013809204, "learning_rate": 0.001, "loss": 1.03, "step": 111000 }, { "epoch": 35.9082094376212, "grad_norm": 1.2367217540740967, "learning_rate": 0.001, "loss": 1.0288, "step": 111100 }, { "epoch": 35.94053005817712, "grad_norm": 1.291856288909912, "learning_rate": 0.001, "loss": 1.0458, "step": 111200 }, { "epoch": 35.97285067873303, "grad_norm": 0.9653595685958862, "learning_rate": 0.001, "loss": 1.043, "step": 111300 }, { "epoch": 36.005171299288946, "grad_norm": 1.006295084953308, "learning_rate": 0.001, "loss": 1.0201, "step": 111400 }, { "epoch": 36.03749191984486, "grad_norm": 0.9942673444747925, "learning_rate": 0.001, "loss": 0.8677, "step": 111500 }, { "epoch": 36.069812540400775, "grad_norm": 1.2639588117599487, "learning_rate": 0.001, "loss": 0.8853, "step": 111600 }, { "epoch": 36.10213316095669, "grad_norm": 1.2310287952423096, "learning_rate": 0.001, "loss": 0.9118, "step": 111700 }, { "epoch": 36.134453781512605, "grad_norm": 1.0878742933273315, "learning_rate": 0.001, "loss": 0.8903, "step": 111800 }, { "epoch": 36.16677440206852, "grad_norm": 1.389157772064209, "learning_rate": 0.001, "loss": 0.9087, "step": 111900 }, { "epoch": 36.199095022624434, "grad_norm": 0.9730111360549927, "learning_rate": 0.001, "loss": 0.9021, "step": 112000 }, { "epoch": 36.23141564318035, "grad_norm": 1.0636861324310303, "learning_rate": 0.001, "loss": 0.91, "step": 112100 }, { "epoch": 36.26373626373626, "grad_norm": 1.273271083831787, "learning_rate": 0.001, "loss": 0.9278, "step": 112200 }, { "epoch": 36.29605688429218, "grad_norm": 1.1767730712890625, "learning_rate": 0.001, "loss": 0.925, "step": 112300 }, { "epoch": 36.32837750484809, "grad_norm": 1.3249826431274414, "learning_rate": 0.001, "loss": 0.9198, "step": 112400 }, { "epoch": 36.36069812540401, "grad_norm": 1.2098668813705444, "learning_rate": 0.001, "loss": 0.9343, "step": 112500 }, { "epoch": 36.39301874595992, "grad_norm": 0.8858689069747925, "learning_rate": 0.001, "loss": 0.9435, "step": 112600 }, { "epoch": 36.425339366515836, "grad_norm": 1.0021246671676636, "learning_rate": 0.001, "loss": 0.9412, "step": 112700 }, { "epoch": 36.45765998707175, "grad_norm": 1.0806870460510254, "learning_rate": 0.001, "loss": 0.9557, "step": 112800 }, { "epoch": 36.489980607627665, "grad_norm": 1.208005428314209, "learning_rate": 0.001, "loss": 0.96, "step": 112900 }, { "epoch": 36.52230122818358, "grad_norm": 0.9865168929100037, "learning_rate": 0.001, "loss": 0.9678, "step": 113000 }, { "epoch": 36.554621848739494, "grad_norm": 1.5836715698242188, "learning_rate": 0.001, "loss": 0.9678, "step": 113100 }, { "epoch": 36.58694246929541, "grad_norm": 0.9653643369674683, "learning_rate": 0.001, "loss": 0.9712, "step": 113200 }, { "epoch": 36.61926308985132, "grad_norm": 1.1424298286437988, "learning_rate": 0.001, "loss": 0.9623, "step": 113300 }, { "epoch": 36.65158371040724, "grad_norm": 0.9989510774612427, "learning_rate": 0.001, "loss": 0.9873, "step": 113400 }, { "epoch": 36.68390433096315, "grad_norm": 1.3583859205245972, "learning_rate": 0.001, "loss": 0.9879, "step": 113500 }, { "epoch": 36.71622495151907, "grad_norm": 1.3508530855178833, "learning_rate": 0.001, "loss": 0.985, "step": 113600 }, { "epoch": 36.74854557207498, "grad_norm": 1.2119507789611816, "learning_rate": 0.001, "loss": 0.9799, "step": 113700 }, { "epoch": 36.780866192630896, "grad_norm": 1.0781590938568115, "learning_rate": 0.001, "loss": 0.9968, "step": 113800 }, { "epoch": 36.81318681318681, "grad_norm": 1.1065748929977417, "learning_rate": 0.001, "loss": 0.9994, "step": 113900 }, { "epoch": 36.845507433742725, "grad_norm": 1.1089667081832886, "learning_rate": 0.001, "loss": 0.996, "step": 114000 }, { "epoch": 36.87782805429864, "grad_norm": 1.2431050539016724, "learning_rate": 0.001, "loss": 0.9873, "step": 114100 }, { "epoch": 36.910148674854554, "grad_norm": 1.3218361139297485, "learning_rate": 0.001, "loss": 1.0104, "step": 114200 }, { "epoch": 36.94246929541047, "grad_norm": 1.0188748836517334, "learning_rate": 0.001, "loss": 1.0096, "step": 114300 }, { "epoch": 36.97478991596638, "grad_norm": 1.29617178440094, "learning_rate": 0.001, "loss": 1.0039, "step": 114400 }, { "epoch": 37.007110536522305, "grad_norm": 1.3056542873382568, "learning_rate": 0.001, "loss": 0.9897, "step": 114500 }, { "epoch": 37.03943115707822, "grad_norm": 0.9985322952270508, "learning_rate": 0.001, "loss": 0.8562, "step": 114600 }, { "epoch": 37.071751777634134, "grad_norm": 1.0383262634277344, "learning_rate": 0.001, "loss": 0.8548, "step": 114700 }, { "epoch": 37.10407239819005, "grad_norm": 0.9028170704841614, "learning_rate": 0.001, "loss": 0.8634, "step": 114800 }, { "epoch": 37.13639301874596, "grad_norm": 1.1589733362197876, "learning_rate": 0.001, "loss": 0.8702, "step": 114900 }, { "epoch": 37.16871363930188, "grad_norm": 1.5158370733261108, "learning_rate": 0.001, "loss": 0.8783, "step": 115000 }, { "epoch": 37.20103425985779, "grad_norm": 0.9679502248764038, "learning_rate": 0.001, "loss": 0.889, "step": 115100 }, { "epoch": 37.23335488041371, "grad_norm": 1.0344961881637573, "learning_rate": 0.001, "loss": 0.8907, "step": 115200 }, { "epoch": 37.26567550096962, "grad_norm": 1.049738883972168, "learning_rate": 0.001, "loss": 0.8958, "step": 115300 }, { "epoch": 37.297996121525536, "grad_norm": 1.2306166887283325, "learning_rate": 0.001, "loss": 0.9169, "step": 115400 }, { "epoch": 37.33031674208145, "grad_norm": 0.9682774543762207, "learning_rate": 0.001, "loss": 0.9128, "step": 115500 }, { "epoch": 37.362637362637365, "grad_norm": 0.9911801815032959, "learning_rate": 0.001, "loss": 0.9033, "step": 115600 }, { "epoch": 37.39495798319328, "grad_norm": 1.0882823467254639, "learning_rate": 0.001, "loss": 0.9204, "step": 115700 }, { "epoch": 37.427278603749194, "grad_norm": 1.1775611639022827, "learning_rate": 0.001, "loss": 0.926, "step": 115800 }, { "epoch": 37.45959922430511, "grad_norm": 1.4377920627593994, "learning_rate": 0.001, "loss": 0.9294, "step": 115900 }, { "epoch": 37.49191984486102, "grad_norm": 1.0641182661056519, "learning_rate": 0.001, "loss": 0.9304, "step": 116000 }, { "epoch": 37.52424046541694, "grad_norm": 1.3855056762695312, "learning_rate": 0.001, "loss": 0.9397, "step": 116100 }, { "epoch": 37.55656108597285, "grad_norm": 1.1065850257873535, "learning_rate": 0.001, "loss": 0.9363, "step": 116200 }, { "epoch": 37.58888170652877, "grad_norm": 1.1946288347244263, "learning_rate": 0.001, "loss": 0.9356, "step": 116300 }, { "epoch": 37.62120232708468, "grad_norm": 1.300480842590332, "learning_rate": 0.001, "loss": 0.9402, "step": 116400 }, { "epoch": 37.653522947640596, "grad_norm": 1.0597584247589111, "learning_rate": 0.001, "loss": 0.944, "step": 116500 }, { "epoch": 37.68584356819651, "grad_norm": 1.4359405040740967, "learning_rate": 0.001, "loss": 0.9456, "step": 116600 }, { "epoch": 37.718164188752425, "grad_norm": 0.9900608658790588, "learning_rate": 0.001, "loss": 0.9485, "step": 116700 }, { "epoch": 37.75048480930834, "grad_norm": 1.3499587774276733, "learning_rate": 0.001, "loss": 0.9521, "step": 116800 }, { "epoch": 37.782805429864254, "grad_norm": 1.0757677555084229, "learning_rate": 0.001, "loss": 0.9707, "step": 116900 }, { "epoch": 37.81512605042017, "grad_norm": 1.0229486227035522, "learning_rate": 0.001, "loss": 0.9781, "step": 117000 }, { "epoch": 37.84744667097608, "grad_norm": 1.229277491569519, "learning_rate": 0.001, "loss": 0.9739, "step": 117100 }, { "epoch": 37.879767291532, "grad_norm": 1.2252568006515503, "learning_rate": 0.001, "loss": 0.9741, "step": 117200 }, { "epoch": 37.91208791208791, "grad_norm": 1.5427048206329346, "learning_rate": 0.001, "loss": 0.9904, "step": 117300 }, { "epoch": 37.94440853264383, "grad_norm": 1.0018162727355957, "learning_rate": 0.001, "loss": 0.9775, "step": 117400 }, { "epoch": 37.97672915319974, "grad_norm": 1.0075654983520508, "learning_rate": 0.001, "loss": 0.9945, "step": 117500 }, { "epoch": 38.009049773755656, "grad_norm": 1.2897984981536865, "learning_rate": 0.001, "loss": 0.9409, "step": 117600 }, { "epoch": 38.04137039431157, "grad_norm": 1.0476512908935547, "learning_rate": 0.001, "loss": 0.8236, "step": 117700 }, { "epoch": 38.073691014867485, "grad_norm": 1.051007866859436, "learning_rate": 0.001, "loss": 0.8394, "step": 117800 }, { "epoch": 38.1060116354234, "grad_norm": 1.0521198511123657, "learning_rate": 0.001, "loss": 0.8499, "step": 117900 }, { "epoch": 38.138332255979314, "grad_norm": 1.4748420715332031, "learning_rate": 0.001, "loss": 0.8497, "step": 118000 }, { "epoch": 38.17065287653523, "grad_norm": 1.3025482892990112, "learning_rate": 0.001, "loss": 0.8499, "step": 118100 }, { "epoch": 38.20297349709114, "grad_norm": 1.2373261451721191, "learning_rate": 0.001, "loss": 0.8658, "step": 118200 }, { "epoch": 38.23529411764706, "grad_norm": 1.2480669021606445, "learning_rate": 0.001, "loss": 0.8699, "step": 118300 }, { "epoch": 38.26761473820297, "grad_norm": 1.533806562423706, "learning_rate": 0.001, "loss": 0.8717, "step": 118400 }, { "epoch": 38.29993535875889, "grad_norm": 1.100213885307312, "learning_rate": 0.001, "loss": 0.8822, "step": 118500 }, { "epoch": 38.3322559793148, "grad_norm": 1.1953175067901611, "learning_rate": 0.001, "loss": 0.8798, "step": 118600 }, { "epoch": 38.364576599870716, "grad_norm": 1.49982750415802, "learning_rate": 0.001, "loss": 0.8992, "step": 118700 }, { "epoch": 38.39689722042663, "grad_norm": 1.1356749534606934, "learning_rate": 0.001, "loss": 0.8941, "step": 118800 }, { "epoch": 38.429217840982545, "grad_norm": 1.3454540967941284, "learning_rate": 0.001, "loss": 0.8926, "step": 118900 }, { "epoch": 38.46153846153846, "grad_norm": 1.0836589336395264, "learning_rate": 0.001, "loss": 0.8972, "step": 119000 }, { "epoch": 38.493859082094374, "grad_norm": 0.9539032578468323, "learning_rate": 0.001, "loss": 0.8938, "step": 119100 }, { "epoch": 38.52617970265029, "grad_norm": 1.1788592338562012, "learning_rate": 0.001, "loss": 0.9192, "step": 119200 }, { "epoch": 38.558500323206204, "grad_norm": 1.0204296112060547, "learning_rate": 0.001, "loss": 0.9027, "step": 119300 }, { "epoch": 38.59082094376212, "grad_norm": 1.2549513578414917, "learning_rate": 0.001, "loss": 0.92, "step": 119400 }, { "epoch": 38.62314156431803, "grad_norm": 0.977759599685669, "learning_rate": 0.001, "loss": 0.9266, "step": 119500 }, { "epoch": 38.65546218487395, "grad_norm": 0.9779630303382874, "learning_rate": 0.001, "loss": 0.9341, "step": 119600 }, { "epoch": 38.68778280542986, "grad_norm": 1.121781587600708, "learning_rate": 0.001, "loss": 0.9297, "step": 119700 }, { "epoch": 38.720103425985776, "grad_norm": 1.264494776725769, "learning_rate": 0.001, "loss": 0.9365, "step": 119800 }, { "epoch": 38.75242404654169, "grad_norm": 1.0555070638656616, "learning_rate": 0.001, "loss": 0.9249, "step": 119900 }, { "epoch": 38.784744667097605, "grad_norm": 1.2729790210723877, "learning_rate": 0.001, "loss": 0.9531, "step": 120000 }, { "epoch": 38.81706528765352, "grad_norm": 1.322257161140442, "learning_rate": 0.001, "loss": 0.9453, "step": 120100 }, { "epoch": 38.849385908209435, "grad_norm": 1.1077519655227661, "learning_rate": 0.001, "loss": 0.9491, "step": 120200 }, { "epoch": 38.88170652876535, "grad_norm": 1.0410795211791992, "learning_rate": 0.001, "loss": 0.9474, "step": 120300 }, { "epoch": 38.914027149321264, "grad_norm": 1.0508451461791992, "learning_rate": 0.001, "loss": 0.948, "step": 120400 }, { "epoch": 38.94634776987718, "grad_norm": 1.2014192342758179, "learning_rate": 0.001, "loss": 0.9507, "step": 120500 }, { "epoch": 38.97866839043309, "grad_norm": 0.9971045255661011, "learning_rate": 0.001, "loss": 0.9586, "step": 120600 }, { "epoch": 39.010989010989015, "grad_norm": 1.2914472818374634, "learning_rate": 0.001, "loss": 0.9038, "step": 120700 }, { "epoch": 39.04330963154493, "grad_norm": 0.9278183579444885, "learning_rate": 0.001, "loss": 0.802, "step": 120800 }, { "epoch": 39.075630252100844, "grad_norm": 1.4596288204193115, "learning_rate": 0.001, "loss": 0.812, "step": 120900 }, { "epoch": 39.10795087265676, "grad_norm": 1.2344359159469604, "learning_rate": 0.001, "loss": 0.8199, "step": 121000 }, { "epoch": 39.14027149321267, "grad_norm": 1.4543335437774658, "learning_rate": 0.001, "loss": 0.8164, "step": 121100 }, { "epoch": 39.17259211376859, "grad_norm": 1.1448246240615845, "learning_rate": 0.001, "loss": 0.8401, "step": 121200 }, { "epoch": 39.2049127343245, "grad_norm": 1.3041157722473145, "learning_rate": 0.001, "loss": 0.8527, "step": 121300 }, { "epoch": 39.237233354880416, "grad_norm": 1.21819269657135, "learning_rate": 0.001, "loss": 0.8515, "step": 121400 }, { "epoch": 39.26955397543633, "grad_norm": 1.2304883003234863, "learning_rate": 0.001, "loss": 0.8522, "step": 121500 }, { "epoch": 39.301874595992246, "grad_norm": 1.2337646484375, "learning_rate": 0.001, "loss": 0.8546, "step": 121600 }, { "epoch": 39.33419521654816, "grad_norm": 1.0077661275863647, "learning_rate": 0.001, "loss": 0.8567, "step": 121700 }, { "epoch": 39.366515837104075, "grad_norm": 1.047959327697754, "learning_rate": 0.001, "loss": 0.8534, "step": 121800 }, { "epoch": 39.39883645765999, "grad_norm": 1.2398507595062256, "learning_rate": 0.001, "loss": 0.8706, "step": 121900 }, { "epoch": 39.431157078215904, "grad_norm": 1.407317042350769, "learning_rate": 0.001, "loss": 0.8615, "step": 122000 }, { "epoch": 39.46347769877182, "grad_norm": 1.0399892330169678, "learning_rate": 0.001, "loss": 0.8798, "step": 122100 }, { "epoch": 39.49579831932773, "grad_norm": 1.4030870199203491, "learning_rate": 0.001, "loss": 0.8827, "step": 122200 }, { "epoch": 39.52811893988365, "grad_norm": 1.2747329473495483, "learning_rate": 0.001, "loss": 0.8981, "step": 122300 }, { "epoch": 39.56043956043956, "grad_norm": 1.078689694404602, "learning_rate": 0.001, "loss": 0.887, "step": 122400 }, { "epoch": 39.59276018099548, "grad_norm": 1.1006649732589722, "learning_rate": 0.001, "loss": 0.9018, "step": 122500 }, { "epoch": 39.62508080155139, "grad_norm": 1.1648002862930298, "learning_rate": 0.001, "loss": 0.9084, "step": 122600 }, { "epoch": 39.657401422107306, "grad_norm": 1.0768827199935913, "learning_rate": 0.001, "loss": 0.8903, "step": 122700 }, { "epoch": 39.68972204266322, "grad_norm": 1.3314549922943115, "learning_rate": 0.001, "loss": 0.9111, "step": 122800 }, { "epoch": 39.722042663219135, "grad_norm": 1.0867133140563965, "learning_rate": 0.001, "loss": 0.9127, "step": 122900 }, { "epoch": 39.75436328377505, "grad_norm": 1.0628085136413574, "learning_rate": 0.001, "loss": 0.9122, "step": 123000 }, { "epoch": 39.786683904330964, "grad_norm": 1.1497098207473755, "learning_rate": 0.001, "loss": 0.9101, "step": 123100 }, { "epoch": 39.81900452488688, "grad_norm": 1.0475852489471436, "learning_rate": 0.001, "loss": 0.9204, "step": 123200 }, { "epoch": 39.85132514544279, "grad_norm": 1.2807620763778687, "learning_rate": 0.001, "loss": 0.9102, "step": 123300 }, { "epoch": 39.88364576599871, "grad_norm": 1.2606719732284546, "learning_rate": 0.001, "loss": 0.9247, "step": 123400 }, { "epoch": 39.91596638655462, "grad_norm": 1.3353443145751953, "learning_rate": 0.001, "loss": 0.9211, "step": 123500 }, { "epoch": 39.94828700711054, "grad_norm": 1.271468997001648, "learning_rate": 0.001, "loss": 0.94, "step": 123600 }, { "epoch": 39.98060762766645, "grad_norm": 1.1420928239822388, "learning_rate": 0.001, "loss": 0.9368, "step": 123700 }, { "epoch": 40.012928248222366, "grad_norm": 1.190773367881775, "learning_rate": 0.001, "loss": 0.8651, "step": 123800 }, { "epoch": 40.04524886877828, "grad_norm": 1.2790533304214478, "learning_rate": 0.001, "loss": 0.79, "step": 123900 }, { "epoch": 40.077569489334195, "grad_norm": 1.3552380800247192, "learning_rate": 0.001, "loss": 0.7898, "step": 124000 }, { "epoch": 40.10989010989011, "grad_norm": 1.3414243459701538, "learning_rate": 0.001, "loss": 0.7969, "step": 124100 }, { "epoch": 40.142210730446024, "grad_norm": 1.2973378896713257, "learning_rate": 0.001, "loss": 0.8031, "step": 124200 }, { "epoch": 40.17453135100194, "grad_norm": 1.1488345861434937, "learning_rate": 0.001, "loss": 0.8155, "step": 124300 }, { "epoch": 40.20685197155785, "grad_norm": 1.0833264589309692, "learning_rate": 0.001, "loss": 0.8169, "step": 124400 }, { "epoch": 40.23917259211377, "grad_norm": 1.3511290550231934, "learning_rate": 0.001, "loss": 0.815, "step": 124500 }, { "epoch": 40.27149321266968, "grad_norm": 1.1336244344711304, "learning_rate": 0.001, "loss": 0.8281, "step": 124600 }, { "epoch": 40.3038138332256, "grad_norm": 1.319153904914856, "learning_rate": 0.001, "loss": 0.8207, "step": 124700 }, { "epoch": 40.33613445378151, "grad_norm": 1.1569024324417114, "learning_rate": 0.001, "loss": 0.8401, "step": 124800 }, { "epoch": 40.368455074337426, "grad_norm": 1.2067766189575195, "learning_rate": 0.001, "loss": 0.8426, "step": 124900 }, { "epoch": 40.40077569489334, "grad_norm": 1.1894055604934692, "learning_rate": 0.001, "loss": 0.8481, "step": 125000 }, { "epoch": 40.433096315449255, "grad_norm": 1.2211947441101074, "learning_rate": 0.001, "loss": 0.8539, "step": 125100 }, { "epoch": 40.46541693600517, "grad_norm": 1.4371823072433472, "learning_rate": 0.001, "loss": 0.8512, "step": 125200 }, { "epoch": 40.497737556561084, "grad_norm": 1.1994086503982544, "learning_rate": 0.001, "loss": 0.8571, "step": 125300 }, { "epoch": 40.530058177117, "grad_norm": 1.1473608016967773, "learning_rate": 0.001, "loss": 0.8674, "step": 125400 }, { "epoch": 40.56237879767291, "grad_norm": 1.331875205039978, "learning_rate": 0.001, "loss": 0.8663, "step": 125500 }, { "epoch": 40.59469941822883, "grad_norm": 1.2817922830581665, "learning_rate": 0.001, "loss": 0.8702, "step": 125600 }, { "epoch": 40.62702003878474, "grad_norm": 1.3686707019805908, "learning_rate": 0.001, "loss": 0.8657, "step": 125700 }, { "epoch": 40.65934065934066, "grad_norm": 1.1966832876205444, "learning_rate": 0.001, "loss": 0.8723, "step": 125800 }, { "epoch": 40.69166127989657, "grad_norm": 1.0613871812820435, "learning_rate": 0.001, "loss": 0.8778, "step": 125900 }, { "epoch": 40.723981900452486, "grad_norm": 1.353318214416504, "learning_rate": 0.001, "loss": 0.8991, "step": 126000 }, { "epoch": 40.7563025210084, "grad_norm": 1.3145556449890137, "learning_rate": 0.001, "loss": 0.8922, "step": 126100 }, { "epoch": 40.788623141564315, "grad_norm": 1.3237497806549072, "learning_rate": 0.001, "loss": 0.8795, "step": 126200 }, { "epoch": 40.82094376212023, "grad_norm": 1.3631601333618164, "learning_rate": 0.001, "loss": 0.8997, "step": 126300 }, { "epoch": 40.853264382676144, "grad_norm": 1.4203535318374634, "learning_rate": 0.001, "loss": 0.9036, "step": 126400 }, { "epoch": 40.88558500323206, "grad_norm": 1.2617900371551514, "learning_rate": 0.001, "loss": 0.9092, "step": 126500 }, { "epoch": 40.91790562378797, "grad_norm": 1.1973419189453125, "learning_rate": 0.001, "loss": 0.9082, "step": 126600 }, { "epoch": 40.95022624434389, "grad_norm": 1.1878764629364014, "learning_rate": 0.001, "loss": 0.9101, "step": 126700 }, { "epoch": 40.9825468648998, "grad_norm": 1.2155219316482544, "learning_rate": 0.001, "loss": 0.9113, "step": 126800 }, { "epoch": 41.014867485455724, "grad_norm": 1.1163018941879272, "learning_rate": 0.001, "loss": 0.8328, "step": 126900 }, { "epoch": 41.04718810601164, "grad_norm": 1.463510274887085, "learning_rate": 0.001, "loss": 0.7706, "step": 127000 }, { "epoch": 41.07950872656755, "grad_norm": 1.6338567733764648, "learning_rate": 0.001, "loss": 0.7705, "step": 127100 }, { "epoch": 41.11182934712347, "grad_norm": 1.034222960472107, "learning_rate": 0.001, "loss": 0.7738, "step": 127200 }, { "epoch": 41.14414996767938, "grad_norm": 1.0902752876281738, "learning_rate": 0.001, "loss": 0.7779, "step": 127300 }, { "epoch": 41.1764705882353, "grad_norm": 1.3925044536590576, "learning_rate": 0.001, "loss": 0.7755, "step": 127400 }, { "epoch": 41.20879120879121, "grad_norm": 1.1720607280731201, "learning_rate": 0.001, "loss": 0.7961, "step": 127500 }, { "epoch": 41.241111829347126, "grad_norm": 1.0273833274841309, "learning_rate": 0.001, "loss": 0.7996, "step": 127600 }, { "epoch": 41.27343244990304, "grad_norm": 1.2068995237350464, "learning_rate": 0.001, "loss": 0.8157, "step": 127700 }, { "epoch": 41.305753070458955, "grad_norm": 1.2198127508163452, "learning_rate": 0.001, "loss": 0.8181, "step": 127800 }, { "epoch": 41.33807369101487, "grad_norm": 1.1657899618148804, "learning_rate": 0.001, "loss": 0.8239, "step": 127900 }, { "epoch": 41.370394311570784, "grad_norm": 1.7020061016082764, "learning_rate": 0.001, "loss": 0.8048, "step": 128000 }, { "epoch": 41.4027149321267, "grad_norm": 1.2665622234344482, "learning_rate": 0.001, "loss": 0.8222, "step": 128100 }, { "epoch": 41.43503555268261, "grad_norm": 1.1721093654632568, "learning_rate": 0.001, "loss": 0.8215, "step": 128200 }, { "epoch": 41.46735617323853, "grad_norm": 1.5689226388931274, "learning_rate": 0.001, "loss": 0.8294, "step": 128300 }, { "epoch": 41.49967679379444, "grad_norm": 1.3154977560043335, "learning_rate": 0.001, "loss": 0.8324, "step": 128400 }, { "epoch": 41.53199741435036, "grad_norm": 1.2950934171676636, "learning_rate": 0.001, "loss": 0.8337, "step": 128500 }, { "epoch": 41.56431803490627, "grad_norm": 1.4449738264083862, "learning_rate": 0.001, "loss": 0.8394, "step": 128600 }, { "epoch": 41.596638655462186, "grad_norm": 1.4431105852127075, "learning_rate": 0.001, "loss": 0.8547, "step": 128700 }, { "epoch": 41.6289592760181, "grad_norm": 1.4692765474319458, "learning_rate": 0.001, "loss": 0.8474, "step": 128800 }, { "epoch": 41.661279896574015, "grad_norm": 1.1824754476547241, "learning_rate": 0.001, "loss": 0.8722, "step": 128900 }, { "epoch": 41.69360051712993, "grad_norm": 1.551052451133728, "learning_rate": 0.001, "loss": 0.8593, "step": 129000 }, { "epoch": 41.725921137685845, "grad_norm": 1.3379920721054077, "learning_rate": 0.001, "loss": 0.8598, "step": 129100 }, { "epoch": 41.75824175824176, "grad_norm": 1.2818305492401123, "learning_rate": 0.001, "loss": 0.8696, "step": 129200 }, { "epoch": 41.790562378797674, "grad_norm": 1.0629348754882812, "learning_rate": 0.001, "loss": 0.8758, "step": 129300 }, { "epoch": 41.82288299935359, "grad_norm": 1.319840431213379, "learning_rate": 0.001, "loss": 0.8722, "step": 129400 }, { "epoch": 41.8552036199095, "grad_norm": 1.3441827297210693, "learning_rate": 0.001, "loss": 0.871, "step": 129500 }, { "epoch": 41.88752424046542, "grad_norm": 1.2514570951461792, "learning_rate": 0.001, "loss": 0.8846, "step": 129600 }, { "epoch": 41.91984486102133, "grad_norm": 1.0442184209823608, "learning_rate": 0.001, "loss": 0.8849, "step": 129700 }, { "epoch": 41.95216548157725, "grad_norm": 1.0661392211914062, "learning_rate": 0.001, "loss": 0.8815, "step": 129800 }, { "epoch": 41.98448610213316, "grad_norm": 1.1643658876419067, "learning_rate": 0.001, "loss": 0.8925, "step": 129900 }, { "epoch": 42.016806722689076, "grad_norm": 1.108484148979187, "learning_rate": 0.001, "loss": 0.8004, "step": 130000 }, { "epoch": 42.04912734324499, "grad_norm": 1.8217283487319946, "learning_rate": 0.001, "loss": 0.7389, "step": 130100 }, { "epoch": 42.081447963800905, "grad_norm": 1.28733229637146, "learning_rate": 0.001, "loss": 0.7519, "step": 130200 }, { "epoch": 42.11376858435682, "grad_norm": 1.0450654029846191, "learning_rate": 0.001, "loss": 0.7637, "step": 130300 }, { "epoch": 42.146089204912734, "grad_norm": 1.2855758666992188, "learning_rate": 0.001, "loss": 0.764, "step": 130400 }, { "epoch": 42.17840982546865, "grad_norm": 1.4532465934753418, "learning_rate": 0.001, "loss": 0.7691, "step": 130500 }, { "epoch": 42.21073044602456, "grad_norm": 1.2499496936798096, "learning_rate": 0.001, "loss": 0.7826, "step": 130600 }, { "epoch": 42.24305106658048, "grad_norm": 1.026951551437378, "learning_rate": 0.001, "loss": 0.7855, "step": 130700 }, { "epoch": 42.27537168713639, "grad_norm": 1.2407318353652954, "learning_rate": 0.001, "loss": 0.7784, "step": 130800 }, { "epoch": 42.30769230769231, "grad_norm": 1.4100878238677979, "learning_rate": 0.001, "loss": 0.7857, "step": 130900 }, { "epoch": 42.34001292824822, "grad_norm": 1.1213184595108032, "learning_rate": 0.001, "loss": 0.8007, "step": 131000 }, { "epoch": 42.372333548804136, "grad_norm": 1.2094000577926636, "learning_rate": 0.001, "loss": 0.8038, "step": 131100 }, { "epoch": 42.40465416936005, "grad_norm": 1.297213077545166, "learning_rate": 0.001, "loss": 0.8039, "step": 131200 }, { "epoch": 42.436974789915965, "grad_norm": 1.3779114484786987, "learning_rate": 0.001, "loss": 0.7987, "step": 131300 }, { "epoch": 42.46929541047188, "grad_norm": 1.2620872259140015, "learning_rate": 0.001, "loss": 0.8139, "step": 131400 }, { "epoch": 42.501616031027794, "grad_norm": 1.2722561359405518, "learning_rate": 0.001, "loss": 0.8133, "step": 131500 }, { "epoch": 42.53393665158371, "grad_norm": 1.6127259731292725, "learning_rate": 0.001, "loss": 0.8146, "step": 131600 }, { "epoch": 42.56625727213962, "grad_norm": 1.2732130289077759, "learning_rate": 0.001, "loss": 0.815, "step": 131700 }, { "epoch": 42.59857789269554, "grad_norm": 1.185979962348938, "learning_rate": 0.001, "loss": 0.8273, "step": 131800 }, { "epoch": 42.63089851325145, "grad_norm": 2.0785205364227295, "learning_rate": 0.001, "loss": 0.8181, "step": 131900 }, { "epoch": 42.66321913380737, "grad_norm": 1.135526418685913, "learning_rate": 0.001, "loss": 0.8287, "step": 132000 }, { "epoch": 42.69553975436328, "grad_norm": 1.3667227029800415, "learning_rate": 0.001, "loss": 0.8334, "step": 132100 }, { "epoch": 42.727860374919196, "grad_norm": 1.1868656873703003, "learning_rate": 0.001, "loss": 0.8326, "step": 132200 }, { "epoch": 42.76018099547511, "grad_norm": 1.7427518367767334, "learning_rate": 0.001, "loss": 0.8374, "step": 132300 }, { "epoch": 42.792501616031025, "grad_norm": 1.1879456043243408, "learning_rate": 0.001, "loss": 0.832, "step": 132400 }, { "epoch": 42.82482223658694, "grad_norm": 1.0775614976882935, "learning_rate": 0.001, "loss": 0.8516, "step": 132500 }, { "epoch": 42.857142857142854, "grad_norm": 1.4639785289764404, "learning_rate": 0.001, "loss": 0.8617, "step": 132600 }, { "epoch": 42.88946347769877, "grad_norm": 1.2370153665542603, "learning_rate": 0.001, "loss": 0.8638, "step": 132700 }, { "epoch": 42.92178409825468, "grad_norm": 1.253002643585205, "learning_rate": 0.001, "loss": 0.8647, "step": 132800 }, { "epoch": 42.9541047188106, "grad_norm": 1.2491612434387207, "learning_rate": 0.001, "loss": 0.8485, "step": 132900 }, { "epoch": 42.98642533936652, "grad_norm": 1.532210350036621, "learning_rate": 0.001, "loss": 0.8618, "step": 133000 }, { "epoch": 43.018745959922434, "grad_norm": 1.1262866258621216, "learning_rate": 0.001, "loss": 0.7859, "step": 133100 }, { "epoch": 43.05106658047835, "grad_norm": 1.1634641885757446, "learning_rate": 0.001, "loss": 0.7195, "step": 133200 }, { "epoch": 43.08338720103426, "grad_norm": 1.3079708814620972, "learning_rate": 0.001, "loss": 0.7318, "step": 133300 }, { "epoch": 43.11570782159018, "grad_norm": 1.2116271257400513, "learning_rate": 0.001, "loss": 0.7467, "step": 133400 }, { "epoch": 43.14802844214609, "grad_norm": 1.1801396608352661, "learning_rate": 0.001, "loss": 0.7422, "step": 133500 }, { "epoch": 43.18034906270201, "grad_norm": 1.2730531692504883, "learning_rate": 0.001, "loss": 0.7357, "step": 133600 }, { "epoch": 43.21266968325792, "grad_norm": 1.0888086557388306, "learning_rate": 0.001, "loss": 0.7565, "step": 133700 }, { "epoch": 43.244990303813836, "grad_norm": 1.1815632581710815, "learning_rate": 0.001, "loss": 0.7564, "step": 133800 }, { "epoch": 43.27731092436975, "grad_norm": 1.1528383493423462, "learning_rate": 0.001, "loss": 0.7549, "step": 133900 }, { "epoch": 43.309631544925665, "grad_norm": 0.9393887519836426, "learning_rate": 0.001, "loss": 0.7656, "step": 134000 }, { "epoch": 43.34195216548158, "grad_norm": 1.2430299520492554, "learning_rate": 0.001, "loss": 0.7706, "step": 134100 }, { "epoch": 43.374272786037494, "grad_norm": 1.410615086555481, "learning_rate": 0.001, "loss": 0.7942, "step": 134200 }, { "epoch": 43.40659340659341, "grad_norm": 1.009611964225769, "learning_rate": 0.001, "loss": 0.7812, "step": 134300 }, { "epoch": 43.43891402714932, "grad_norm": 1.143725872039795, "learning_rate": 0.001, "loss": 0.7716, "step": 134400 }, { "epoch": 43.47123464770524, "grad_norm": 1.0802397727966309, "learning_rate": 0.001, "loss": 0.7923, "step": 134500 }, { "epoch": 43.50355526826115, "grad_norm": 1.2585951089859009, "learning_rate": 0.001, "loss": 0.7893, "step": 134600 }, { "epoch": 43.53587588881707, "grad_norm": 1.1097944974899292, "learning_rate": 0.001, "loss": 0.7961, "step": 134700 }, { "epoch": 43.56819650937298, "grad_norm": 1.122867226600647, "learning_rate": 0.001, "loss": 0.7971, "step": 134800 }, { "epoch": 43.600517129928896, "grad_norm": 1.2161085605621338, "learning_rate": 0.001, "loss": 0.8017, "step": 134900 }, { "epoch": 43.63283775048481, "grad_norm": 1.0487866401672363, "learning_rate": 0.001, "loss": 0.8158, "step": 135000 }, { "epoch": 43.665158371040725, "grad_norm": 1.2142281532287598, "learning_rate": 0.001, "loss": 0.8159, "step": 135100 }, { "epoch": 43.69747899159664, "grad_norm": 1.232129693031311, "learning_rate": 0.001, "loss": 0.8157, "step": 135200 }, { "epoch": 43.729799612152554, "grad_norm": 1.1264606714248657, "learning_rate": 0.001, "loss": 0.828, "step": 135300 }, { "epoch": 43.76212023270847, "grad_norm": 1.2625343799591064, "learning_rate": 0.001, "loss": 0.8229, "step": 135400 }, { "epoch": 43.79444085326438, "grad_norm": 1.0773369073867798, "learning_rate": 0.001, "loss": 0.8286, "step": 135500 }, { "epoch": 43.8267614738203, "grad_norm": 1.0520135164260864, "learning_rate": 0.001, "loss": 0.8347, "step": 135600 }, { "epoch": 43.85908209437621, "grad_norm": 1.0921969413757324, "learning_rate": 0.001, "loss": 0.8259, "step": 135700 }, { "epoch": 43.89140271493213, "grad_norm": 1.32478666305542, "learning_rate": 0.001, "loss": 0.8438, "step": 135800 }, { "epoch": 43.92372333548804, "grad_norm": 1.3219280242919922, "learning_rate": 0.001, "loss": 0.8526, "step": 135900 }, { "epoch": 43.956043956043956, "grad_norm": 1.5354793071746826, "learning_rate": 0.001, "loss": 0.8441, "step": 136000 }, { "epoch": 43.98836457659987, "grad_norm": 1.0277066230773926, "learning_rate": 0.001, "loss": 0.8435, "step": 136100 }, { "epoch": 44.020685197155785, "grad_norm": 1.2237446308135986, "learning_rate": 0.001, "loss": 0.7571, "step": 136200 }, { "epoch": 44.0530058177117, "grad_norm": 1.0195887088775635, "learning_rate": 0.001, "loss": 0.7167, "step": 136300 }, { "epoch": 44.085326438267614, "grad_norm": 1.142837405204773, "learning_rate": 0.001, "loss": 0.7119, "step": 136400 }, { "epoch": 44.11764705882353, "grad_norm": 0.9948951005935669, "learning_rate": 0.001, "loss": 0.7196, "step": 136500 }, { "epoch": 44.14996767937944, "grad_norm": 1.2872686386108398, "learning_rate": 0.001, "loss": 0.726, "step": 136600 }, { "epoch": 44.18228829993536, "grad_norm": 1.2630436420440674, "learning_rate": 0.001, "loss": 0.7295, "step": 136700 }, { "epoch": 44.21460892049127, "grad_norm": 1.1319466829299927, "learning_rate": 0.001, "loss": 0.7427, "step": 136800 }, { "epoch": 44.24692954104719, "grad_norm": 1.683190941810608, "learning_rate": 0.001, "loss": 0.7393, "step": 136900 }, { "epoch": 44.2792501616031, "grad_norm": 0.9938874840736389, "learning_rate": 0.001, "loss": 0.7438, "step": 137000 }, { "epoch": 44.311570782159016, "grad_norm": 1.3265200853347778, "learning_rate": 0.001, "loss": 0.757, "step": 137100 }, { "epoch": 44.34389140271493, "grad_norm": 1.1081351041793823, "learning_rate": 0.001, "loss": 0.7571, "step": 137200 }, { "epoch": 44.376212023270845, "grad_norm": 1.3274530172348022, "learning_rate": 0.001, "loss": 0.7598, "step": 137300 }, { "epoch": 44.40853264382676, "grad_norm": 1.3422248363494873, "learning_rate": 0.001, "loss": 0.7632, "step": 137400 }, { "epoch": 44.440853264382675, "grad_norm": 1.0407700538635254, "learning_rate": 0.001, "loss": 0.7693, "step": 137500 }, { "epoch": 44.47317388493859, "grad_norm": 1.0699604749679565, "learning_rate": 0.001, "loss": 0.7736, "step": 137600 }, { "epoch": 44.505494505494504, "grad_norm": 0.9574213027954102, "learning_rate": 0.001, "loss": 0.7601, "step": 137700 }, { "epoch": 44.53781512605042, "grad_norm": 1.094552993774414, "learning_rate": 0.001, "loss": 0.7792, "step": 137800 }, { "epoch": 44.57013574660633, "grad_norm": 1.20696222782135, "learning_rate": 0.001, "loss": 0.7824, "step": 137900 }, { "epoch": 44.60245636716225, "grad_norm": 0.9959917664527893, "learning_rate": 0.001, "loss": 0.795, "step": 138000 }, { "epoch": 44.63477698771816, "grad_norm": 1.094424843788147, "learning_rate": 0.001, "loss": 0.7806, "step": 138100 }, { "epoch": 44.66709760827408, "grad_norm": 1.1750280857086182, "learning_rate": 0.001, "loss": 0.7962, "step": 138200 }, { "epoch": 44.69941822882999, "grad_norm": 0.921217679977417, "learning_rate": 0.001, "loss": 0.8117, "step": 138300 }, { "epoch": 44.731738849385906, "grad_norm": 1.086727499961853, "learning_rate": 0.001, "loss": 0.8077, "step": 138400 }, { "epoch": 44.76405946994182, "grad_norm": 0.9572423100471497, "learning_rate": 0.001, "loss": 0.8058, "step": 138500 }, { "epoch": 44.796380090497735, "grad_norm": 1.166356086730957, "learning_rate": 0.001, "loss": 0.8063, "step": 138600 }, { "epoch": 44.82870071105365, "grad_norm": 0.9920932054519653, "learning_rate": 0.001, "loss": 0.8041, "step": 138700 }, { "epoch": 44.861021331609564, "grad_norm": 1.7634618282318115, "learning_rate": 0.001, "loss": 0.8159, "step": 138800 }, { "epoch": 44.89334195216548, "grad_norm": 1.253023624420166, "learning_rate": 0.001, "loss": 0.8178, "step": 138900 }, { "epoch": 44.92566257272139, "grad_norm": 1.2091656923294067, "learning_rate": 0.001, "loss": 0.82, "step": 139000 }, { "epoch": 44.95798319327731, "grad_norm": 1.030481219291687, "learning_rate": 0.001, "loss": 0.8162, "step": 139100 }, { "epoch": 44.99030381383322, "grad_norm": 1.1589797735214233, "learning_rate": 0.001, "loss": 0.8063, "step": 139200 }, { "epoch": 45.022624434389144, "grad_norm": 1.1717332601547241, "learning_rate": 0.001, "loss": 0.7349, "step": 139300 }, { "epoch": 45.05494505494506, "grad_norm": 1.0423474311828613, "learning_rate": 0.001, "loss": 0.6871, "step": 139400 }, { "epoch": 45.08726567550097, "grad_norm": 1.2191455364227295, "learning_rate": 0.001, "loss": 0.6935, "step": 139500 }, { "epoch": 45.11958629605689, "grad_norm": 1.0837911367416382, "learning_rate": 0.001, "loss": 0.6955, "step": 139600 }, { "epoch": 45.1519069166128, "grad_norm": 1.1097484827041626, "learning_rate": 0.001, "loss": 0.7089, "step": 139700 }, { "epoch": 45.18422753716872, "grad_norm": 1.104050636291504, "learning_rate": 0.001, "loss": 0.7119, "step": 139800 }, { "epoch": 45.21654815772463, "grad_norm": 1.0573209524154663, "learning_rate": 0.001, "loss": 0.7131, "step": 139900 }, { "epoch": 45.248868778280546, "grad_norm": 0.8985626697540283, "learning_rate": 0.001, "loss": 0.7187, "step": 140000 }, { "epoch": 45.28118939883646, "grad_norm": 1.310064673423767, "learning_rate": 0.001, "loss": 0.7201, "step": 140100 }, { "epoch": 45.313510019392375, "grad_norm": 0.9245189428329468, "learning_rate": 0.001, "loss": 0.7328, "step": 140200 }, { "epoch": 45.34583063994829, "grad_norm": 1.1032655239105225, "learning_rate": 0.001, "loss": 0.7273, "step": 140300 }, { "epoch": 45.378151260504204, "grad_norm": 0.990897536277771, "learning_rate": 0.001, "loss": 0.7364, "step": 140400 }, { "epoch": 45.41047188106012, "grad_norm": 1.181758999824524, "learning_rate": 0.001, "loss": 0.7442, "step": 140500 }, { "epoch": 45.44279250161603, "grad_norm": 1.3388699293136597, "learning_rate": 0.001, "loss": 0.7511, "step": 140600 }, { "epoch": 45.47511312217195, "grad_norm": 0.8885520100593567, "learning_rate": 0.001, "loss": 0.7496, "step": 140700 }, { "epoch": 45.50743374272786, "grad_norm": 1.316257357597351, "learning_rate": 0.001, "loss": 0.7516, "step": 140800 }, { "epoch": 45.53975436328378, "grad_norm": 1.033570647239685, "learning_rate": 0.001, "loss": 0.7628, "step": 140900 }, { "epoch": 45.57207498383969, "grad_norm": 1.183793306350708, "learning_rate": 0.001, "loss": 0.7527, "step": 141000 }, { "epoch": 45.604395604395606, "grad_norm": 0.9284672141075134, "learning_rate": 0.001, "loss": 0.7638, "step": 141100 }, { "epoch": 45.63671622495152, "grad_norm": 1.4275416135787964, "learning_rate": 0.001, "loss": 0.7848, "step": 141200 }, { "epoch": 45.669036845507435, "grad_norm": 1.1278414726257324, "learning_rate": 0.001, "loss": 0.7659, "step": 141300 }, { "epoch": 45.70135746606335, "grad_norm": 1.2252744436264038, "learning_rate": 0.001, "loss": 0.775, "step": 141400 }, { "epoch": 45.733678086619264, "grad_norm": 1.1980767250061035, "learning_rate": 0.001, "loss": 0.7857, "step": 141500 }, { "epoch": 45.76599870717518, "grad_norm": 1.0100624561309814, "learning_rate": 0.001, "loss": 0.7885, "step": 141600 }, { "epoch": 45.79831932773109, "grad_norm": 1.0407867431640625, "learning_rate": 0.001, "loss": 0.7922, "step": 141700 }, { "epoch": 45.83063994828701, "grad_norm": 1.325645923614502, "learning_rate": 0.001, "loss": 0.7907, "step": 141800 }, { "epoch": 45.86296056884292, "grad_norm": 1.332195520401001, "learning_rate": 0.001, "loss": 0.8018, "step": 141900 }, { "epoch": 45.89528118939884, "grad_norm": 0.9816655516624451, "learning_rate": 0.001, "loss": 0.7945, "step": 142000 }, { "epoch": 45.92760180995475, "grad_norm": 1.0350703001022339, "learning_rate": 0.001, "loss": 0.7957, "step": 142100 }, { "epoch": 45.959922430510666, "grad_norm": 1.0534844398498535, "learning_rate": 0.001, "loss": 0.8023, "step": 142200 }, { "epoch": 45.99224305106658, "grad_norm": 1.0965960025787354, "learning_rate": 0.001, "loss": 0.8214, "step": 142300 }, { "epoch": 46.024563671622495, "grad_norm": 1.144916296005249, "learning_rate": 0.001, "loss": 0.7036, "step": 142400 }, { "epoch": 46.05688429217841, "grad_norm": 1.2183438539505005, "learning_rate": 0.001, "loss": 0.6672, "step": 142500 }, { "epoch": 46.089204912734324, "grad_norm": 1.0891343355178833, "learning_rate": 0.001, "loss": 0.6792, "step": 142600 }, { "epoch": 46.12152553329024, "grad_norm": 1.156198263168335, "learning_rate": 0.001, "loss": 0.6776, "step": 142700 }, { "epoch": 46.15384615384615, "grad_norm": 0.9083780646324158, "learning_rate": 0.001, "loss": 0.6933, "step": 142800 }, { "epoch": 46.18616677440207, "grad_norm": 1.4602667093276978, "learning_rate": 0.001, "loss": 0.6929, "step": 142900 }, { "epoch": 46.21848739495798, "grad_norm": 1.3694496154785156, "learning_rate": 0.001, "loss": 0.702, "step": 143000 }, { "epoch": 46.2508080155139, "grad_norm": 1.2055649757385254, "learning_rate": 0.001, "loss": 0.7076, "step": 143100 }, { "epoch": 46.28312863606981, "grad_norm": 1.5634335279464722, "learning_rate": 0.001, "loss": 0.7121, "step": 143200 }, { "epoch": 46.315449256625726, "grad_norm": 1.1664983034133911, "learning_rate": 0.001, "loss": 0.7065, "step": 143300 }, { "epoch": 46.34776987718164, "grad_norm": 1.1601449251174927, "learning_rate": 0.001, "loss": 0.7142, "step": 143400 }, { "epoch": 46.380090497737555, "grad_norm": 0.9292743802070618, "learning_rate": 0.001, "loss": 0.7195, "step": 143500 }, { "epoch": 46.41241111829347, "grad_norm": 0.9101577997207642, "learning_rate": 0.001, "loss": 0.7188, "step": 143600 }, { "epoch": 46.444731738849384, "grad_norm": 1.1427319049835205, "learning_rate": 0.001, "loss": 0.7397, "step": 143700 }, { "epoch": 46.4770523594053, "grad_norm": 1.1437196731567383, "learning_rate": 0.001, "loss": 0.7398, "step": 143800 }, { "epoch": 46.50937297996121, "grad_norm": 1.1757135391235352, "learning_rate": 0.001, "loss": 0.7379, "step": 143900 }, { "epoch": 46.54169360051713, "grad_norm": 1.065805196762085, "learning_rate": 0.001, "loss": 0.7405, "step": 144000 }, { "epoch": 46.57401422107304, "grad_norm": 1.1385811567306519, "learning_rate": 0.001, "loss": 0.7412, "step": 144100 }, { "epoch": 46.60633484162896, "grad_norm": 1.4648467302322388, "learning_rate": 0.001, "loss": 0.7548, "step": 144200 }, { "epoch": 46.63865546218487, "grad_norm": 1.3018345832824707, "learning_rate": 0.001, "loss": 0.7558, "step": 144300 }, { "epoch": 46.670976082740786, "grad_norm": 1.063948631286621, "learning_rate": 0.001, "loss": 0.7626, "step": 144400 }, { "epoch": 46.7032967032967, "grad_norm": 1.0153378248214722, "learning_rate": 0.001, "loss": 0.7573, "step": 144500 }, { "epoch": 46.735617323852615, "grad_norm": 1.225581169128418, "learning_rate": 0.001, "loss": 0.7613, "step": 144600 }, { "epoch": 46.76793794440853, "grad_norm": 0.9249482750892639, "learning_rate": 0.001, "loss": 0.7661, "step": 144700 }, { "epoch": 46.800258564964444, "grad_norm": 1.2745779752731323, "learning_rate": 0.001, "loss": 0.78, "step": 144800 }, { "epoch": 46.83257918552036, "grad_norm": 1.3414521217346191, "learning_rate": 0.001, "loss": 0.7695, "step": 144900 }, { "epoch": 46.864899806076274, "grad_norm": 0.9675344228744507, "learning_rate": 0.001, "loss": 0.7826, "step": 145000 }, { "epoch": 46.89722042663219, "grad_norm": 1.215022087097168, "learning_rate": 0.001, "loss": 0.7846, "step": 145100 }, { "epoch": 46.9295410471881, "grad_norm": 1.2138264179229736, "learning_rate": 0.001, "loss": 0.7701, "step": 145200 }, { "epoch": 46.96186166774402, "grad_norm": 1.205086350440979, "learning_rate": 0.001, "loss": 0.7883, "step": 145300 }, { "epoch": 46.99418228829994, "grad_norm": 1.1200098991394043, "learning_rate": 0.001, "loss": 0.7914, "step": 145400 }, { "epoch": 47.02650290885585, "grad_norm": 0.9899723529815674, "learning_rate": 0.001, "loss": 0.6745, "step": 145500 }, { "epoch": 47.05882352941177, "grad_norm": 1.075188398361206, "learning_rate": 0.001, "loss": 0.6558, "step": 145600 }, { "epoch": 47.09114414996768, "grad_norm": 1.4099141359329224, "learning_rate": 0.001, "loss": 0.657, "step": 145700 }, { "epoch": 47.1234647705236, "grad_norm": 1.0751066207885742, "learning_rate": 0.001, "loss": 0.6595, "step": 145800 }, { "epoch": 47.15578539107951, "grad_norm": 1.1294362545013428, "learning_rate": 0.001, "loss": 0.6692, "step": 145900 }, { "epoch": 47.188106011635426, "grad_norm": 1.1147383451461792, "learning_rate": 0.001, "loss": 0.6745, "step": 146000 }, { "epoch": 47.22042663219134, "grad_norm": 1.3118993043899536, "learning_rate": 0.001, "loss": 0.6865, "step": 146100 }, { "epoch": 47.252747252747255, "grad_norm": 1.0758987665176392, "learning_rate": 0.001, "loss": 0.6789, "step": 146200 }, { "epoch": 47.28506787330317, "grad_norm": 1.1692441701889038, "learning_rate": 0.001, "loss": 0.6979, "step": 146300 }, { "epoch": 47.317388493859085, "grad_norm": 1.0854567289352417, "learning_rate": 0.001, "loss": 0.7002, "step": 146400 }, { "epoch": 47.349709114415, "grad_norm": 1.092974305152893, "learning_rate": 0.001, "loss": 0.7065, "step": 146500 }, { "epoch": 47.382029734970914, "grad_norm": 1.3812941312789917, "learning_rate": 0.001, "loss": 0.708, "step": 146600 }, { "epoch": 47.41435035552683, "grad_norm": 0.9461132884025574, "learning_rate": 0.001, "loss": 0.7091, "step": 146700 }, { "epoch": 47.44667097608274, "grad_norm": 1.1208568811416626, "learning_rate": 0.001, "loss": 0.7072, "step": 146800 }, { "epoch": 47.47899159663866, "grad_norm": 1.2754037380218506, "learning_rate": 0.001, "loss": 0.7082, "step": 146900 }, { "epoch": 47.51131221719457, "grad_norm": 1.1170412302017212, "learning_rate": 0.001, "loss": 0.7238, "step": 147000 }, { "epoch": 47.543632837750486, "grad_norm": 1.2008103132247925, "learning_rate": 0.001, "loss": 0.7249, "step": 147100 }, { "epoch": 47.5759534583064, "grad_norm": 1.235460638999939, "learning_rate": 0.001, "loss": 0.7246, "step": 147200 }, { "epoch": 47.608274078862316, "grad_norm": 1.1848219633102417, "learning_rate": 0.001, "loss": 0.7356, "step": 147300 }, { "epoch": 47.64059469941823, "grad_norm": 1.230288028717041, "learning_rate": 0.001, "loss": 0.7352, "step": 147400 }, { "epoch": 47.672915319974145, "grad_norm": 1.3516244888305664, "learning_rate": 0.001, "loss": 0.7356, "step": 147500 }, { "epoch": 47.70523594053006, "grad_norm": 1.4048689603805542, "learning_rate": 0.001, "loss": 0.7382, "step": 147600 }, { "epoch": 47.737556561085974, "grad_norm": 1.4154736995697021, "learning_rate": 0.001, "loss": 0.7455, "step": 147700 }, { "epoch": 47.76987718164189, "grad_norm": 1.1774300336837769, "learning_rate": 0.001, "loss": 0.7536, "step": 147800 }, { "epoch": 47.8021978021978, "grad_norm": 1.1352239847183228, "learning_rate": 0.001, "loss": 0.754, "step": 147900 }, { "epoch": 47.83451842275372, "grad_norm": 1.2363958358764648, "learning_rate": 0.001, "loss": 0.7597, "step": 148000 }, { "epoch": 47.86683904330963, "grad_norm": 1.256335973739624, "learning_rate": 0.001, "loss": 0.7591, "step": 148100 }, { "epoch": 47.89915966386555, "grad_norm": 1.0832189321517944, "learning_rate": 0.001, "loss": 0.7663, "step": 148200 }, { "epoch": 47.93148028442146, "grad_norm": 1.0445327758789062, "learning_rate": 0.001, "loss": 0.7515, "step": 148300 }, { "epoch": 47.963800904977376, "grad_norm": 1.215279459953308, "learning_rate": 0.001, "loss": 0.7693, "step": 148400 }, { "epoch": 47.99612152553329, "grad_norm": 1.2704572677612305, "learning_rate": 0.001, "loss": 0.7574, "step": 148500 }, { "epoch": 48.028442146089205, "grad_norm": 1.3231985569000244, "learning_rate": 0.001, "loss": 0.6389, "step": 148600 }, { "epoch": 48.06076276664512, "grad_norm": 1.1322678327560425, "learning_rate": 0.001, "loss": 0.6415, "step": 148700 }, { "epoch": 48.093083387201034, "grad_norm": 1.2357839345932007, "learning_rate": 0.001, "loss": 0.6452, "step": 148800 }, { "epoch": 48.12540400775695, "grad_norm": 1.111811637878418, "learning_rate": 0.001, "loss": 0.6583, "step": 148900 }, { "epoch": 48.15772462831286, "grad_norm": 1.0918103456497192, "learning_rate": 0.001, "loss": 0.6533, "step": 149000 }, { "epoch": 48.19004524886878, "grad_norm": 1.1347297430038452, "learning_rate": 0.001, "loss": 0.6587, "step": 149100 }, { "epoch": 48.22236586942469, "grad_norm": 1.2501696348190308, "learning_rate": 0.001, "loss": 0.659, "step": 149200 }, { "epoch": 48.25468648998061, "grad_norm": 1.4791865348815918, "learning_rate": 0.001, "loss": 0.6715, "step": 149300 }, { "epoch": 48.28700711053652, "grad_norm": 1.260297179222107, "learning_rate": 0.001, "loss": 0.6774, "step": 149400 }, { "epoch": 48.319327731092436, "grad_norm": 1.0429021120071411, "learning_rate": 0.001, "loss": 0.6714, "step": 149500 }, { "epoch": 48.35164835164835, "grad_norm": 1.065678596496582, "learning_rate": 0.001, "loss": 0.6888, "step": 149600 }, { "epoch": 48.383968972204265, "grad_norm": 1.408837080001831, "learning_rate": 0.001, "loss": 0.6896, "step": 149700 }, { "epoch": 48.41628959276018, "grad_norm": 1.0948500633239746, "learning_rate": 0.001, "loss": 0.6885, "step": 149800 }, { "epoch": 48.448610213316094, "grad_norm": 1.298799991607666, "learning_rate": 0.001, "loss": 0.6984, "step": 149900 }, { "epoch": 48.48093083387201, "grad_norm": 1.519413948059082, "learning_rate": 0.001, "loss": 0.6926, "step": 150000 }, { "epoch": 48.51325145442792, "grad_norm": 1.0802823305130005, "learning_rate": 0.001, "loss": 0.7081, "step": 150100 }, { "epoch": 48.54557207498384, "grad_norm": 1.3252209424972534, "learning_rate": 0.001, "loss": 0.7159, "step": 150200 }, { "epoch": 48.57789269553975, "grad_norm": 1.1150238513946533, "learning_rate": 0.001, "loss": 0.7082, "step": 150300 }, { "epoch": 48.61021331609567, "grad_norm": 1.1258933544158936, "learning_rate": 0.001, "loss": 0.7168, "step": 150400 }, { "epoch": 48.64253393665158, "grad_norm": 1.1435340642929077, "learning_rate": 0.001, "loss": 0.7198, "step": 150500 }, { "epoch": 48.674854557207496, "grad_norm": 1.1697975397109985, "learning_rate": 0.001, "loss": 0.7225, "step": 150600 }, { "epoch": 48.70717517776341, "grad_norm": 1.4874383211135864, "learning_rate": 0.001, "loss": 0.73, "step": 150700 }, { "epoch": 48.739495798319325, "grad_norm": 1.2991604804992676, "learning_rate": 0.001, "loss": 0.7333, "step": 150800 }, { "epoch": 48.77181641887524, "grad_norm": 1.1365175247192383, "learning_rate": 0.001, "loss": 0.7275, "step": 150900 }, { "epoch": 48.804137039431154, "grad_norm": 1.1174911260604858, "learning_rate": 0.001, "loss": 0.7351, "step": 151000 }, { "epoch": 48.83645765998707, "grad_norm": 1.2326855659484863, "learning_rate": 0.001, "loss": 0.7416, "step": 151100 }, { "epoch": 48.86877828054298, "grad_norm": 1.0537230968475342, "learning_rate": 0.001, "loss": 0.7471, "step": 151200 }, { "epoch": 48.9010989010989, "grad_norm": 1.361183524131775, "learning_rate": 0.001, "loss": 0.7429, "step": 151300 }, { "epoch": 48.93341952165481, "grad_norm": 1.2473835945129395, "learning_rate": 0.001, "loss": 0.749, "step": 151400 }, { "epoch": 48.96574014221073, "grad_norm": 1.4853483438491821, "learning_rate": 0.001, "loss": 0.742, "step": 151500 }, { "epoch": 48.99806076276664, "grad_norm": 1.1820982694625854, "learning_rate": 0.001, "loss": 0.7374, "step": 151600 }, { "epoch": 49.03038138332256, "grad_norm": 1.0802925825119019, "learning_rate": 0.001, "loss": 0.6209, "step": 151700 }, { "epoch": 49.06270200387848, "grad_norm": 1.4810147285461426, "learning_rate": 0.001, "loss": 0.6262, "step": 151800 }, { "epoch": 49.09502262443439, "grad_norm": 0.8833537697792053, "learning_rate": 0.001, "loss": 0.6355, "step": 151900 }, { "epoch": 49.12734324499031, "grad_norm": 1.0637927055358887, "learning_rate": 0.001, "loss": 0.6326, "step": 152000 }, { "epoch": 49.15966386554622, "grad_norm": 1.1112260818481445, "learning_rate": 0.001, "loss": 0.6453, "step": 152100 }, { "epoch": 49.191984486102136, "grad_norm": 1.18427312374115, "learning_rate": 0.001, "loss": 0.6456, "step": 152200 }, { "epoch": 49.22430510665805, "grad_norm": 1.4300342798233032, "learning_rate": 0.001, "loss": 0.6536, "step": 152300 }, { "epoch": 49.256625727213965, "grad_norm": 1.257840633392334, "learning_rate": 0.001, "loss": 0.6553, "step": 152400 }, { "epoch": 49.28894634776988, "grad_norm": 1.214965581893921, "learning_rate": 0.001, "loss": 0.6576, "step": 152500 }, { "epoch": 49.321266968325794, "grad_norm": 1.1370794773101807, "learning_rate": 0.001, "loss": 0.6644, "step": 152600 }, { "epoch": 49.35358758888171, "grad_norm": 1.0847762823104858, "learning_rate": 0.001, "loss": 0.6773, "step": 152700 }, { "epoch": 49.38590820943762, "grad_norm": 1.175560712814331, "learning_rate": 0.001, "loss": 0.6634, "step": 152800 }, { "epoch": 49.41822882999354, "grad_norm": 1.1380037069320679, "learning_rate": 0.001, "loss": 0.6813, "step": 152900 }, { "epoch": 49.45054945054945, "grad_norm": 1.1089116334915161, "learning_rate": 0.001, "loss": 0.6813, "step": 153000 }, { "epoch": 49.48287007110537, "grad_norm": 1.3093582391738892, "learning_rate": 0.001, "loss": 0.6809, "step": 153100 }, { "epoch": 49.51519069166128, "grad_norm": 1.2866489887237549, "learning_rate": 0.001, "loss": 0.6903, "step": 153200 }, { "epoch": 49.547511312217196, "grad_norm": 1.3211435079574585, "learning_rate": 0.001, "loss": 0.6982, "step": 153300 }, { "epoch": 49.57983193277311, "grad_norm": 1.1937994956970215, "learning_rate": 0.001, "loss": 0.7029, "step": 153400 }, { "epoch": 49.612152553329025, "grad_norm": 1.2572121620178223, "learning_rate": 0.001, "loss": 0.6988, "step": 153500 }, { "epoch": 49.64447317388494, "grad_norm": 1.4131183624267578, "learning_rate": 0.001, "loss": 0.6971, "step": 153600 }, { "epoch": 49.676793794440854, "grad_norm": 1.2003507614135742, "learning_rate": 0.001, "loss": 0.6918, "step": 153700 }, { "epoch": 49.70911441499677, "grad_norm": 1.2218358516693115, "learning_rate": 0.001, "loss": 0.7101, "step": 153800 }, { "epoch": 49.74143503555268, "grad_norm": 1.0101897716522217, "learning_rate": 0.001, "loss": 0.7099, "step": 153900 }, { "epoch": 49.7737556561086, "grad_norm": 1.1838691234588623, "learning_rate": 0.001, "loss": 0.712, "step": 154000 }, { "epoch": 49.80607627666451, "grad_norm": 1.0682734251022339, "learning_rate": 0.001, "loss": 0.7074, "step": 154100 }, { "epoch": 49.83839689722043, "grad_norm": 1.2671561241149902, "learning_rate": 0.001, "loss": 0.7123, "step": 154200 }, { "epoch": 49.87071751777634, "grad_norm": 1.1231052875518799, "learning_rate": 0.001, "loss": 0.7214, "step": 154300 }, { "epoch": 49.903038138332256, "grad_norm": 1.2853941917419434, "learning_rate": 0.001, "loss": 0.7241, "step": 154400 }, { "epoch": 49.93535875888817, "grad_norm": 1.189399003982544, "learning_rate": 0.001, "loss": 0.7233, "step": 154500 }, { "epoch": 49.967679379444085, "grad_norm": 1.6364121437072754, "learning_rate": 0.001, "loss": 0.7338, "step": 154600 }, { "epoch": 50.0, "grad_norm": 1.3278017044067383, "learning_rate": 0.001, "loss": 0.7083, "step": 154700 }, { "epoch": 50.032320620555915, "grad_norm": 1.3197938203811646, "learning_rate": 0.001, "loss": 0.6029, "step": 154800 }, { "epoch": 50.06464124111183, "grad_norm": 1.2916477918624878, "learning_rate": 0.001, "loss": 0.616, "step": 154900 }, { "epoch": 50.096961861667744, "grad_norm": 1.0578433275222778, "learning_rate": 0.001, "loss": 0.6238, "step": 155000 }, { "epoch": 50.12928248222366, "grad_norm": 1.4749788045883179, "learning_rate": 0.001, "loss": 0.631, "step": 155100 }, { "epoch": 50.16160310277957, "grad_norm": 1.3231443166732788, "learning_rate": 0.001, "loss": 0.6284, "step": 155200 }, { "epoch": 50.19392372333549, "grad_norm": 1.2039141654968262, "learning_rate": 0.001, "loss": 0.636, "step": 155300 }, { "epoch": 50.2262443438914, "grad_norm": 1.3493560552597046, "learning_rate": 0.001, "loss": 0.6318, "step": 155400 }, { "epoch": 50.25856496444732, "grad_norm": 1.2373881340026855, "learning_rate": 0.001, "loss": 0.6377, "step": 155500 }, { "epoch": 50.29088558500323, "grad_norm": 1.3066976070404053, "learning_rate": 0.001, "loss": 0.6545, "step": 155600 }, { "epoch": 50.323206205559146, "grad_norm": 1.2292847633361816, "learning_rate": 0.001, "loss": 0.649, "step": 155700 }, { "epoch": 50.35552682611506, "grad_norm": 1.5923353433609009, "learning_rate": 0.001, "loss": 0.6537, "step": 155800 }, { "epoch": 50.387847446670975, "grad_norm": 1.265131950378418, "learning_rate": 0.001, "loss": 0.6584, "step": 155900 }, { "epoch": 50.42016806722689, "grad_norm": 1.321632981300354, "learning_rate": 0.001, "loss": 0.6681, "step": 156000 }, { "epoch": 50.452488687782804, "grad_norm": 1.4250664710998535, "learning_rate": 0.001, "loss": 0.6656, "step": 156100 }, { "epoch": 50.48480930833872, "grad_norm": 1.2905536890029907, "learning_rate": 0.001, "loss": 0.6602, "step": 156200 }, { "epoch": 50.51712992889463, "grad_norm": 1.3411585092544556, "learning_rate": 0.001, "loss": 0.6692, "step": 156300 }, { "epoch": 50.54945054945055, "grad_norm": 1.2888214588165283, "learning_rate": 0.001, "loss": 0.6744, "step": 156400 }, { "epoch": 50.58177117000646, "grad_norm": 1.051918625831604, "learning_rate": 0.001, "loss": 0.6711, "step": 156500 }, { "epoch": 50.61409179056238, "grad_norm": 1.3330830335617065, "learning_rate": 0.001, "loss": 0.6827, "step": 156600 }, { "epoch": 50.64641241111829, "grad_norm": 1.1981390714645386, "learning_rate": 0.001, "loss": 0.676, "step": 156700 }, { "epoch": 50.678733031674206, "grad_norm": 1.1901593208312988, "learning_rate": 0.001, "loss": 0.685, "step": 156800 }, { "epoch": 50.71105365223012, "grad_norm": 1.8077274560928345, "learning_rate": 0.001, "loss": 0.6851, "step": 156900 }, { "epoch": 50.743374272786035, "grad_norm": 1.3598157167434692, "learning_rate": 0.001, "loss": 0.7089, "step": 157000 }, { "epoch": 50.77569489334195, "grad_norm": 1.3024399280548096, "learning_rate": 0.001, "loss": 0.6961, "step": 157100 }, { "epoch": 50.808015513897864, "grad_norm": 1.2236247062683105, "learning_rate": 0.001, "loss": 0.6973, "step": 157200 }, { "epoch": 50.84033613445378, "grad_norm": 1.4646848440170288, "learning_rate": 0.001, "loss": 0.7062, "step": 157300 }, { "epoch": 50.87265675500969, "grad_norm": 1.259742259979248, "learning_rate": 0.001, "loss": 0.7107, "step": 157400 }, { "epoch": 50.90497737556561, "grad_norm": 1.1935348510742188, "learning_rate": 0.001, "loss": 0.7096, "step": 157500 }, { "epoch": 50.93729799612152, "grad_norm": 1.1307668685913086, "learning_rate": 0.001, "loss": 0.7192, "step": 157600 }, { "epoch": 50.96961861667744, "grad_norm": 1.1710362434387207, "learning_rate": 0.001, "loss": 0.7097, "step": 157700 }, { "epoch": 51.00193923723336, "grad_norm": 1.0766218900680542, "learning_rate": 0.001, "loss": 0.724, "step": 157800 }, { "epoch": 51.03425985778927, "grad_norm": 1.2327834367752075, "learning_rate": 0.001, "loss": 0.5865, "step": 157900 }, { "epoch": 51.06658047834519, "grad_norm": 1.1618703603744507, "learning_rate": 0.001, "loss": 0.5922, "step": 158000 }, { "epoch": 51.0989010989011, "grad_norm": 1.1166026592254639, "learning_rate": 0.001, "loss": 0.6006, "step": 158100 }, { "epoch": 51.13122171945702, "grad_norm": 1.084436058998108, "learning_rate": 0.001, "loss": 0.611, "step": 158200 }, { "epoch": 51.16354234001293, "grad_norm": 1.2213963270187378, "learning_rate": 0.001, "loss": 0.6031, "step": 158300 }, { "epoch": 51.195862960568846, "grad_norm": 1.0626939535140991, "learning_rate": 0.001, "loss": 0.6139, "step": 158400 }, { "epoch": 51.22818358112476, "grad_norm": 1.164626955986023, "learning_rate": 0.001, "loss": 0.631, "step": 158500 }, { "epoch": 51.260504201680675, "grad_norm": 1.3495136499404907, "learning_rate": 0.001, "loss": 0.629, "step": 158600 }, { "epoch": 51.29282482223659, "grad_norm": 1.10512113571167, "learning_rate": 0.001, "loss": 0.6419, "step": 158700 }, { "epoch": 51.325145442792504, "grad_norm": 1.1061911582946777, "learning_rate": 0.001, "loss": 0.6322, "step": 158800 }, { "epoch": 51.35746606334842, "grad_norm": 1.1518558263778687, "learning_rate": 0.001, "loss": 0.6353, "step": 158900 }, { "epoch": 51.38978668390433, "grad_norm": 1.334631085395813, "learning_rate": 0.001, "loss": 0.6482, "step": 159000 }, { "epoch": 51.42210730446025, "grad_norm": 1.0934808254241943, "learning_rate": 0.001, "loss": 0.637, "step": 159100 }, { "epoch": 51.45442792501616, "grad_norm": 1.08160400390625, "learning_rate": 0.001, "loss": 0.6567, "step": 159200 }, { "epoch": 51.48674854557208, "grad_norm": 1.1291413307189941, "learning_rate": 0.001, "loss": 0.6487, "step": 159300 }, { "epoch": 51.51906916612799, "grad_norm": 1.089605450630188, "learning_rate": 0.001, "loss": 0.6488, "step": 159400 }, { "epoch": 51.551389786683906, "grad_norm": 1.0568206310272217, "learning_rate": 0.001, "loss": 0.6667, "step": 159500 }, { "epoch": 51.58371040723982, "grad_norm": 1.167698860168457, "learning_rate": 0.001, "loss": 0.6641, "step": 159600 }, { "epoch": 51.616031027795735, "grad_norm": 1.6438590288162231, "learning_rate": 0.001, "loss": 0.6656, "step": 159700 }, { "epoch": 51.64835164835165, "grad_norm": 1.1556907892227173, "learning_rate": 0.001, "loss": 0.6733, "step": 159800 }, { "epoch": 51.680672268907564, "grad_norm": 1.0009846687316895, "learning_rate": 0.001, "loss": 0.6732, "step": 159900 }, { "epoch": 51.71299288946348, "grad_norm": 1.324784278869629, "learning_rate": 0.001, "loss": 0.6757, "step": 160000 }, { "epoch": 51.74531351001939, "grad_norm": 1.3305933475494385, "learning_rate": 0.001, "loss": 0.6833, "step": 160100 }, { "epoch": 51.77763413057531, "grad_norm": 1.3474105596542358, "learning_rate": 0.001, "loss": 0.684, "step": 160200 }, { "epoch": 51.80995475113122, "grad_norm": 1.9692738056182861, "learning_rate": 0.001, "loss": 0.6815, "step": 160300 }, { "epoch": 51.84227537168714, "grad_norm": 1.2325831651687622, "learning_rate": 0.001, "loss": 0.6872, "step": 160400 }, { "epoch": 51.87459599224305, "grad_norm": 1.1473625898361206, "learning_rate": 0.001, "loss": 0.6924, "step": 160500 }, { "epoch": 51.906916612798966, "grad_norm": 1.1032822132110596, "learning_rate": 0.001, "loss": 0.7017, "step": 160600 }, { "epoch": 51.93923723335488, "grad_norm": 1.099583625793457, "learning_rate": 0.001, "loss": 0.7005, "step": 160700 }, { "epoch": 51.971557853910795, "grad_norm": 1.0599101781845093, "learning_rate": 0.001, "loss": 0.6942, "step": 160800 }, { "epoch": 52.00387847446671, "grad_norm": 1.2283196449279785, "learning_rate": 0.001, "loss": 0.6921, "step": 160900 }, { "epoch": 52.036199095022624, "grad_norm": 1.194020390510559, "learning_rate": 0.001, "loss": 0.5768, "step": 161000 }, { "epoch": 52.06851971557854, "grad_norm": 1.2069391012191772, "learning_rate": 0.001, "loss": 0.5867, "step": 161100 }, { "epoch": 52.10084033613445, "grad_norm": 1.0906869173049927, "learning_rate": 0.001, "loss": 0.5989, "step": 161200 }, { "epoch": 52.13316095669037, "grad_norm": 1.2393169403076172, "learning_rate": 0.001, "loss": 0.5955, "step": 161300 }, { "epoch": 52.16548157724628, "grad_norm": 1.1581658124923706, "learning_rate": 0.001, "loss": 0.5983, "step": 161400 }, { "epoch": 52.1978021978022, "grad_norm": 0.9090473651885986, "learning_rate": 0.001, "loss": 0.6022, "step": 161500 }, { "epoch": 52.23012281835811, "grad_norm": 1.035650372505188, "learning_rate": 0.001, "loss": 0.6124, "step": 161600 }, { "epoch": 52.262443438914026, "grad_norm": 1.295927882194519, "learning_rate": 0.001, "loss": 0.6105, "step": 161700 }, { "epoch": 52.29476405946994, "grad_norm": 1.4918652772903442, "learning_rate": 0.001, "loss": 0.6202, "step": 161800 }, { "epoch": 52.327084680025855, "grad_norm": 1.08669912815094, "learning_rate": 0.001, "loss": 0.6152, "step": 161900 }, { "epoch": 52.35940530058177, "grad_norm": 1.0953961610794067, "learning_rate": 0.001, "loss": 0.6191, "step": 162000 }, { "epoch": 52.391725921137684, "grad_norm": 1.4131934642791748, "learning_rate": 0.001, "loss": 0.6368, "step": 162100 }, { "epoch": 52.4240465416936, "grad_norm": 1.4726077318191528, "learning_rate": 0.001, "loss": 0.6356, "step": 162200 }, { "epoch": 52.456367162249514, "grad_norm": 1.0413191318511963, "learning_rate": 0.001, "loss": 0.636, "step": 162300 }, { "epoch": 52.48868778280543, "grad_norm": 1.0428353548049927, "learning_rate": 0.001, "loss": 0.642, "step": 162400 }, { "epoch": 52.52100840336134, "grad_norm": 1.068846344947815, "learning_rate": 0.001, "loss": 0.6423, "step": 162500 }, { "epoch": 52.55332902391726, "grad_norm": 1.114834189414978, "learning_rate": 0.001, "loss": 0.6433, "step": 162600 }, { "epoch": 52.58564964447317, "grad_norm": 1.0366978645324707, "learning_rate": 0.001, "loss": 0.6433, "step": 162700 }, { "epoch": 52.617970265029086, "grad_norm": 1.0152711868286133, "learning_rate": 0.001, "loss": 0.6506, "step": 162800 }, { "epoch": 52.650290885585, "grad_norm": 0.8862453699111938, "learning_rate": 0.001, "loss": 0.6603, "step": 162900 }, { "epoch": 52.682611506140915, "grad_norm": 1.0154690742492676, "learning_rate": 0.001, "loss": 0.6606, "step": 163000 }, { "epoch": 52.71493212669683, "grad_norm": 1.4053330421447754, "learning_rate": 0.001, "loss": 0.659, "step": 163100 }, { "epoch": 52.747252747252745, "grad_norm": 1.0044070482254028, "learning_rate": 0.001, "loss": 0.658, "step": 163200 }, { "epoch": 52.77957336780866, "grad_norm": 1.2066134214401245, "learning_rate": 0.001, "loss": 0.6568, "step": 163300 }, { "epoch": 52.811893988364574, "grad_norm": 1.1077797412872314, "learning_rate": 0.001, "loss": 0.6591, "step": 163400 }, { "epoch": 52.84421460892049, "grad_norm": 1.3316359519958496, "learning_rate": 0.001, "loss": 0.6667, "step": 163500 }, { "epoch": 52.8765352294764, "grad_norm": 1.1195502281188965, "learning_rate": 0.001, "loss": 0.678, "step": 163600 }, { "epoch": 52.90885585003232, "grad_norm": 1.1606311798095703, "learning_rate": 0.001, "loss": 0.6676, "step": 163700 }, { "epoch": 52.94117647058823, "grad_norm": 1.5537647008895874, "learning_rate": 0.001, "loss": 0.6906, "step": 163800 }, { "epoch": 52.97349709114415, "grad_norm": 1.1430984735488892, "learning_rate": 0.001, "loss": 0.6875, "step": 163900 }, { "epoch": 53.00581771170007, "grad_norm": 1.1011264324188232, "learning_rate": 0.001, "loss": 0.6679, "step": 164000 }, { "epoch": 53.03813833225598, "grad_norm": 1.1812912225723267, "learning_rate": 0.001, "loss": 0.5668, "step": 164100 }, { "epoch": 53.0704589528119, "grad_norm": 1.0356165170669556, "learning_rate": 0.001, "loss": 0.5733, "step": 164200 }, { "epoch": 53.10277957336781, "grad_norm": 1.1119260787963867, "learning_rate": 0.001, "loss": 0.5649, "step": 164300 }, { "epoch": 53.135100193923726, "grad_norm": 1.2559431791305542, "learning_rate": 0.001, "loss": 0.5773, "step": 164400 }, { "epoch": 53.16742081447964, "grad_norm": 1.2053600549697876, "learning_rate": 0.001, "loss": 0.5877, "step": 164500 }, { "epoch": 53.199741435035556, "grad_norm": 0.9627402424812317, "learning_rate": 0.001, "loss": 0.5933, "step": 164600 }, { "epoch": 53.23206205559147, "grad_norm": 1.0943775177001953, "learning_rate": 0.001, "loss": 0.5949, "step": 164700 }, { "epoch": 53.264382676147385, "grad_norm": 0.9794513583183289, "learning_rate": 0.001, "loss": 0.5941, "step": 164800 }, { "epoch": 53.2967032967033, "grad_norm": 0.9890439510345459, "learning_rate": 0.001, "loss": 0.6095, "step": 164900 }, { "epoch": 53.329023917259214, "grad_norm": 1.1350634098052979, "learning_rate": 0.001, "loss": 0.6077, "step": 165000 }, { "epoch": 53.36134453781513, "grad_norm": 1.0793288946151733, "learning_rate": 0.001, "loss": 0.6089, "step": 165100 }, { "epoch": 53.39366515837104, "grad_norm": 1.3281943798065186, "learning_rate": 0.001, "loss": 0.6245, "step": 165200 }, { "epoch": 53.42598577892696, "grad_norm": 1.111703872680664, "learning_rate": 0.001, "loss": 0.615, "step": 165300 }, { "epoch": 53.45830639948287, "grad_norm": 1.0915775299072266, "learning_rate": 0.001, "loss": 0.6191, "step": 165400 }, { "epoch": 53.49062702003879, "grad_norm": 1.1385273933410645, "learning_rate": 0.001, "loss": 0.6296, "step": 165500 }, { "epoch": 53.5229476405947, "grad_norm": 1.0138121843338013, "learning_rate": 0.001, "loss": 0.6334, "step": 165600 }, { "epoch": 53.555268261150616, "grad_norm": 1.0273572206497192, "learning_rate": 0.001, "loss": 0.6325, "step": 165700 }, { "epoch": 53.58758888170653, "grad_norm": 1.1230597496032715, "learning_rate": 0.001, "loss": 0.6237, "step": 165800 }, { "epoch": 53.619909502262445, "grad_norm": 1.057519793510437, "learning_rate": 0.001, "loss": 0.6423, "step": 165900 }, { "epoch": 53.65223012281836, "grad_norm": 1.4807605743408203, "learning_rate": 0.001, "loss": 0.6421, "step": 166000 }, { "epoch": 53.684550743374274, "grad_norm": 1.0008964538574219, "learning_rate": 0.001, "loss": 0.6452, "step": 166100 }, { "epoch": 53.71687136393019, "grad_norm": 1.1489962339401245, "learning_rate": 0.001, "loss": 0.6493, "step": 166200 }, { "epoch": 53.7491919844861, "grad_norm": 1.016244649887085, "learning_rate": 0.001, "loss": 0.6549, "step": 166300 }, { "epoch": 53.78151260504202, "grad_norm": 1.006843090057373, "learning_rate": 0.001, "loss": 0.6578, "step": 166400 }, { "epoch": 53.81383322559793, "grad_norm": 1.3928852081298828, "learning_rate": 0.001, "loss": 0.6556, "step": 166500 }, { "epoch": 53.84615384615385, "grad_norm": 0.9100160002708435, "learning_rate": 0.001, "loss": 0.6666, "step": 166600 }, { "epoch": 53.87847446670976, "grad_norm": 1.075441837310791, "learning_rate": 0.001, "loss": 0.6697, "step": 166700 }, { "epoch": 53.910795087265676, "grad_norm": 1.1663261651992798, "learning_rate": 0.001, "loss": 0.653, "step": 166800 }, { "epoch": 53.94311570782159, "grad_norm": 1.1321035623550415, "learning_rate": 0.001, "loss": 0.6717, "step": 166900 }, { "epoch": 53.975436328377505, "grad_norm": 1.277856707572937, "learning_rate": 0.001, "loss": 0.6661, "step": 167000 }, { "epoch": 54.00775694893342, "grad_norm": 1.0469721555709839, "learning_rate": 0.001, "loss": 0.6442, "step": 167100 }, { "epoch": 54.040077569489334, "grad_norm": 1.1702386140823364, "learning_rate": 0.001, "loss": 0.5511, "step": 167200 }, { "epoch": 54.07239819004525, "grad_norm": 1.306152105331421, "learning_rate": 0.001, "loss": 0.5642, "step": 167300 }, { "epoch": 54.10471881060116, "grad_norm": 1.106188416481018, "learning_rate": 0.001, "loss": 0.5681, "step": 167400 }, { "epoch": 54.13703943115708, "grad_norm": 0.9119999408721924, "learning_rate": 0.001, "loss": 0.5659, "step": 167500 }, { "epoch": 54.16936005171299, "grad_norm": 1.1136019229888916, "learning_rate": 0.001, "loss": 0.5767, "step": 167600 }, { "epoch": 54.20168067226891, "grad_norm": 0.9394569396972656, "learning_rate": 0.001, "loss": 0.5807, "step": 167700 }, { "epoch": 54.23400129282482, "grad_norm": 1.0913560390472412, "learning_rate": 0.001, "loss": 0.5786, "step": 167800 }, { "epoch": 54.266321913380736, "grad_norm": 1.195210576057434, "learning_rate": 0.001, "loss": 0.5877, "step": 167900 }, { "epoch": 54.29864253393665, "grad_norm": 0.9882518649101257, "learning_rate": 0.001, "loss": 0.6006, "step": 168000 }, { "epoch": 54.330963154492565, "grad_norm": 1.2774509191513062, "learning_rate": 0.001, "loss": 0.5962, "step": 168100 }, { "epoch": 54.36328377504848, "grad_norm": 1.320940613746643, "learning_rate": 0.001, "loss": 0.6033, "step": 168200 }, { "epoch": 54.395604395604394, "grad_norm": 1.0663779973983765, "learning_rate": 0.001, "loss": 0.6049, "step": 168300 }, { "epoch": 54.42792501616031, "grad_norm": 1.1528825759887695, "learning_rate": 0.001, "loss": 0.6013, "step": 168400 }, { "epoch": 54.46024563671622, "grad_norm": 1.0260385274887085, "learning_rate": 0.001, "loss": 0.607, "step": 168500 }, { "epoch": 54.49256625727214, "grad_norm": 1.1429572105407715, "learning_rate": 0.001, "loss": 0.6077, "step": 168600 }, { "epoch": 54.52488687782805, "grad_norm": 1.0280804634094238, "learning_rate": 0.001, "loss": 0.622, "step": 168700 }, { "epoch": 54.55720749838397, "grad_norm": 0.9293566346168518, "learning_rate": 0.001, "loss": 0.6139, "step": 168800 }, { "epoch": 54.58952811893988, "grad_norm": 0.9391525387763977, "learning_rate": 0.001, "loss": 0.6196, "step": 168900 }, { "epoch": 54.621848739495796, "grad_norm": 1.711184024810791, "learning_rate": 0.001, "loss": 0.6245, "step": 169000 }, { "epoch": 54.65416936005171, "grad_norm": 1.2077080011367798, "learning_rate": 0.001, "loss": 0.6307, "step": 169100 }, { "epoch": 54.686489980607625, "grad_norm": 1.1752902269363403, "learning_rate": 0.001, "loss": 0.6365, "step": 169200 }, { "epoch": 54.71881060116354, "grad_norm": 1.0716947317123413, "learning_rate": 0.001, "loss": 0.6351, "step": 169300 }, { "epoch": 54.751131221719454, "grad_norm": 1.178391933441162, "learning_rate": 0.001, "loss": 0.6348, "step": 169400 }, { "epoch": 54.78345184227537, "grad_norm": 0.9536128044128418, "learning_rate": 0.001, "loss": 0.646, "step": 169500 }, { "epoch": 54.81577246283128, "grad_norm": 1.2693361043930054, "learning_rate": 0.001, "loss": 0.6381, "step": 169600 }, { "epoch": 54.8480930833872, "grad_norm": 1.2697904109954834, "learning_rate": 0.001, "loss": 0.6534, "step": 169700 }, { "epoch": 54.88041370394311, "grad_norm": 1.0619981288909912, "learning_rate": 0.001, "loss": 0.6492, "step": 169800 }, { "epoch": 54.91273432449903, "grad_norm": 1.1231200695037842, "learning_rate": 0.001, "loss": 0.6486, "step": 169900 }, { "epoch": 54.94505494505494, "grad_norm": 1.1606202125549316, "learning_rate": 0.001, "loss": 0.641, "step": 170000 }, { "epoch": 54.977375565610856, "grad_norm": 1.0270097255706787, "learning_rate": 0.001, "loss": 0.6566, "step": 170100 }, { "epoch": 55.00969618616678, "grad_norm": 1.0949488878250122, "learning_rate": 0.001, "loss": 0.6129, "step": 170200 }, { "epoch": 55.04201680672269, "grad_norm": 1.044655680656433, "learning_rate": 0.001, "loss": 0.5373, "step": 170300 }, { "epoch": 55.07433742727861, "grad_norm": 1.0644351243972778, "learning_rate": 0.001, "loss": 0.5487, "step": 170400 }, { "epoch": 55.10665804783452, "grad_norm": 1.09498131275177, "learning_rate": 0.001, "loss": 0.5563, "step": 170500 }, { "epoch": 55.138978668390436, "grad_norm": 1.2268824577331543, "learning_rate": 0.001, "loss": 0.5582, "step": 170600 }, { "epoch": 55.17129928894635, "grad_norm": 0.9542291760444641, "learning_rate": 0.001, "loss": 0.5694, "step": 170700 }, { "epoch": 55.203619909502265, "grad_norm": 0.8919881582260132, "learning_rate": 0.001, "loss": 0.5756, "step": 170800 }, { "epoch": 55.23594053005818, "grad_norm": 1.2534515857696533, "learning_rate": 0.001, "loss": 0.5685, "step": 170900 }, { "epoch": 55.268261150614094, "grad_norm": 1.1573138236999512, "learning_rate": 0.001, "loss": 0.5746, "step": 171000 }, { "epoch": 55.30058177117001, "grad_norm": 0.9393195509910583, "learning_rate": 0.001, "loss": 0.5674, "step": 171100 }, { "epoch": 55.33290239172592, "grad_norm": 1.1613494157791138, "learning_rate": 0.001, "loss": 0.5856, "step": 171200 }, { "epoch": 55.36522301228184, "grad_norm": 1.1391974687576294, "learning_rate": 0.001, "loss": 0.5852, "step": 171300 }, { "epoch": 55.39754363283775, "grad_norm": 1.0151652097702026, "learning_rate": 0.001, "loss": 0.5806, "step": 171400 }, { "epoch": 55.42986425339367, "grad_norm": 0.9999426603317261, "learning_rate": 0.001, "loss": 0.5899, "step": 171500 }, { "epoch": 55.46218487394958, "grad_norm": 1.19833242893219, "learning_rate": 0.001, "loss": 0.5956, "step": 171600 }, { "epoch": 55.494505494505496, "grad_norm": 1.1464076042175293, "learning_rate": 0.001, "loss": 0.6051, "step": 171700 }, { "epoch": 55.52682611506141, "grad_norm": 1.0189952850341797, "learning_rate": 0.001, "loss": 0.6053, "step": 171800 }, { "epoch": 55.559146735617325, "grad_norm": 1.1603256464004517, "learning_rate": 0.001, "loss": 0.6016, "step": 171900 }, { "epoch": 55.59146735617324, "grad_norm": 1.160248041152954, "learning_rate": 0.001, "loss": 0.6119, "step": 172000 }, { "epoch": 55.623787976729155, "grad_norm": 1.1146880388259888, "learning_rate": 0.001, "loss": 0.6176, "step": 172100 }, { "epoch": 55.65610859728507, "grad_norm": 1.0276296138763428, "learning_rate": 0.001, "loss": 0.6148, "step": 172200 }, { "epoch": 55.688429217840984, "grad_norm": 1.408799409866333, "learning_rate": 0.001, "loss": 0.6204, "step": 172300 }, { "epoch": 55.7207498383969, "grad_norm": 1.5312975645065308, "learning_rate": 0.001, "loss": 0.6115, "step": 172400 }, { "epoch": 55.75307045895281, "grad_norm": 0.9730359315872192, "learning_rate": 0.001, "loss": 0.6253, "step": 172500 }, { "epoch": 55.78539107950873, "grad_norm": 1.0635464191436768, "learning_rate": 0.001, "loss": 0.6208, "step": 172600 }, { "epoch": 55.81771170006464, "grad_norm": 1.173057198524475, "learning_rate": 0.001, "loss": 0.6432, "step": 172700 }, { "epoch": 55.85003232062056, "grad_norm": 1.083106517791748, "learning_rate": 0.001, "loss": 0.6267, "step": 172800 }, { "epoch": 55.88235294117647, "grad_norm": 1.0348014831542969, "learning_rate": 0.001, "loss": 0.633, "step": 172900 }, { "epoch": 55.914673561732386, "grad_norm": 0.9200881719589233, "learning_rate": 0.001, "loss": 0.6424, "step": 173000 }, { "epoch": 55.9469941822883, "grad_norm": 1.0963901281356812, "learning_rate": 0.001, "loss": 0.6413, "step": 173100 }, { "epoch": 55.979314802844215, "grad_norm": 0.9636910557746887, "learning_rate": 0.001, "loss": 0.641, "step": 173200 }, { "epoch": 56.01163542340013, "grad_norm": 0.9490830302238464, "learning_rate": 0.001, "loss": 0.5881, "step": 173300 }, { "epoch": 56.043956043956044, "grad_norm": 1.1364893913269043, "learning_rate": 0.001, "loss": 0.5367, "step": 173400 }, { "epoch": 56.07627666451196, "grad_norm": 1.1960574388504028, "learning_rate": 0.001, "loss": 0.5346, "step": 173500 }, { "epoch": 56.10859728506787, "grad_norm": 1.2074171304702759, "learning_rate": 0.001, "loss": 0.5414, "step": 173600 }, { "epoch": 56.14091790562379, "grad_norm": 1.0118510723114014, "learning_rate": 0.001, "loss": 0.5469, "step": 173700 }, { "epoch": 56.1732385261797, "grad_norm": 1.1069551706314087, "learning_rate": 0.001, "loss": 0.5503, "step": 173800 }, { "epoch": 56.20555914673562, "grad_norm": 1.3581571578979492, "learning_rate": 0.001, "loss": 0.5464, "step": 173900 }, { "epoch": 56.23787976729153, "grad_norm": 1.1338752508163452, "learning_rate": 0.001, "loss": 0.5734, "step": 174000 }, { "epoch": 56.270200387847446, "grad_norm": 0.8690041899681091, "learning_rate": 0.001, "loss": 0.5649, "step": 174100 }, { "epoch": 56.30252100840336, "grad_norm": 1.5786833763122559, "learning_rate": 0.001, "loss": 0.5668, "step": 174200 }, { "epoch": 56.334841628959275, "grad_norm": 1.1835829019546509, "learning_rate": 0.001, "loss": 0.5613, "step": 174300 }, { "epoch": 56.36716224951519, "grad_norm": 0.9856243133544922, "learning_rate": 0.001, "loss": 0.5612, "step": 174400 }, { "epoch": 56.399482870071104, "grad_norm": 1.1615772247314453, "learning_rate": 0.001, "loss": 0.5731, "step": 174500 }, { "epoch": 56.43180349062702, "grad_norm": 1.2246466875076294, "learning_rate": 0.001, "loss": 0.5803, "step": 174600 }, { "epoch": 56.46412411118293, "grad_norm": 1.1777148246765137, "learning_rate": 0.001, "loss": 0.5838, "step": 174700 }, { "epoch": 56.49644473173885, "grad_norm": 1.0059620141983032, "learning_rate": 0.001, "loss": 0.5894, "step": 174800 }, { "epoch": 56.52876535229476, "grad_norm": 1.485269546508789, "learning_rate": 0.001, "loss": 0.5911, "step": 174900 }, { "epoch": 56.56108597285068, "grad_norm": 1.0750970840454102, "learning_rate": 0.001, "loss": 0.5857, "step": 175000 }, { "epoch": 56.59340659340659, "grad_norm": 1.896737813949585, "learning_rate": 0.001, "loss": 0.5919, "step": 175100 }, { "epoch": 56.625727213962506, "grad_norm": 1.1534926891326904, "learning_rate": 0.001, "loss": 0.5886, "step": 175200 }, { "epoch": 56.65804783451842, "grad_norm": 1.3018267154693604, "learning_rate": 0.001, "loss": 0.5955, "step": 175300 }, { "epoch": 56.690368455074335, "grad_norm": 0.993018388748169, "learning_rate": 0.001, "loss": 0.6071, "step": 175400 }, { "epoch": 56.72268907563025, "grad_norm": 1.1218879222869873, "learning_rate": 0.001, "loss": 0.6028, "step": 175500 }, { "epoch": 56.755009696186164, "grad_norm": 1.3716107606887817, "learning_rate": 0.001, "loss": 0.6102, "step": 175600 }, { "epoch": 56.78733031674208, "grad_norm": 1.0167415142059326, "learning_rate": 0.001, "loss": 0.6112, "step": 175700 }, { "epoch": 56.81965093729799, "grad_norm": 1.1123212575912476, "learning_rate": 0.001, "loss": 0.6179, "step": 175800 }, { "epoch": 56.85197155785391, "grad_norm": 1.0015865564346313, "learning_rate": 0.001, "loss": 0.6246, "step": 175900 }, { "epoch": 56.88429217840982, "grad_norm": 0.9963439702987671, "learning_rate": 0.001, "loss": 0.6264, "step": 176000 }, { "epoch": 56.91661279896574, "grad_norm": 1.0971077680587769, "learning_rate": 0.001, "loss": 0.626, "step": 176100 }, { "epoch": 56.94893341952165, "grad_norm": 1.1502227783203125, "learning_rate": 0.001, "loss": 0.623, "step": 176200 }, { "epoch": 56.981254040077566, "grad_norm": 1.2392164468765259, "learning_rate": 0.001, "loss": 0.6277, "step": 176300 }, { "epoch": 57.01357466063349, "grad_norm": 1.0210953950881958, "learning_rate": 0.001, "loss": 0.5756, "step": 176400 }, { "epoch": 57.0458952811894, "grad_norm": 1.0728343725204468, "learning_rate": 0.001, "loss": 0.5275, "step": 176500 }, { "epoch": 57.07821590174532, "grad_norm": 1.1027979850769043, "learning_rate": 0.001, "loss": 0.5262, "step": 176600 }, { "epoch": 57.11053652230123, "grad_norm": 1.1607871055603027, "learning_rate": 0.001, "loss": 0.5268, "step": 176700 }, { "epoch": 57.142857142857146, "grad_norm": 0.9915329813957214, "learning_rate": 0.001, "loss": 0.5339, "step": 176800 }, { "epoch": 57.17517776341306, "grad_norm": 1.335005760192871, "learning_rate": 0.001, "loss": 0.5448, "step": 176900 }, { "epoch": 57.207498383968975, "grad_norm": 0.9758956432342529, "learning_rate": 0.001, "loss": 0.5465, "step": 177000 }, { "epoch": 57.23981900452489, "grad_norm": 1.4532945156097412, "learning_rate": 0.001, "loss": 0.5442, "step": 177100 }, { "epoch": 57.272139625080804, "grad_norm": 1.2290732860565186, "learning_rate": 0.001, "loss": 0.5446, "step": 177200 }, { "epoch": 57.30446024563672, "grad_norm": 0.9689935445785522, "learning_rate": 0.001, "loss": 0.5569, "step": 177300 }, { "epoch": 57.33678086619263, "grad_norm": 0.9686174988746643, "learning_rate": 0.001, "loss": 0.5506, "step": 177400 }, { "epoch": 57.36910148674855, "grad_norm": 1.3565677404403687, "learning_rate": 0.001, "loss": 0.5682, "step": 177500 }, { "epoch": 57.40142210730446, "grad_norm": 1.1003592014312744, "learning_rate": 0.001, "loss": 0.5687, "step": 177600 }, { "epoch": 57.43374272786038, "grad_norm": 1.6163768768310547, "learning_rate": 0.001, "loss": 0.57, "step": 177700 }, { "epoch": 57.46606334841629, "grad_norm": 1.0163804292678833, "learning_rate": 0.001, "loss": 0.5729, "step": 177800 }, { "epoch": 57.498383968972206, "grad_norm": 1.0957610607147217, "learning_rate": 0.001, "loss": 0.5734, "step": 177900 }, { "epoch": 57.53070458952812, "grad_norm": 1.0814255475997925, "learning_rate": 0.001, "loss": 0.5758, "step": 178000 }, { "epoch": 57.563025210084035, "grad_norm": 0.9718095064163208, "learning_rate": 0.001, "loss": 0.582, "step": 178100 }, { "epoch": 57.59534583063995, "grad_norm": 1.1260954141616821, "learning_rate": 0.001, "loss": 0.5864, "step": 178200 }, { "epoch": 57.627666451195864, "grad_norm": 1.145377278327942, "learning_rate": 0.001, "loss": 0.5918, "step": 178300 }, { "epoch": 57.65998707175178, "grad_norm": 1.0569645166397095, "learning_rate": 0.001, "loss": 0.5841, "step": 178400 }, { "epoch": 57.69230769230769, "grad_norm": 1.341930866241455, "learning_rate": 0.001, "loss": 0.5964, "step": 178500 }, { "epoch": 57.72462831286361, "grad_norm": 0.9709081649780273, "learning_rate": 0.001, "loss": 0.6002, "step": 178600 }, { "epoch": 57.75694893341952, "grad_norm": 1.1165202856063843, "learning_rate": 0.001, "loss": 0.5944, "step": 178700 }, { "epoch": 57.78926955397544, "grad_norm": 1.2093615531921387, "learning_rate": 0.001, "loss": 0.6038, "step": 178800 }, { "epoch": 57.82159017453135, "grad_norm": 1.1175601482391357, "learning_rate": 0.001, "loss": 0.605, "step": 178900 }, { "epoch": 57.853910795087266, "grad_norm": 0.9581983685493469, "learning_rate": 0.001, "loss": 0.6015, "step": 179000 }, { "epoch": 57.88623141564318, "grad_norm": 0.9580773115158081, "learning_rate": 0.001, "loss": 0.611, "step": 179100 }, { "epoch": 57.918552036199095, "grad_norm": 1.0998668670654297, "learning_rate": 0.001, "loss": 0.6008, "step": 179200 }, { "epoch": 57.95087265675501, "grad_norm": 1.296061396598816, "learning_rate": 0.001, "loss": 0.6119, "step": 179300 }, { "epoch": 57.983193277310924, "grad_norm": 1.3698599338531494, "learning_rate": 0.001, "loss": 0.6211, "step": 179400 }, { "epoch": 58.01551389786684, "grad_norm": 0.8802805542945862, "learning_rate": 0.001, "loss": 0.545, "step": 179500 }, { "epoch": 58.04783451842275, "grad_norm": 0.847582221031189, "learning_rate": 0.001, "loss": 0.51, "step": 179600 }, { "epoch": 58.08015513897867, "grad_norm": 1.3516604900360107, "learning_rate": 0.001, "loss": 0.509, "step": 179700 }, { "epoch": 58.11247575953458, "grad_norm": 0.9700368642807007, "learning_rate": 0.001, "loss": 0.519, "step": 179800 }, { "epoch": 58.1447963800905, "grad_norm": 1.3214926719665527, "learning_rate": 0.001, "loss": 0.5273, "step": 179900 }, { "epoch": 58.17711700064641, "grad_norm": 1.2129672765731812, "learning_rate": 0.001, "loss": 0.5297, "step": 180000 }, { "epoch": 58.209437621202326, "grad_norm": 0.9577348232269287, "learning_rate": 0.001, "loss": 0.528, "step": 180100 }, { "epoch": 58.24175824175824, "grad_norm": 1.0350674390792847, "learning_rate": 0.001, "loss": 0.5388, "step": 180200 }, { "epoch": 58.274078862314155, "grad_norm": 1.1026049852371216, "learning_rate": 0.001, "loss": 0.5468, "step": 180300 }, { "epoch": 58.30639948287007, "grad_norm": 0.9450744390487671, "learning_rate": 0.001, "loss": 0.5382, "step": 180400 }, { "epoch": 58.338720103425985, "grad_norm": 1.3151814937591553, "learning_rate": 0.001, "loss": 0.5439, "step": 180500 }, { "epoch": 58.3710407239819, "grad_norm": 0.9207949638366699, "learning_rate": 0.001, "loss": 0.5478, "step": 180600 }, { "epoch": 58.403361344537814, "grad_norm": 1.0910813808441162, "learning_rate": 0.001, "loss": 0.5536, "step": 180700 }, { "epoch": 58.43568196509373, "grad_norm": 1.2546454668045044, "learning_rate": 0.001, "loss": 0.5611, "step": 180800 }, { "epoch": 58.46800258564964, "grad_norm": 1.8501858711242676, "learning_rate": 0.001, "loss": 0.5578, "step": 180900 }, { "epoch": 58.50032320620556, "grad_norm": 1.2990965843200684, "learning_rate": 0.001, "loss": 0.5644, "step": 181000 }, { "epoch": 58.53264382676147, "grad_norm": 1.2277289628982544, "learning_rate": 0.001, "loss": 0.5648, "step": 181100 }, { "epoch": 58.56496444731739, "grad_norm": 1.1264605522155762, "learning_rate": 0.001, "loss": 0.5678, "step": 181200 }, { "epoch": 58.5972850678733, "grad_norm": 0.9597664475440979, "learning_rate": 0.001, "loss": 0.5704, "step": 181300 }, { "epoch": 58.629605688429216, "grad_norm": 1.224787950515747, "learning_rate": 0.001, "loss": 0.5725, "step": 181400 }, { "epoch": 58.66192630898513, "grad_norm": 1.297468662261963, "learning_rate": 0.001, "loss": 0.5778, "step": 181500 }, { "epoch": 58.694246929541045, "grad_norm": 1.222617268562317, "learning_rate": 0.001, "loss": 0.5832, "step": 181600 }, { "epoch": 58.72656755009696, "grad_norm": 1.0155010223388672, "learning_rate": 0.001, "loss": 0.5916, "step": 181700 }, { "epoch": 58.758888170652874, "grad_norm": 1.3524972200393677, "learning_rate": 0.001, "loss": 0.5792, "step": 181800 }, { "epoch": 58.79120879120879, "grad_norm": 1.0363658666610718, "learning_rate": 0.001, "loss": 0.5905, "step": 181900 }, { "epoch": 58.8235294117647, "grad_norm": 1.498112678527832, "learning_rate": 0.001, "loss": 0.59, "step": 182000 }, { "epoch": 58.85585003232062, "grad_norm": 1.0122989416122437, "learning_rate": 0.001, "loss": 0.5904, "step": 182100 }, { "epoch": 58.88817065287653, "grad_norm": 1.0240206718444824, "learning_rate": 0.001, "loss": 0.59, "step": 182200 }, { "epoch": 58.92049127343245, "grad_norm": 1.0902669429779053, "learning_rate": 0.001, "loss": 0.6039, "step": 182300 }, { "epoch": 58.95281189398836, "grad_norm": 1.3768160343170166, "learning_rate": 0.001, "loss": 0.5971, "step": 182400 }, { "epoch": 58.985132514544276, "grad_norm": 1.3342143297195435, "learning_rate": 0.001, "loss": 0.6057, "step": 182500 }, { "epoch": 59.0174531351002, "grad_norm": 1.288487195968628, "learning_rate": 0.001, "loss": 0.5455, "step": 182600 }, { "epoch": 59.04977375565611, "grad_norm": 1.0407581329345703, "learning_rate": 0.001, "loss": 0.5001, "step": 182700 }, { "epoch": 59.08209437621203, "grad_norm": 1.0589823722839355, "learning_rate": 0.001, "loss": 0.5067, "step": 182800 }, { "epoch": 59.11441499676794, "grad_norm": 1.087250828742981, "learning_rate": 0.001, "loss": 0.5068, "step": 182900 }, { "epoch": 59.146735617323856, "grad_norm": 1.1226871013641357, "learning_rate": 0.001, "loss": 0.5135, "step": 183000 }, { "epoch": 59.17905623787977, "grad_norm": 1.2189900875091553, "learning_rate": 0.001, "loss": 0.5231, "step": 183100 }, { "epoch": 59.211376858435685, "grad_norm": 1.248438835144043, "learning_rate": 0.001, "loss": 0.5167, "step": 183200 }, { "epoch": 59.2436974789916, "grad_norm": 1.376298427581787, "learning_rate": 0.001, "loss": 0.527, "step": 183300 }, { "epoch": 59.276018099547514, "grad_norm": 0.9579793214797974, "learning_rate": 0.001, "loss": 0.5285, "step": 183400 }, { "epoch": 59.30833872010343, "grad_norm": 1.0104893445968628, "learning_rate": 0.001, "loss": 0.5269, "step": 183500 }, { "epoch": 59.34065934065934, "grad_norm": 1.0930922031402588, "learning_rate": 0.001, "loss": 0.5344, "step": 183600 }, { "epoch": 59.37297996121526, "grad_norm": 1.4185326099395752, "learning_rate": 0.001, "loss": 0.5366, "step": 183700 }, { "epoch": 59.40530058177117, "grad_norm": 0.9121148586273193, "learning_rate": 0.001, "loss": 0.5399, "step": 183800 }, { "epoch": 59.43762120232709, "grad_norm": 1.162555456161499, "learning_rate": 0.001, "loss": 0.5534, "step": 183900 }, { "epoch": 59.469941822883, "grad_norm": 1.2287561893463135, "learning_rate": 0.001, "loss": 0.5514, "step": 184000 }, { "epoch": 59.502262443438916, "grad_norm": 1.4040770530700684, "learning_rate": 0.001, "loss": 0.5553, "step": 184100 }, { "epoch": 59.53458306399483, "grad_norm": 1.099869966506958, "learning_rate": 0.001, "loss": 0.5556, "step": 184200 }, { "epoch": 59.566903684550745, "grad_norm": 1.0999058485031128, "learning_rate": 0.001, "loss": 0.5608, "step": 184300 }, { "epoch": 59.59922430510666, "grad_norm": 1.1080540418624878, "learning_rate": 0.001, "loss": 0.5616, "step": 184400 }, { "epoch": 59.631544925662574, "grad_norm": 1.1601338386535645, "learning_rate": 0.001, "loss": 0.5647, "step": 184500 }, { "epoch": 59.66386554621849, "grad_norm": 1.3953640460968018, "learning_rate": 0.001, "loss": 0.5665, "step": 184600 }, { "epoch": 59.6961861667744, "grad_norm": 1.4000911712646484, "learning_rate": 0.001, "loss": 0.5675, "step": 184700 }, { "epoch": 59.72850678733032, "grad_norm": 1.092083215713501, "learning_rate": 0.001, "loss": 0.5753, "step": 184800 }, { "epoch": 59.76082740788623, "grad_norm": 1.2371938228607178, "learning_rate": 0.001, "loss": 0.5752, "step": 184900 }, { "epoch": 59.79314802844215, "grad_norm": 1.199756383895874, "learning_rate": 0.001, "loss": 0.5759, "step": 185000 }, { "epoch": 59.82546864899806, "grad_norm": 1.2876224517822266, "learning_rate": 0.001, "loss": 0.5806, "step": 185100 }, { "epoch": 59.857789269553976, "grad_norm": 1.5293830633163452, "learning_rate": 0.001, "loss": 0.5769, "step": 185200 }, { "epoch": 59.89010989010989, "grad_norm": 1.5801347494125366, "learning_rate": 0.001, "loss": 0.5786, "step": 185300 }, { "epoch": 59.922430510665805, "grad_norm": 1.190049648284912, "learning_rate": 0.001, "loss": 0.5828, "step": 185400 }, { "epoch": 59.95475113122172, "grad_norm": 1.2189525365829468, "learning_rate": 0.001, "loss": 0.5831, "step": 185500 }, { "epoch": 59.987071751777634, "grad_norm": 1.421107292175293, "learning_rate": 0.001, "loss": 0.5927, "step": 185600 }, { "epoch": 60.01939237233355, "grad_norm": 1.0409767627716064, "learning_rate": 0.001, "loss": 0.528, "step": 185700 }, { "epoch": 60.05171299288946, "grad_norm": 1.553772211074829, "learning_rate": 0.001, "loss": 0.4902, "step": 185800 }, { "epoch": 60.08403361344538, "grad_norm": 1.4109981060028076, "learning_rate": 0.001, "loss": 0.4998, "step": 185900 }, { "epoch": 60.11635423400129, "grad_norm": 1.209875464439392, "learning_rate": 0.001, "loss": 0.4964, "step": 186000 }, { "epoch": 60.14867485455721, "grad_norm": 1.0039783716201782, "learning_rate": 0.001, "loss": 0.5021, "step": 186100 }, { "epoch": 60.18099547511312, "grad_norm": 1.0329104661941528, "learning_rate": 0.001, "loss": 0.5078, "step": 186200 }, { "epoch": 60.213316095669036, "grad_norm": 1.1111763715744019, "learning_rate": 0.001, "loss": 0.512, "step": 186300 }, { "epoch": 60.24563671622495, "grad_norm": 0.9719598889350891, "learning_rate": 0.001, "loss": 0.5117, "step": 186400 }, { "epoch": 60.277957336780865, "grad_norm": 1.2747825384140015, "learning_rate": 0.001, "loss": 0.5237, "step": 186500 }, { "epoch": 60.31027795733678, "grad_norm": 1.0429623126983643, "learning_rate": 0.001, "loss": 0.5276, "step": 186600 }, { "epoch": 60.342598577892694, "grad_norm": 1.3258812427520752, "learning_rate": 0.001, "loss": 0.5292, "step": 186700 }, { "epoch": 60.37491919844861, "grad_norm": 1.2872059345245361, "learning_rate": 0.001, "loss": 0.523, "step": 186800 }, { "epoch": 60.40723981900452, "grad_norm": 1.227522611618042, "learning_rate": 0.001, "loss": 0.5285, "step": 186900 }, { "epoch": 60.43956043956044, "grad_norm": 1.255584478378296, "learning_rate": 0.001, "loss": 0.5296, "step": 187000 }, { "epoch": 60.47188106011635, "grad_norm": 0.99484783411026, "learning_rate": 0.001, "loss": 0.5316, "step": 187100 }, { "epoch": 60.50420168067227, "grad_norm": 0.9971467852592468, "learning_rate": 0.001, "loss": 0.5402, "step": 187200 }, { "epoch": 60.53652230122818, "grad_norm": 1.2867799997329712, "learning_rate": 0.001, "loss": 0.5394, "step": 187300 }, { "epoch": 60.568842921784096, "grad_norm": 1.0854240655899048, "learning_rate": 0.001, "loss": 0.5584, "step": 187400 }, { "epoch": 60.60116354234001, "grad_norm": 1.2230095863342285, "learning_rate": 0.001, "loss": 0.5498, "step": 187500 }, { "epoch": 60.633484162895925, "grad_norm": 1.4905229806900024, "learning_rate": 0.001, "loss": 0.5461, "step": 187600 }, { "epoch": 60.66580478345184, "grad_norm": 1.0441514253616333, "learning_rate": 0.001, "loss": 0.5585, "step": 187700 }, { "epoch": 60.698125404007754, "grad_norm": 1.012478232383728, "learning_rate": 0.001, "loss": 0.5597, "step": 187800 }, { "epoch": 60.73044602456367, "grad_norm": 1.1057881116867065, "learning_rate": 0.001, "loss": 0.5601, "step": 187900 }, { "epoch": 60.762766645119584, "grad_norm": 1.323387622833252, "learning_rate": 0.001, "loss": 0.5642, "step": 188000 }, { "epoch": 60.7950872656755, "grad_norm": 1.0577166080474854, "learning_rate": 0.001, "loss": 0.5661, "step": 188100 }, { "epoch": 60.82740788623141, "grad_norm": 0.9372554421424866, "learning_rate": 0.001, "loss": 0.5637, "step": 188200 }, { "epoch": 60.85972850678733, "grad_norm": 1.5374869108200073, "learning_rate": 0.001, "loss": 0.5798, "step": 188300 }, { "epoch": 60.89204912734324, "grad_norm": 1.681485891342163, "learning_rate": 0.001, "loss": 0.5788, "step": 188400 }, { "epoch": 60.924369747899156, "grad_norm": 1.3271396160125732, "learning_rate": 0.001, "loss": 0.5758, "step": 188500 }, { "epoch": 60.95669036845507, "grad_norm": 2.263211488723755, "learning_rate": 0.001, "loss": 0.5868, "step": 188600 }, { "epoch": 60.98901098901099, "grad_norm": 1.1185808181762695, "learning_rate": 0.001, "loss": 0.5796, "step": 188700 }, { "epoch": 61.02133160956691, "grad_norm": 1.0168153047561646, "learning_rate": 0.001, "loss": 0.5137, "step": 188800 }, { "epoch": 61.05365223012282, "grad_norm": 0.968043863773346, "learning_rate": 0.001, "loss": 0.4805, "step": 188900 }, { "epoch": 61.085972850678736, "grad_norm": 0.9076022505760193, "learning_rate": 0.001, "loss": 0.4854, "step": 189000 }, { "epoch": 61.11829347123465, "grad_norm": 1.1426094770431519, "learning_rate": 0.001, "loss": 0.4931, "step": 189100 }, { "epoch": 61.150614091790565, "grad_norm": 1.0185136795043945, "learning_rate": 0.001, "loss": 0.4909, "step": 189200 }, { "epoch": 61.18293471234648, "grad_norm": 1.2896956205368042, "learning_rate": 0.001, "loss": 0.4925, "step": 189300 }, { "epoch": 61.215255332902395, "grad_norm": 1.0353734493255615, "learning_rate": 0.001, "loss": 0.4972, "step": 189400 }, { "epoch": 61.24757595345831, "grad_norm": 1.007733702659607, "learning_rate": 0.001, "loss": 0.5021, "step": 189500 }, { "epoch": 61.279896574014224, "grad_norm": 0.9368767142295837, "learning_rate": 0.001, "loss": 0.4969, "step": 189600 }, { "epoch": 61.31221719457014, "grad_norm": 1.2648391723632812, "learning_rate": 0.001, "loss": 0.516, "step": 189700 }, { "epoch": 61.34453781512605, "grad_norm": 1.064302921295166, "learning_rate": 0.001, "loss": 0.5191, "step": 189800 }, { "epoch": 61.37685843568197, "grad_norm": 1.2099469900131226, "learning_rate": 0.001, "loss": 0.5139, "step": 189900 }, { "epoch": 61.40917905623788, "grad_norm": 0.9272972345352173, "learning_rate": 0.001, "loss": 0.5271, "step": 190000 }, { "epoch": 61.441499676793796, "grad_norm": 1.2889097929000854, "learning_rate": 0.001, "loss": 0.5244, "step": 190100 }, { "epoch": 61.47382029734971, "grad_norm": 1.3665945529937744, "learning_rate": 0.001, "loss": 0.5294, "step": 190200 }, { "epoch": 61.506140917905626, "grad_norm": 1.0314524173736572, "learning_rate": 0.001, "loss": 0.5373, "step": 190300 }, { "epoch": 61.53846153846154, "grad_norm": 1.1413110494613647, "learning_rate": 0.001, "loss": 0.5305, "step": 190400 }, { "epoch": 61.570782159017455, "grad_norm": 1.3104863166809082, "learning_rate": 0.001, "loss": 0.5344, "step": 190500 }, { "epoch": 61.60310277957337, "grad_norm": 1.1210652589797974, "learning_rate": 0.001, "loss": 0.5406, "step": 190600 }, { "epoch": 61.635423400129284, "grad_norm": 1.3267130851745605, "learning_rate": 0.001, "loss": 0.5329, "step": 190700 }, { "epoch": 61.6677440206852, "grad_norm": 0.941440999507904, "learning_rate": 0.001, "loss": 0.5518, "step": 190800 }, { "epoch": 61.70006464124111, "grad_norm": 0.9702414274215698, "learning_rate": 0.001, "loss": 0.5482, "step": 190900 }, { "epoch": 61.73238526179703, "grad_norm": 1.260135531425476, "learning_rate": 0.001, "loss": 0.5448, "step": 191000 }, { "epoch": 61.76470588235294, "grad_norm": 1.2690566778182983, "learning_rate": 0.001, "loss": 0.5484, "step": 191100 }, { "epoch": 61.79702650290886, "grad_norm": 1.2342361211776733, "learning_rate": 0.001, "loss": 0.5497, "step": 191200 }, { "epoch": 61.82934712346477, "grad_norm": 1.072845458984375, "learning_rate": 0.001, "loss": 0.5576, "step": 191300 }, { "epoch": 61.861667744020686, "grad_norm": 1.2858518362045288, "learning_rate": 0.001, "loss": 0.562, "step": 191400 }, { "epoch": 61.8939883645766, "grad_norm": 0.9664008021354675, "learning_rate": 0.001, "loss": 0.574, "step": 191500 }, { "epoch": 61.926308985132515, "grad_norm": 1.1883646249771118, "learning_rate": 0.001, "loss": 0.5653, "step": 191600 }, { "epoch": 61.95862960568843, "grad_norm": 1.3640427589416504, "learning_rate": 0.001, "loss": 0.5719, "step": 191700 }, { "epoch": 61.990950226244344, "grad_norm": 0.9717844724655151, "learning_rate": 0.001, "loss": 0.5731, "step": 191800 }, { "epoch": 62.02327084680026, "grad_norm": 1.2393680810928345, "learning_rate": 0.001, "loss": 0.4972, "step": 191900 }, { "epoch": 62.05559146735617, "grad_norm": 1.30513334274292, "learning_rate": 0.001, "loss": 0.4723, "step": 192000 }, { "epoch": 62.08791208791209, "grad_norm": 1.0650439262390137, "learning_rate": 0.001, "loss": 0.477, "step": 192100 }, { "epoch": 62.120232708468, "grad_norm": 1.057212471961975, "learning_rate": 0.001, "loss": 0.4764, "step": 192200 }, { "epoch": 62.15255332902392, "grad_norm": 1.0308864116668701, "learning_rate": 0.001, "loss": 0.48, "step": 192300 }, { "epoch": 62.18487394957983, "grad_norm": 0.9770814180374146, "learning_rate": 0.001, "loss": 0.4919, "step": 192400 }, { "epoch": 62.217194570135746, "grad_norm": 1.120018720626831, "learning_rate": 0.001, "loss": 0.4944, "step": 192500 }, { "epoch": 62.24951519069166, "grad_norm": 0.9853366017341614, "learning_rate": 0.001, "loss": 0.4981, "step": 192600 }, { "epoch": 62.281835811247575, "grad_norm": 0.9427825808525085, "learning_rate": 0.001, "loss": 0.5026, "step": 192700 }, { "epoch": 62.31415643180349, "grad_norm": 1.0558254718780518, "learning_rate": 0.001, "loss": 0.5015, "step": 192800 }, { "epoch": 62.346477052359404, "grad_norm": 1.1786587238311768, "learning_rate": 0.001, "loss": 0.5007, "step": 192900 }, { "epoch": 62.37879767291532, "grad_norm": 0.9285547733306885, "learning_rate": 0.001, "loss": 0.5129, "step": 193000 }, { "epoch": 62.41111829347123, "grad_norm": 1.1568740606307983, "learning_rate": 0.001, "loss": 0.5122, "step": 193100 }, { "epoch": 62.44343891402715, "grad_norm": 0.9869513511657715, "learning_rate": 0.001, "loss": 0.5127, "step": 193200 }, { "epoch": 62.47575953458306, "grad_norm": 1.182227373123169, "learning_rate": 0.001, "loss": 0.5195, "step": 193300 }, { "epoch": 62.50808015513898, "grad_norm": 1.2155927419662476, "learning_rate": 0.001, "loss": 0.5182, "step": 193400 }, { "epoch": 62.54040077569489, "grad_norm": 1.0359050035476685, "learning_rate": 0.001, "loss": 0.5281, "step": 193500 }, { "epoch": 62.572721396250806, "grad_norm": 1.1231380701065063, "learning_rate": 0.001, "loss": 0.5314, "step": 193600 }, { "epoch": 62.60504201680672, "grad_norm": 1.0002139806747437, "learning_rate": 0.001, "loss": 0.5285, "step": 193700 }, { "epoch": 62.637362637362635, "grad_norm": 1.141382098197937, "learning_rate": 0.001, "loss": 0.5367, "step": 193800 }, { "epoch": 62.66968325791855, "grad_norm": 0.9562490582466125, "learning_rate": 0.001, "loss": 0.535, "step": 193900 }, { "epoch": 62.702003878474464, "grad_norm": 0.9702553749084473, "learning_rate": 0.001, "loss": 0.5493, "step": 194000 }, { "epoch": 62.73432449903038, "grad_norm": 1.1728858947753906, "learning_rate": 0.001, "loss": 0.5407, "step": 194100 }, { "epoch": 62.76664511958629, "grad_norm": 1.1090247631072998, "learning_rate": 0.001, "loss": 0.5406, "step": 194200 }, { "epoch": 62.79896574014221, "grad_norm": 1.2302325963974, "learning_rate": 0.001, "loss": 0.5522, "step": 194300 }, { "epoch": 62.83128636069812, "grad_norm": 1.0011643171310425, "learning_rate": 0.001, "loss": 0.5487, "step": 194400 }, { "epoch": 62.86360698125404, "grad_norm": 0.9649948477745056, "learning_rate": 0.001, "loss": 0.5458, "step": 194500 }, { "epoch": 62.89592760180995, "grad_norm": 1.1206198930740356, "learning_rate": 0.001, "loss": 0.5449, "step": 194600 }, { "epoch": 62.928248222365866, "grad_norm": 1.2993172407150269, "learning_rate": 0.001, "loss": 0.5673, "step": 194700 }, { "epoch": 62.96056884292178, "grad_norm": 1.0603526830673218, "learning_rate": 0.001, "loss": 0.5564, "step": 194800 }, { "epoch": 62.992889463477695, "grad_norm": 1.0976626873016357, "learning_rate": 0.001, "loss": 0.5572, "step": 194900 }, { "epoch": 63.02521008403362, "grad_norm": 1.0397326946258545, "learning_rate": 0.001, "loss": 0.4747, "step": 195000 }, { "epoch": 63.05753070458953, "grad_norm": 1.1978724002838135, "learning_rate": 0.001, "loss": 0.469, "step": 195100 }, { "epoch": 63.089851325145446, "grad_norm": 1.1078543663024902, "learning_rate": 0.001, "loss": 0.4726, "step": 195200 }, { "epoch": 63.12217194570136, "grad_norm": 1.2734791040420532, "learning_rate": 0.001, "loss": 0.4764, "step": 195300 }, { "epoch": 63.154492566257275, "grad_norm": 1.0650967359542847, "learning_rate": 0.001, "loss": 0.4712, "step": 195400 }, { "epoch": 63.18681318681319, "grad_norm": 0.9034045934677124, "learning_rate": 0.001, "loss": 0.4794, "step": 195500 }, { "epoch": 63.219133807369104, "grad_norm": 1.0714465379714966, "learning_rate": 0.001, "loss": 0.4798, "step": 195600 }, { "epoch": 63.25145442792502, "grad_norm": 1.3931492567062378, "learning_rate": 0.001, "loss": 0.4889, "step": 195700 }, { "epoch": 63.28377504848093, "grad_norm": 0.9365969300270081, "learning_rate": 0.001, "loss": 0.4888, "step": 195800 }, { "epoch": 63.31609566903685, "grad_norm": 1.1336215734481812, "learning_rate": 0.001, "loss": 0.497, "step": 195900 }, { "epoch": 63.34841628959276, "grad_norm": 1.1249505281448364, "learning_rate": 0.001, "loss": 0.5038, "step": 196000 }, { "epoch": 63.38073691014868, "grad_norm": 1.2850799560546875, "learning_rate": 0.001, "loss": 0.4975, "step": 196100 }, { "epoch": 63.41305753070459, "grad_norm": 0.8687011003494263, "learning_rate": 0.001, "loss": 0.508, "step": 196200 }, { "epoch": 63.445378151260506, "grad_norm": 1.2408093214035034, "learning_rate": 0.001, "loss": 0.5034, "step": 196300 }, { "epoch": 63.47769877181642, "grad_norm": 0.9300729632377625, "learning_rate": 0.001, "loss": 0.5045, "step": 196400 }, { "epoch": 63.510019392372335, "grad_norm": 1.347612738609314, "learning_rate": 0.001, "loss": 0.5104, "step": 196500 }, { "epoch": 63.54234001292825, "grad_norm": 1.1088213920593262, "learning_rate": 0.001, "loss": 0.5154, "step": 196600 }, { "epoch": 63.574660633484164, "grad_norm": 0.9200842976570129, "learning_rate": 0.001, "loss": 0.5177, "step": 196700 }, { "epoch": 63.60698125404008, "grad_norm": 1.057627558708191, "learning_rate": 0.001, "loss": 0.5158, "step": 196800 }, { "epoch": 63.63930187459599, "grad_norm": 0.9637535214424133, "learning_rate": 0.001, "loss": 0.5309, "step": 196900 }, { "epoch": 63.67162249515191, "grad_norm": 1.0072401762008667, "learning_rate": 0.001, "loss": 0.523, "step": 197000 }, { "epoch": 63.70394311570782, "grad_norm": 0.9770321249961853, "learning_rate": 0.001, "loss": 0.5306, "step": 197100 }, { "epoch": 63.73626373626374, "grad_norm": 1.0539439916610718, "learning_rate": 0.001, "loss": 0.5334, "step": 197200 }, { "epoch": 63.76858435681965, "grad_norm": 0.9808959364891052, "learning_rate": 0.001, "loss": 0.5284, "step": 197300 }, { "epoch": 63.800904977375566, "grad_norm": 1.0914591550827026, "learning_rate": 0.001, "loss": 0.5329, "step": 197400 }, { "epoch": 63.83322559793148, "grad_norm": 1.00962495803833, "learning_rate": 0.001, "loss": 0.5376, "step": 197500 }, { "epoch": 63.865546218487395, "grad_norm": 0.9482107162475586, "learning_rate": 0.001, "loss": 0.5463, "step": 197600 }, { "epoch": 63.89786683904331, "grad_norm": 1.1720030307769775, "learning_rate": 0.001, "loss": 0.5443, "step": 197700 }, { "epoch": 63.930187459599225, "grad_norm": 1.0804449319839478, "learning_rate": 0.001, "loss": 0.5414, "step": 197800 }, { "epoch": 63.96250808015514, "grad_norm": 1.112795352935791, "learning_rate": 0.001, "loss": 0.5513, "step": 197900 }, { "epoch": 63.994828700711054, "grad_norm": 1.2671788930892944, "learning_rate": 0.001, "loss": 0.5448, "step": 198000 }, { "epoch": 64.02714932126698, "grad_norm": 0.9619100689888, "learning_rate": 0.001, "loss": 0.4599, "step": 198100 }, { "epoch": 64.05946994182288, "grad_norm": 1.3109772205352783, "learning_rate": 0.001, "loss": 0.4539, "step": 198200 }, { "epoch": 64.0917905623788, "grad_norm": 0.9422193169593811, "learning_rate": 0.001, "loss": 0.4576, "step": 198300 }, { "epoch": 64.12411118293471, "grad_norm": 1.0276201963424683, "learning_rate": 0.001, "loss": 0.4617, "step": 198400 }, { "epoch": 64.15643180349063, "grad_norm": 1.002912163734436, "learning_rate": 0.001, "loss": 0.4653, "step": 198500 }, { "epoch": 64.18875242404654, "grad_norm": 1.2103850841522217, "learning_rate": 0.001, "loss": 0.4688, "step": 198600 }, { "epoch": 64.22107304460246, "grad_norm": 1.0457946062088013, "learning_rate": 0.001, "loss": 0.4787, "step": 198700 }, { "epoch": 64.25339366515837, "grad_norm": 1.295259714126587, "learning_rate": 0.001, "loss": 0.4779, "step": 198800 }, { "epoch": 64.28571428571429, "grad_norm": 1.6081501245498657, "learning_rate": 0.001, "loss": 0.4818, "step": 198900 }, { "epoch": 64.3180349062702, "grad_norm": 0.9987143874168396, "learning_rate": 0.001, "loss": 0.479, "step": 199000 }, { "epoch": 64.35035552682612, "grad_norm": 1.4530272483825684, "learning_rate": 0.001, "loss": 0.4837, "step": 199100 }, { "epoch": 64.38267614738203, "grad_norm": 0.9374475479125977, "learning_rate": 0.001, "loss": 0.492, "step": 199200 }, { "epoch": 64.41499676793795, "grad_norm": 1.4348639249801636, "learning_rate": 0.001, "loss": 0.4925, "step": 199300 }, { "epoch": 64.44731738849386, "grad_norm": 0.995640218257904, "learning_rate": 0.001, "loss": 0.4952, "step": 199400 }, { "epoch": 64.47963800904978, "grad_norm": 1.1049662828445435, "learning_rate": 0.001, "loss": 0.4983, "step": 199500 }, { "epoch": 64.51195862960569, "grad_norm": 1.1890124082565308, "learning_rate": 0.001, "loss": 0.5068, "step": 199600 }, { "epoch": 64.54427925016161, "grad_norm": 1.0749454498291016, "learning_rate": 0.001, "loss": 0.5019, "step": 199700 }, { "epoch": 64.57659987071752, "grad_norm": 1.172202706336975, "learning_rate": 0.001, "loss": 0.5068, "step": 199800 }, { "epoch": 64.60892049127344, "grad_norm": 1.4445570707321167, "learning_rate": 0.001, "loss": 0.5114, "step": 199900 }, { "epoch": 64.64124111182934, "grad_norm": 1.2424432039260864, "learning_rate": 0.001, "loss": 0.5115, "step": 200000 }, { "epoch": 64.67356173238527, "grad_norm": 1.0834548473358154, "learning_rate": 0.001, "loss": 0.5143, "step": 200100 }, { "epoch": 64.70588235294117, "grad_norm": 1.0966613292694092, "learning_rate": 0.001, "loss": 0.5244, "step": 200200 }, { "epoch": 64.7382029734971, "grad_norm": 1.1262820959091187, "learning_rate": 0.001, "loss": 0.5188, "step": 200300 }, { "epoch": 64.770523594053, "grad_norm": 0.967513382434845, "learning_rate": 0.001, "loss": 0.5249, "step": 200400 }, { "epoch": 64.80284421460892, "grad_norm": 1.079886555671692, "learning_rate": 0.001, "loss": 0.527, "step": 200500 }, { "epoch": 64.83516483516483, "grad_norm": 0.9943940043449402, "learning_rate": 0.001, "loss": 0.5263, "step": 200600 }, { "epoch": 64.86748545572075, "grad_norm": 1.2942136526107788, "learning_rate": 0.001, "loss": 0.5367, "step": 200700 }, { "epoch": 64.89980607627666, "grad_norm": 1.0185751914978027, "learning_rate": 0.001, "loss": 0.5282, "step": 200800 }, { "epoch": 64.93212669683258, "grad_norm": 1.0420331954956055, "learning_rate": 0.001, "loss": 0.5296, "step": 200900 }, { "epoch": 64.96444731738849, "grad_norm": 1.2306171655654907, "learning_rate": 0.001, "loss": 0.5439, "step": 201000 }, { "epoch": 64.99676793794441, "grad_norm": 0.9546437859535217, "learning_rate": 0.001, "loss": 0.527, "step": 201100 }, { "epoch": 65.02908855850032, "grad_norm": 1.189591646194458, "learning_rate": 0.001, "loss": 0.451, "step": 201200 }, { "epoch": 65.06140917905624, "grad_norm": 1.1122311353683472, "learning_rate": 0.001, "loss": 0.4449, "step": 201300 }, { "epoch": 65.09372979961215, "grad_norm": 1.0236417055130005, "learning_rate": 0.001, "loss": 0.4485, "step": 201400 }, { "epoch": 65.12605042016807, "grad_norm": 0.912553071975708, "learning_rate": 0.001, "loss": 0.4573, "step": 201500 }, { "epoch": 65.15837104072398, "grad_norm": 1.319473147392273, "learning_rate": 0.001, "loss": 0.4601, "step": 201600 }, { "epoch": 65.1906916612799, "grad_norm": 1.0190285444259644, "learning_rate": 0.001, "loss": 0.4633, "step": 201700 }, { "epoch": 65.2230122818358, "grad_norm": 1.0630074739456177, "learning_rate": 0.001, "loss": 0.4647, "step": 201800 }, { "epoch": 65.25533290239173, "grad_norm": 1.112196922302246, "learning_rate": 0.001, "loss": 0.4719, "step": 201900 }, { "epoch": 65.28765352294764, "grad_norm": 1.168743371963501, "learning_rate": 0.001, "loss": 0.474, "step": 202000 }, { "epoch": 65.31997414350356, "grad_norm": 1.140459656715393, "learning_rate": 0.001, "loss": 0.4688, "step": 202100 }, { "epoch": 65.35229476405947, "grad_norm": 1.141180157661438, "learning_rate": 0.001, "loss": 0.4822, "step": 202200 }, { "epoch": 65.38461538461539, "grad_norm": 1.1147860288619995, "learning_rate": 0.001, "loss": 0.479, "step": 202300 }, { "epoch": 65.4169360051713, "grad_norm": 1.0639548301696777, "learning_rate": 0.001, "loss": 0.4825, "step": 202400 }, { "epoch": 65.44925662572722, "grad_norm": 0.9733723402023315, "learning_rate": 0.001, "loss": 0.4875, "step": 202500 }, { "epoch": 65.48157724628312, "grad_norm": 1.5588597059249878, "learning_rate": 0.001, "loss": 0.4846, "step": 202600 }, { "epoch": 65.51389786683905, "grad_norm": 1.2536370754241943, "learning_rate": 0.001, "loss": 0.4947, "step": 202700 }, { "epoch": 65.54621848739495, "grad_norm": 1.096252679824829, "learning_rate": 0.001, "loss": 0.4952, "step": 202800 }, { "epoch": 65.57853910795087, "grad_norm": 1.0599154233932495, "learning_rate": 0.001, "loss": 0.4943, "step": 202900 }, { "epoch": 65.61085972850678, "grad_norm": 1.1033673286437988, "learning_rate": 0.001, "loss": 0.4986, "step": 203000 }, { "epoch": 65.6431803490627, "grad_norm": 1.0397197008132935, "learning_rate": 0.001, "loss": 0.5082, "step": 203100 }, { "epoch": 65.67550096961861, "grad_norm": 1.146864891052246, "learning_rate": 0.001, "loss": 0.514, "step": 203200 }, { "epoch": 65.70782159017453, "grad_norm": 1.1083571910858154, "learning_rate": 0.001, "loss": 0.5071, "step": 203300 }, { "epoch": 65.74014221073044, "grad_norm": 1.0147305727005005, "learning_rate": 0.001, "loss": 0.5134, "step": 203400 }, { "epoch": 65.77246283128636, "grad_norm": 1.1186457872390747, "learning_rate": 0.001, "loss": 0.5157, "step": 203500 }, { "epoch": 65.80478345184227, "grad_norm": 1.30697762966156, "learning_rate": 0.001, "loss": 0.513, "step": 203600 }, { "epoch": 65.83710407239819, "grad_norm": 1.1152093410491943, "learning_rate": 0.001, "loss": 0.5206, "step": 203700 }, { "epoch": 65.8694246929541, "grad_norm": 1.1004972457885742, "learning_rate": 0.001, "loss": 0.5232, "step": 203800 }, { "epoch": 65.90174531351002, "grad_norm": 1.133632779121399, "learning_rate": 0.001, "loss": 0.5166, "step": 203900 }, { "epoch": 65.93406593406593, "grad_norm": 1.1642577648162842, "learning_rate": 0.001, "loss": 0.5292, "step": 204000 }, { "epoch": 65.96638655462185, "grad_norm": 1.0551384687423706, "learning_rate": 0.001, "loss": 0.5312, "step": 204100 }, { "epoch": 65.99870717517777, "grad_norm": 1.46417236328125, "learning_rate": 0.001, "loss": 0.5276, "step": 204200 }, { "epoch": 66.03102779573368, "grad_norm": 1.066042423248291, "learning_rate": 0.001, "loss": 0.4336, "step": 204300 }, { "epoch": 66.0633484162896, "grad_norm": 0.9283245801925659, "learning_rate": 0.001, "loss": 0.4408, "step": 204400 }, { "epoch": 66.0956690368455, "grad_norm": 1.1362123489379883, "learning_rate": 0.001, "loss": 0.4402, "step": 204500 }, { "epoch": 66.12798965740143, "grad_norm": 0.8397954106330872, "learning_rate": 0.001, "loss": 0.452, "step": 204600 }, { "epoch": 66.16031027795734, "grad_norm": 1.2270851135253906, "learning_rate": 0.001, "loss": 0.451, "step": 204700 }, { "epoch": 66.19263089851326, "grad_norm": 1.0326131582260132, "learning_rate": 0.001, "loss": 0.4556, "step": 204800 }, { "epoch": 66.22495151906917, "grad_norm": 1.4140064716339111, "learning_rate": 0.001, "loss": 0.4582, "step": 204900 }, { "epoch": 66.25727213962509, "grad_norm": 1.034745216369629, "learning_rate": 0.001, "loss": 0.4641, "step": 205000 }, { "epoch": 66.289592760181, "grad_norm": 1.3551679849624634, "learning_rate": 0.001, "loss": 0.4587, "step": 205100 }, { "epoch": 66.32191338073692, "grad_norm": 1.0809026956558228, "learning_rate": 0.001, "loss": 0.4714, "step": 205200 }, { "epoch": 66.35423400129282, "grad_norm": 1.1291431188583374, "learning_rate": 0.001, "loss": 0.4671, "step": 205300 }, { "epoch": 66.38655462184875, "grad_norm": 1.3339194059371948, "learning_rate": 0.001, "loss": 0.4717, "step": 205400 }, { "epoch": 66.41887524240465, "grad_norm": 0.9346900582313538, "learning_rate": 0.001, "loss": 0.4751, "step": 205500 }, { "epoch": 66.45119586296057, "grad_norm": 1.0843149423599243, "learning_rate": 0.001, "loss": 0.4844, "step": 205600 }, { "epoch": 66.48351648351648, "grad_norm": 0.9538379311561584, "learning_rate": 0.001, "loss": 0.485, "step": 205700 }, { "epoch": 66.5158371040724, "grad_norm": 1.008241057395935, "learning_rate": 0.001, "loss": 0.4845, "step": 205800 }, { "epoch": 66.54815772462831, "grad_norm": 1.231938123703003, "learning_rate": 0.001, "loss": 0.4834, "step": 205900 }, { "epoch": 66.58047834518423, "grad_norm": 1.0960359573364258, "learning_rate": 0.001, "loss": 0.4885, "step": 206000 }, { "epoch": 66.61279896574014, "grad_norm": 1.256618618965149, "learning_rate": 0.001, "loss": 0.4925, "step": 206100 }, { "epoch": 66.64511958629606, "grad_norm": 1.2136703729629517, "learning_rate": 0.001, "loss": 0.4948, "step": 206200 }, { "epoch": 66.67744020685197, "grad_norm": 1.079554557800293, "learning_rate": 0.001, "loss": 0.4939, "step": 206300 }, { "epoch": 66.70976082740789, "grad_norm": 1.2305575609207153, "learning_rate": 0.001, "loss": 0.5012, "step": 206400 }, { "epoch": 66.7420814479638, "grad_norm": 1.234448790550232, "learning_rate": 0.001, "loss": 0.509, "step": 206500 }, { "epoch": 66.77440206851972, "grad_norm": 0.9879602789878845, "learning_rate": 0.001, "loss": 0.5039, "step": 206600 }, { "epoch": 66.80672268907563, "grad_norm": 1.3447580337524414, "learning_rate": 0.001, "loss": 0.5096, "step": 206700 }, { "epoch": 66.83904330963155, "grad_norm": 1.467726469039917, "learning_rate": 0.001, "loss": 0.5088, "step": 206800 }, { "epoch": 66.87136393018746, "grad_norm": 1.1262919902801514, "learning_rate": 0.001, "loss": 0.5096, "step": 206900 }, { "epoch": 66.90368455074338, "grad_norm": 1.0575158596038818, "learning_rate": 0.001, "loss": 0.508, "step": 207000 }, { "epoch": 66.93600517129929, "grad_norm": 1.211696743965149, "learning_rate": 0.001, "loss": 0.5187, "step": 207100 }, { "epoch": 66.96832579185521, "grad_norm": 1.0400581359863281, "learning_rate": 0.001, "loss": 0.5139, "step": 207200 }, { "epoch": 67.00064641241111, "grad_norm": 0.9642459750175476, "learning_rate": 0.001, "loss": 0.5096, "step": 207300 }, { "epoch": 67.03296703296704, "grad_norm": 1.2824528217315674, "learning_rate": 0.001, "loss": 0.4271, "step": 207400 }, { "epoch": 67.06528765352294, "grad_norm": 1.1130110025405884, "learning_rate": 0.001, "loss": 0.433, "step": 207500 }, { "epoch": 67.09760827407887, "grad_norm": 0.843521773815155, "learning_rate": 0.001, "loss": 0.4348, "step": 207600 }, { "epoch": 67.12992889463477, "grad_norm": 1.0790728330612183, "learning_rate": 0.001, "loss": 0.4347, "step": 207700 }, { "epoch": 67.1622495151907, "grad_norm": 1.0936672687530518, "learning_rate": 0.001, "loss": 0.4431, "step": 207800 }, { "epoch": 67.1945701357466, "grad_norm": 1.0248212814331055, "learning_rate": 0.001, "loss": 0.4452, "step": 207900 }, { "epoch": 67.22689075630252, "grad_norm": 1.1765912771224976, "learning_rate": 0.001, "loss": 0.4538, "step": 208000 }, { "epoch": 67.25921137685843, "grad_norm": 1.0497208833694458, "learning_rate": 0.001, "loss": 0.4501, "step": 208100 }, { "epoch": 67.29153199741435, "grad_norm": 1.245993971824646, "learning_rate": 0.001, "loss": 0.4557, "step": 208200 }, { "epoch": 67.32385261797026, "grad_norm": 1.7897554636001587, "learning_rate": 0.001, "loss": 0.4596, "step": 208300 }, { "epoch": 67.35617323852618, "grad_norm": 1.12723970413208, "learning_rate": 0.001, "loss": 0.4665, "step": 208400 }, { "epoch": 67.38849385908209, "grad_norm": 1.1734880208969116, "learning_rate": 0.001, "loss": 0.4632, "step": 208500 }, { "epoch": 67.42081447963801, "grad_norm": 1.541642427444458, "learning_rate": 0.001, "loss": 0.4636, "step": 208600 }, { "epoch": 67.45313510019392, "grad_norm": 1.093261957168579, "learning_rate": 0.001, "loss": 0.4698, "step": 208700 }, { "epoch": 67.48545572074984, "grad_norm": 1.0568324327468872, "learning_rate": 0.001, "loss": 0.4709, "step": 208800 }, { "epoch": 67.51777634130575, "grad_norm": 1.0981720685958862, "learning_rate": 0.001, "loss": 0.4755, "step": 208900 }, { "epoch": 67.55009696186167, "grad_norm": 1.3454633951187134, "learning_rate": 0.001, "loss": 0.4795, "step": 209000 }, { "epoch": 67.58241758241758, "grad_norm": 1.4329640865325928, "learning_rate": 0.001, "loss": 0.4883, "step": 209100 }, { "epoch": 67.6147382029735, "grad_norm": 1.489026665687561, "learning_rate": 0.001, "loss": 0.4861, "step": 209200 }, { "epoch": 67.6470588235294, "grad_norm": 1.081307291984558, "learning_rate": 0.001, "loss": 0.4896, "step": 209300 }, { "epoch": 67.67937944408533, "grad_norm": 2.474250555038452, "learning_rate": 0.001, "loss": 0.4902, "step": 209400 }, { "epoch": 67.71170006464124, "grad_norm": 1.5929138660430908, "learning_rate": 0.001, "loss": 0.4875, "step": 209500 }, { "epoch": 67.74402068519716, "grad_norm": 1.2115775346755981, "learning_rate": 0.001, "loss": 0.4877, "step": 209600 }, { "epoch": 67.77634130575306, "grad_norm": 1.062950611114502, "learning_rate": 0.001, "loss": 0.5034, "step": 209700 }, { "epoch": 67.80866192630899, "grad_norm": 1.0991734266281128, "learning_rate": 0.001, "loss": 0.5091, "step": 209800 }, { "epoch": 67.8409825468649, "grad_norm": 0.9609373807907104, "learning_rate": 0.001, "loss": 0.4963, "step": 209900 }, { "epoch": 67.87330316742081, "grad_norm": 1.4253969192504883, "learning_rate": 0.001, "loss": 0.5005, "step": 210000 }, { "epoch": 67.90562378797672, "grad_norm": 1.21868896484375, "learning_rate": 0.001, "loss": 0.5012, "step": 210100 }, { "epoch": 67.93794440853264, "grad_norm": 1.6356582641601562, "learning_rate": 0.001, "loss": 0.5034, "step": 210200 }, { "epoch": 67.97026502908855, "grad_norm": 1.1847609281539917, "learning_rate": 0.001, "loss": 0.5015, "step": 210300 }, { "epoch": 68.00258564964447, "grad_norm": 1.020921230316162, "learning_rate": 0.001, "loss": 0.5159, "step": 210400 }, { "epoch": 68.0349062702004, "grad_norm": 0.9778619408607483, "learning_rate": 0.001, "loss": 0.4194, "step": 210500 }, { "epoch": 68.0672268907563, "grad_norm": 1.1982563734054565, "learning_rate": 0.001, "loss": 0.4243, "step": 210600 }, { "epoch": 68.09954751131222, "grad_norm": 1.024571180343628, "learning_rate": 0.001, "loss": 0.4343, "step": 210700 }, { "epoch": 68.13186813186813, "grad_norm": 1.3403525352478027, "learning_rate": 0.001, "loss": 0.4244, "step": 210800 }, { "epoch": 68.16418875242405, "grad_norm": 0.9860188961029053, "learning_rate": 0.001, "loss": 0.434, "step": 210900 }, { "epoch": 68.19650937297996, "grad_norm": 1.2305492162704468, "learning_rate": 0.001, "loss": 0.4372, "step": 211000 }, { "epoch": 68.22882999353588, "grad_norm": 1.0205739736557007, "learning_rate": 0.001, "loss": 0.4456, "step": 211100 }, { "epoch": 68.26115061409179, "grad_norm": 1.068396806716919, "learning_rate": 0.001, "loss": 0.4472, "step": 211200 }, { "epoch": 68.29347123464771, "grad_norm": 0.9411461353302002, "learning_rate": 0.001, "loss": 0.4498, "step": 211300 }, { "epoch": 68.32579185520362, "grad_norm": 0.8969915509223938, "learning_rate": 0.001, "loss": 0.4566, "step": 211400 }, { "epoch": 68.35811247575954, "grad_norm": 1.0808418989181519, "learning_rate": 0.001, "loss": 0.45, "step": 211500 }, { "epoch": 68.39043309631545, "grad_norm": 1.0364251136779785, "learning_rate": 0.001, "loss": 0.459, "step": 211600 }, { "epoch": 68.42275371687137, "grad_norm": 1.0249592065811157, "learning_rate": 0.001, "loss": 0.4625, "step": 211700 }, { "epoch": 68.45507433742728, "grad_norm": 1.084763765335083, "learning_rate": 0.001, "loss": 0.4705, "step": 211800 }, { "epoch": 68.4873949579832, "grad_norm": 1.2441303730010986, "learning_rate": 0.001, "loss": 0.4665, "step": 211900 }, { "epoch": 68.5197155785391, "grad_norm": 1.000927209854126, "learning_rate": 0.001, "loss": 0.4701, "step": 212000 }, { "epoch": 68.55203619909503, "grad_norm": 1.0046494007110596, "learning_rate": 0.001, "loss": 0.4739, "step": 212100 }, { "epoch": 68.58435681965094, "grad_norm": 1.38204824924469, "learning_rate": 0.001, "loss": 0.4741, "step": 212200 }, { "epoch": 68.61667744020686, "grad_norm": 1.1469440460205078, "learning_rate": 0.001, "loss": 0.4748, "step": 212300 }, { "epoch": 68.64899806076276, "grad_norm": 1.1044620275497437, "learning_rate": 0.001, "loss": 0.4767, "step": 212400 }, { "epoch": 68.68131868131869, "grad_norm": 1.15409517288208, "learning_rate": 0.001, "loss": 0.4746, "step": 212500 }, { "epoch": 68.7136393018746, "grad_norm": 0.9297916293144226, "learning_rate": 0.001, "loss": 0.4839, "step": 212600 }, { "epoch": 68.74595992243052, "grad_norm": 1.3168245553970337, "learning_rate": 0.001, "loss": 0.4833, "step": 212700 }, { "epoch": 68.77828054298642, "grad_norm": 1.021378993988037, "learning_rate": 0.001, "loss": 0.4868, "step": 212800 }, { "epoch": 68.81060116354234, "grad_norm": 1.573776125907898, "learning_rate": 0.001, "loss": 0.4928, "step": 212900 }, { "epoch": 68.84292178409825, "grad_norm": 0.9867998361587524, "learning_rate": 0.001, "loss": 0.4937, "step": 213000 }, { "epoch": 68.87524240465417, "grad_norm": 1.0410293340682983, "learning_rate": 0.001, "loss": 0.492, "step": 213100 }, { "epoch": 68.90756302521008, "grad_norm": 1.2943593263626099, "learning_rate": 0.001, "loss": 0.4901, "step": 213200 }, { "epoch": 68.939883645766, "grad_norm": 1.0838546752929688, "learning_rate": 0.001, "loss": 0.5045, "step": 213300 }, { "epoch": 68.97220426632191, "grad_norm": 0.949532687664032, "learning_rate": 0.001, "loss": 0.4963, "step": 213400 }, { "epoch": 69.00452488687783, "grad_norm": 1.245707392692566, "learning_rate": 0.001, "loss": 0.5031, "step": 213500 }, { "epoch": 69.03684550743374, "grad_norm": 1.1587035655975342, "learning_rate": 0.001, "loss": 0.4112, "step": 213600 }, { "epoch": 69.06916612798966, "grad_norm": 1.0539730787277222, "learning_rate": 0.001, "loss": 0.4207, "step": 213700 }, { "epoch": 69.10148674854557, "grad_norm": 1.3368442058563232, "learning_rate": 0.001, "loss": 0.4224, "step": 213800 }, { "epoch": 69.13380736910149, "grad_norm": 0.9828703999519348, "learning_rate": 0.001, "loss": 0.4271, "step": 213900 }, { "epoch": 69.1661279896574, "grad_norm": 0.9627770781517029, "learning_rate": 0.001, "loss": 0.4397, "step": 214000 }, { "epoch": 69.19844861021332, "grad_norm": 1.1716887950897217, "learning_rate": 0.001, "loss": 0.432, "step": 214100 }, { "epoch": 69.23076923076923, "grad_norm": 1.0941144227981567, "learning_rate": 0.001, "loss": 0.4334, "step": 214200 }, { "epoch": 69.26308985132515, "grad_norm": 1.0086345672607422, "learning_rate": 0.001, "loss": 0.4399, "step": 214300 }, { "epoch": 69.29541047188106, "grad_norm": 1.019026279449463, "learning_rate": 0.001, "loss": 0.4366, "step": 214400 }, { "epoch": 69.32773109243698, "grad_norm": 0.9767369031906128, "learning_rate": 0.001, "loss": 0.4403, "step": 214500 }, { "epoch": 69.36005171299288, "grad_norm": 1.2241624593734741, "learning_rate": 0.001, "loss": 0.4469, "step": 214600 }, { "epoch": 69.3923723335488, "grad_norm": 0.8746280074119568, "learning_rate": 0.001, "loss": 0.4486, "step": 214700 }, { "epoch": 69.42469295410471, "grad_norm": 1.3239386081695557, "learning_rate": 0.001, "loss": 0.45, "step": 214800 }, { "epoch": 69.45701357466064, "grad_norm": 0.9036477208137512, "learning_rate": 0.001, "loss": 0.4595, "step": 214900 }, { "epoch": 69.48933419521654, "grad_norm": 1.2905906438827515, "learning_rate": 0.001, "loss": 0.4666, "step": 215000 }, { "epoch": 69.52165481577246, "grad_norm": 1.1258524656295776, "learning_rate": 0.001, "loss": 0.4608, "step": 215100 }, { "epoch": 69.55397543632837, "grad_norm": 0.9722356796264648, "learning_rate": 0.001, "loss": 0.4669, "step": 215200 }, { "epoch": 69.5862960568843, "grad_norm": 1.2494004964828491, "learning_rate": 0.001, "loss": 0.4682, "step": 215300 }, { "epoch": 69.6186166774402, "grad_norm": 1.3743317127227783, "learning_rate": 0.001, "loss": 0.4666, "step": 215400 }, { "epoch": 69.65093729799612, "grad_norm": 1.2711749076843262, "learning_rate": 0.001, "loss": 0.4706, "step": 215500 }, { "epoch": 69.68325791855203, "grad_norm": 1.0831326246261597, "learning_rate": 0.001, "loss": 0.4738, "step": 215600 }, { "epoch": 69.71557853910795, "grad_norm": 1.2341777086257935, "learning_rate": 0.001, "loss": 0.4803, "step": 215700 }, { "epoch": 69.74789915966386, "grad_norm": 1.0738404989242554, "learning_rate": 0.001, "loss": 0.4723, "step": 215800 }, { "epoch": 69.78021978021978, "grad_norm": 1.1397249698638916, "learning_rate": 0.001, "loss": 0.4744, "step": 215900 }, { "epoch": 69.81254040077569, "grad_norm": 1.1138410568237305, "learning_rate": 0.001, "loss": 0.4814, "step": 216000 }, { "epoch": 69.84486102133161, "grad_norm": 0.9972765445709229, "learning_rate": 0.001, "loss": 0.4809, "step": 216100 }, { "epoch": 69.87718164188752, "grad_norm": 1.349381685256958, "learning_rate": 0.001, "loss": 0.4837, "step": 216200 }, { "epoch": 69.90950226244344, "grad_norm": 0.8973768949508667, "learning_rate": 0.001, "loss": 0.49, "step": 216300 }, { "epoch": 69.94182288299935, "grad_norm": 1.0945274829864502, "learning_rate": 0.001, "loss": 0.4849, "step": 216400 }, { "epoch": 69.97414350355527, "grad_norm": 1.1991088390350342, "learning_rate": 0.001, "loss": 0.4973, "step": 216500 }, { "epoch": 70.00646412411119, "grad_norm": 0.9683839678764343, "learning_rate": 0.001, "loss": 0.4793, "step": 216600 }, { "epoch": 70.0387847446671, "grad_norm": 1.3583143949508667, "learning_rate": 0.001, "loss": 0.4063, "step": 216700 }, { "epoch": 70.07110536522302, "grad_norm": 1.1444014310836792, "learning_rate": 0.001, "loss": 0.4127, "step": 216800 }, { "epoch": 70.10342598577893, "grad_norm": 0.8774797320365906, "learning_rate": 0.001, "loss": 0.4156, "step": 216900 }, { "epoch": 70.13574660633485, "grad_norm": 0.9516248106956482, "learning_rate": 0.001, "loss": 0.4191, "step": 217000 }, { "epoch": 70.16806722689076, "grad_norm": 1.015852689743042, "learning_rate": 0.001, "loss": 0.4279, "step": 217100 }, { "epoch": 70.20038784744668, "grad_norm": 1.2133207321166992, "learning_rate": 0.001, "loss": 0.4211, "step": 217200 }, { "epoch": 70.23270846800258, "grad_norm": 1.0056369304656982, "learning_rate": 0.001, "loss": 0.4299, "step": 217300 }, { "epoch": 70.2650290885585, "grad_norm": 0.9653335809707642, "learning_rate": 0.001, "loss": 0.4262, "step": 217400 }, { "epoch": 70.29734970911441, "grad_norm": 1.120343804359436, "learning_rate": 0.001, "loss": 0.4317, "step": 217500 }, { "epoch": 70.32967032967034, "grad_norm": 1.306012749671936, "learning_rate": 0.001, "loss": 0.4448, "step": 217600 }, { "epoch": 70.36199095022624, "grad_norm": 1.0942025184631348, "learning_rate": 0.001, "loss": 0.4425, "step": 217700 }, { "epoch": 70.39431157078216, "grad_norm": 1.1833000183105469, "learning_rate": 0.001, "loss": 0.4402, "step": 217800 }, { "epoch": 70.42663219133807, "grad_norm": 1.0500011444091797, "learning_rate": 0.001, "loss": 0.4449, "step": 217900 }, { "epoch": 70.458952811894, "grad_norm": 0.9316534996032715, "learning_rate": 0.001, "loss": 0.4462, "step": 218000 }, { "epoch": 70.4912734324499, "grad_norm": 1.2627935409545898, "learning_rate": 0.001, "loss": 0.4523, "step": 218100 }, { "epoch": 70.52359405300582, "grad_norm": 1.0010849237442017, "learning_rate": 0.001, "loss": 0.4526, "step": 218200 }, { "epoch": 70.55591467356173, "grad_norm": 1.273710012435913, "learning_rate": 0.001, "loss": 0.4619, "step": 218300 }, { "epoch": 70.58823529411765, "grad_norm": 0.8980886936187744, "learning_rate": 0.001, "loss": 0.4568, "step": 218400 }, { "epoch": 70.62055591467356, "grad_norm": 1.0704022645950317, "learning_rate": 0.001, "loss": 0.4582, "step": 218500 }, { "epoch": 70.65287653522948, "grad_norm": 0.9373906850814819, "learning_rate": 0.001, "loss": 0.4563, "step": 218600 }, { "epoch": 70.68519715578539, "grad_norm": 1.0826210975646973, "learning_rate": 0.001, "loss": 0.4647, "step": 218700 }, { "epoch": 70.71751777634131, "grad_norm": 1.1782485246658325, "learning_rate": 0.001, "loss": 0.4634, "step": 218800 }, { "epoch": 70.74983839689722, "grad_norm": 1.3497968912124634, "learning_rate": 0.001, "loss": 0.4617, "step": 218900 }, { "epoch": 70.78215901745314, "grad_norm": 0.941879153251648, "learning_rate": 0.001, "loss": 0.474, "step": 219000 }, { "epoch": 70.81447963800905, "grad_norm": 1.1195127964019775, "learning_rate": 0.001, "loss": 0.4756, "step": 219100 }, { "epoch": 70.84680025856497, "grad_norm": 1.293348789215088, "learning_rate": 0.001, "loss": 0.4793, "step": 219200 }, { "epoch": 70.87912087912088, "grad_norm": 1.2809373140335083, "learning_rate": 0.001, "loss": 0.4781, "step": 219300 }, { "epoch": 70.9114414996768, "grad_norm": 1.1007211208343506, "learning_rate": 0.001, "loss": 0.4859, "step": 219400 }, { "epoch": 70.9437621202327, "grad_norm": 1.0545412302017212, "learning_rate": 0.001, "loss": 0.4849, "step": 219500 }, { "epoch": 70.97608274078863, "grad_norm": 1.1654821634292603, "learning_rate": 0.001, "loss": 0.4859, "step": 219600 }, { "epoch": 71.00840336134453, "grad_norm": 1.0754215717315674, "learning_rate": 0.001, "loss": 0.4579, "step": 219700 }, { "epoch": 71.04072398190046, "grad_norm": 0.882619321346283, "learning_rate": 0.001, "loss": 0.3972, "step": 219800 }, { "epoch": 71.07304460245636, "grad_norm": 1.1584864854812622, "learning_rate": 0.001, "loss": 0.4078, "step": 219900 }, { "epoch": 71.10536522301229, "grad_norm": 1.0002142190933228, "learning_rate": 0.001, "loss": 0.4078, "step": 220000 }, { "epoch": 71.13768584356819, "grad_norm": 1.4272032976150513, "learning_rate": 0.001, "loss": 0.4148, "step": 220100 }, { "epoch": 71.17000646412411, "grad_norm": 1.1477420330047607, "learning_rate": 0.001, "loss": 0.4128, "step": 220200 }, { "epoch": 71.20232708468002, "grad_norm": 1.1207064390182495, "learning_rate": 0.001, "loss": 0.4198, "step": 220300 }, { "epoch": 71.23464770523594, "grad_norm": 0.977531909942627, "learning_rate": 0.001, "loss": 0.4176, "step": 220400 }, { "epoch": 71.26696832579185, "grad_norm": 1.0667275190353394, "learning_rate": 0.001, "loss": 0.4212, "step": 220500 }, { "epoch": 71.29928894634777, "grad_norm": 1.0650066137313843, "learning_rate": 0.001, "loss": 0.4215, "step": 220600 }, { "epoch": 71.33160956690368, "grad_norm": 0.9417147040367126, "learning_rate": 0.001, "loss": 0.4322, "step": 220700 }, { "epoch": 71.3639301874596, "grad_norm": 1.1012738943099976, "learning_rate": 0.001, "loss": 0.4381, "step": 220800 }, { "epoch": 71.39625080801551, "grad_norm": 1.4074491262435913, "learning_rate": 0.001, "loss": 0.4393, "step": 220900 }, { "epoch": 71.42857142857143, "grad_norm": 1.063938021659851, "learning_rate": 0.001, "loss": 0.4401, "step": 221000 }, { "epoch": 71.46089204912734, "grad_norm": 1.2163597345352173, "learning_rate": 0.001, "loss": 0.445, "step": 221100 }, { "epoch": 71.49321266968326, "grad_norm": 1.1339733600616455, "learning_rate": 0.001, "loss": 0.4419, "step": 221200 }, { "epoch": 71.52553329023917, "grad_norm": 1.0219628810882568, "learning_rate": 0.001, "loss": 0.4445, "step": 221300 }, { "epoch": 71.55785391079509, "grad_norm": 0.947544515132904, "learning_rate": 0.001, "loss": 0.4559, "step": 221400 }, { "epoch": 71.590174531351, "grad_norm": 1.0326340198516846, "learning_rate": 0.001, "loss": 0.4487, "step": 221500 }, { "epoch": 71.62249515190692, "grad_norm": 1.0857256650924683, "learning_rate": 0.001, "loss": 0.4463, "step": 221600 }, { "epoch": 71.65481577246283, "grad_norm": 1.0161110162734985, "learning_rate": 0.001, "loss": 0.4613, "step": 221700 }, { "epoch": 71.68713639301875, "grad_norm": 1.0719902515411377, "learning_rate": 0.001, "loss": 0.4608, "step": 221800 }, { "epoch": 71.71945701357465, "grad_norm": 0.9406187534332275, "learning_rate": 0.001, "loss": 0.4547, "step": 221900 }, { "epoch": 71.75177763413058, "grad_norm": 1.0417615175247192, "learning_rate": 0.001, "loss": 0.463, "step": 222000 }, { "epoch": 71.78409825468648, "grad_norm": 1.1125191450119019, "learning_rate": 0.001, "loss": 0.4648, "step": 222100 }, { "epoch": 71.8164188752424, "grad_norm": 0.9216277599334717, "learning_rate": 0.001, "loss": 0.4683, "step": 222200 }, { "epoch": 71.84873949579831, "grad_norm": 1.156798243522644, "learning_rate": 0.001, "loss": 0.4673, "step": 222300 }, { "epoch": 71.88106011635423, "grad_norm": 1.2259422540664673, "learning_rate": 0.001, "loss": 0.4705, "step": 222400 }, { "epoch": 71.91338073691014, "grad_norm": 0.9750362634658813, "learning_rate": 0.001, "loss": 0.4726, "step": 222500 }, { "epoch": 71.94570135746606, "grad_norm": 1.1358957290649414, "learning_rate": 0.001, "loss": 0.4767, "step": 222600 }, { "epoch": 71.97802197802197, "grad_norm": 1.0237663984298706, "learning_rate": 0.001, "loss": 0.4794, "step": 222700 }, { "epoch": 72.01034259857789, "grad_norm": 0.9456234574317932, "learning_rate": 0.001, "loss": 0.4432, "step": 222800 }, { "epoch": 72.04266321913381, "grad_norm": 1.0779439210891724, "learning_rate": 0.001, "loss": 0.3942, "step": 222900 }, { "epoch": 72.07498383968972, "grad_norm": 0.9965770840644836, "learning_rate": 0.001, "loss": 0.3993, "step": 223000 }, { "epoch": 72.10730446024564, "grad_norm": 1.091644525527954, "learning_rate": 0.001, "loss": 0.4066, "step": 223100 }, { "epoch": 72.13962508080155, "grad_norm": 1.03946053981781, "learning_rate": 0.001, "loss": 0.4106, "step": 223200 }, { "epoch": 72.17194570135747, "grad_norm": 0.89607173204422, "learning_rate": 0.001, "loss": 0.4097, "step": 223300 }, { "epoch": 72.20426632191338, "grad_norm": 1.0291589498519897, "learning_rate": 0.001, "loss": 0.4098, "step": 223400 }, { "epoch": 72.2365869424693, "grad_norm": 0.8648993968963623, "learning_rate": 0.001, "loss": 0.4161, "step": 223500 }, { "epoch": 72.26890756302521, "grad_norm": 0.9925601482391357, "learning_rate": 0.001, "loss": 0.4156, "step": 223600 }, { "epoch": 72.30122818358113, "grad_norm": 0.8850096464157104, "learning_rate": 0.001, "loss": 0.4272, "step": 223700 }, { "epoch": 72.33354880413704, "grad_norm": 1.0364599227905273, "learning_rate": 0.001, "loss": 0.4239, "step": 223800 }, { "epoch": 72.36586942469296, "grad_norm": 1.0055111646652222, "learning_rate": 0.001, "loss": 0.423, "step": 223900 }, { "epoch": 72.39819004524887, "grad_norm": 1.0914433002471924, "learning_rate": 0.001, "loss": 0.4248, "step": 224000 }, { "epoch": 72.43051066580479, "grad_norm": 1.6031379699707031, "learning_rate": 0.001, "loss": 0.4341, "step": 224100 }, { "epoch": 72.4628312863607, "grad_norm": 1.1876634359359741, "learning_rate": 0.001, "loss": 0.4293, "step": 224200 }, { "epoch": 72.49515190691662, "grad_norm": 1.224788784980774, "learning_rate": 0.001, "loss": 0.4369, "step": 224300 }, { "epoch": 72.52747252747253, "grad_norm": 0.9895144701004028, "learning_rate": 0.001, "loss": 0.4429, "step": 224400 }, { "epoch": 72.55979314802845, "grad_norm": 1.3115063905715942, "learning_rate": 0.001, "loss": 0.4361, "step": 224500 }, { "epoch": 72.59211376858435, "grad_norm": 0.8714427947998047, "learning_rate": 0.001, "loss": 0.4466, "step": 224600 }, { "epoch": 72.62443438914028, "grad_norm": 1.3876723051071167, "learning_rate": 0.001, "loss": 0.4453, "step": 224700 }, { "epoch": 72.65675500969618, "grad_norm": 1.2396448850631714, "learning_rate": 0.001, "loss": 0.4425, "step": 224800 }, { "epoch": 72.6890756302521, "grad_norm": 1.384171485900879, "learning_rate": 0.001, "loss": 0.4553, "step": 224900 }, { "epoch": 72.72139625080801, "grad_norm": 1.1083046197891235, "learning_rate": 0.001, "loss": 0.4536, "step": 225000 }, { "epoch": 72.75371687136393, "grad_norm": 0.945099949836731, "learning_rate": 0.001, "loss": 0.4497, "step": 225100 }, { "epoch": 72.78603749191984, "grad_norm": 1.0911808013916016, "learning_rate": 0.001, "loss": 0.4535, "step": 225200 }, { "epoch": 72.81835811247576, "grad_norm": 0.9690731763839722, "learning_rate": 0.001, "loss": 0.4584, "step": 225300 }, { "epoch": 72.85067873303167, "grad_norm": 1.025905728340149, "learning_rate": 0.001, "loss": 0.4643, "step": 225400 }, { "epoch": 72.88299935358759, "grad_norm": 0.9114875793457031, "learning_rate": 0.001, "loss": 0.4634, "step": 225500 }, { "epoch": 72.9153199741435, "grad_norm": 1.184999942779541, "learning_rate": 0.001, "loss": 0.461, "step": 225600 }, { "epoch": 72.94764059469942, "grad_norm": 1.0200653076171875, "learning_rate": 0.001, "loss": 0.4636, "step": 225700 }, { "epoch": 72.97996121525533, "grad_norm": 1.0360325574874878, "learning_rate": 0.001, "loss": 0.4681, "step": 225800 }, { "epoch": 73.01228183581125, "grad_norm": 0.9453014135360718, "learning_rate": 0.001, "loss": 0.4289, "step": 225900 }, { "epoch": 73.04460245636716, "grad_norm": 1.0504060983657837, "learning_rate": 0.001, "loss": 0.3859, "step": 226000 }, { "epoch": 73.07692307692308, "grad_norm": 1.1331634521484375, "learning_rate": 0.001, "loss": 0.3932, "step": 226100 }, { "epoch": 73.10924369747899, "grad_norm": 0.8886730074882507, "learning_rate": 0.001, "loss": 0.3919, "step": 226200 }, { "epoch": 73.14156431803491, "grad_norm": 0.84075927734375, "learning_rate": 0.001, "loss": 0.401, "step": 226300 }, { "epoch": 73.17388493859082, "grad_norm": 0.9197028875350952, "learning_rate": 0.001, "loss": 0.3973, "step": 226400 }, { "epoch": 73.20620555914674, "grad_norm": 1.068198800086975, "learning_rate": 0.001, "loss": 0.4054, "step": 226500 }, { "epoch": 73.23852617970265, "grad_norm": 0.9718201160430908, "learning_rate": 0.001, "loss": 0.4098, "step": 226600 }, { "epoch": 73.27084680025857, "grad_norm": 1.0559133291244507, "learning_rate": 0.001, "loss": 0.4158, "step": 226700 }, { "epoch": 73.30316742081448, "grad_norm": 1.038392186164856, "learning_rate": 0.001, "loss": 0.4151, "step": 226800 }, { "epoch": 73.3354880413704, "grad_norm": 0.9937727451324463, "learning_rate": 0.001, "loss": 0.4202, "step": 226900 }, { "epoch": 73.3678086619263, "grad_norm": 0.9436622858047485, "learning_rate": 0.001, "loss": 0.4302, "step": 227000 }, { "epoch": 73.40012928248223, "grad_norm": 1.3598986864089966, "learning_rate": 0.001, "loss": 0.4155, "step": 227100 }, { "epoch": 73.43244990303813, "grad_norm": 1.0576720237731934, "learning_rate": 0.001, "loss": 0.4232, "step": 227200 }, { "epoch": 73.46477052359405, "grad_norm": 1.0063477754592896, "learning_rate": 0.001, "loss": 0.4335, "step": 227300 }, { "epoch": 73.49709114414996, "grad_norm": 1.862572431564331, "learning_rate": 0.001, "loss": 0.4273, "step": 227400 }, { "epoch": 73.52941176470588, "grad_norm": 1.2146365642547607, "learning_rate": 0.001, "loss": 0.4286, "step": 227500 }, { "epoch": 73.56173238526179, "grad_norm": 1.1539405584335327, "learning_rate": 0.001, "loss": 0.4288, "step": 227600 }, { "epoch": 73.59405300581771, "grad_norm": 1.286069393157959, "learning_rate": 0.001, "loss": 0.4386, "step": 227700 }, { "epoch": 73.62637362637362, "grad_norm": 1.2248523235321045, "learning_rate": 0.001, "loss": 0.442, "step": 227800 }, { "epoch": 73.65869424692954, "grad_norm": 1.0777546167373657, "learning_rate": 0.001, "loss": 0.4428, "step": 227900 }, { "epoch": 73.69101486748545, "grad_norm": 1.229854941368103, "learning_rate": 0.001, "loss": 0.4445, "step": 228000 }, { "epoch": 73.72333548804137, "grad_norm": 1.0533702373504639, "learning_rate": 0.001, "loss": 0.4419, "step": 228100 }, { "epoch": 73.75565610859728, "grad_norm": 1.1258604526519775, "learning_rate": 0.001, "loss": 0.4479, "step": 228200 }, { "epoch": 73.7879767291532, "grad_norm": 1.0455498695373535, "learning_rate": 0.001, "loss": 0.4526, "step": 228300 }, { "epoch": 73.82029734970911, "grad_norm": 0.9530727863311768, "learning_rate": 0.001, "loss": 0.4474, "step": 228400 }, { "epoch": 73.85261797026503, "grad_norm": 1.1385964155197144, "learning_rate": 0.001, "loss": 0.4498, "step": 228500 }, { "epoch": 73.88493859082094, "grad_norm": 1.0496107339859009, "learning_rate": 0.001, "loss": 0.4593, "step": 228600 }, { "epoch": 73.91725921137686, "grad_norm": 0.8963249325752258, "learning_rate": 0.001, "loss": 0.4576, "step": 228700 }, { "epoch": 73.94957983193277, "grad_norm": 1.2228879928588867, "learning_rate": 0.001, "loss": 0.464, "step": 228800 }, { "epoch": 73.98190045248869, "grad_norm": 0.9780979752540588, "learning_rate": 0.001, "loss": 0.4612, "step": 228900 }, { "epoch": 74.01422107304461, "grad_norm": 1.0230318307876587, "learning_rate": 0.001, "loss": 0.4167, "step": 229000 }, { "epoch": 74.04654169360052, "grad_norm": 0.9275617003440857, "learning_rate": 0.001, "loss": 0.3859, "step": 229100 }, { "epoch": 74.07886231415644, "grad_norm": 1.1309925317764282, "learning_rate": 0.001, "loss": 0.3869, "step": 229200 }, { "epoch": 74.11118293471235, "grad_norm": 0.8570504188537598, "learning_rate": 0.001, "loss": 0.3977, "step": 229300 }, { "epoch": 74.14350355526827, "grad_norm": 0.909989058971405, "learning_rate": 0.001, "loss": 0.3909, "step": 229400 }, { "epoch": 74.17582417582418, "grad_norm": 1.2503911256790161, "learning_rate": 0.001, "loss": 0.3969, "step": 229500 }, { "epoch": 74.2081447963801, "grad_norm": 0.893990159034729, "learning_rate": 0.001, "loss": 0.3998, "step": 229600 }, { "epoch": 74.240465416936, "grad_norm": 1.240500569343567, "learning_rate": 0.001, "loss": 0.4083, "step": 229700 }, { "epoch": 74.27278603749193, "grad_norm": 1.3762197494506836, "learning_rate": 0.001, "loss": 0.4021, "step": 229800 }, { "epoch": 74.30510665804783, "grad_norm": 1.0266424417495728, "learning_rate": 0.001, "loss": 0.4114, "step": 229900 }, { "epoch": 74.33742727860376, "grad_norm": 1.1121984720230103, "learning_rate": 0.001, "loss": 0.4097, "step": 230000 }, { "epoch": 74.36974789915966, "grad_norm": 0.9245661497116089, "learning_rate": 0.001, "loss": 0.4099, "step": 230100 }, { "epoch": 74.40206851971558, "grad_norm": 0.9320375919342041, "learning_rate": 0.001, "loss": 0.4197, "step": 230200 }, { "epoch": 74.43438914027149, "grad_norm": 1.0752108097076416, "learning_rate": 0.001, "loss": 0.4194, "step": 230300 }, { "epoch": 74.46670976082741, "grad_norm": 1.0367646217346191, "learning_rate": 0.001, "loss": 0.4301, "step": 230400 }, { "epoch": 74.49903038138332, "grad_norm": 1.0206599235534668, "learning_rate": 0.001, "loss": 0.4228, "step": 230500 }, { "epoch": 74.53135100193924, "grad_norm": 1.1224912405014038, "learning_rate": 0.001, "loss": 0.425, "step": 230600 }, { "epoch": 74.56367162249515, "grad_norm": 1.5630741119384766, "learning_rate": 0.001, "loss": 0.4197, "step": 230700 }, { "epoch": 74.59599224305107, "grad_norm": 0.9419373869895935, "learning_rate": 0.001, "loss": 0.4286, "step": 230800 }, { "epoch": 74.62831286360698, "grad_norm": 0.9263578057289124, "learning_rate": 0.001, "loss": 0.4267, "step": 230900 }, { "epoch": 74.6606334841629, "grad_norm": 0.9164773225784302, "learning_rate": 0.001, "loss": 0.4371, "step": 231000 }, { "epoch": 74.69295410471881, "grad_norm": 1.0429917573928833, "learning_rate": 0.001, "loss": 0.4347, "step": 231100 }, { "epoch": 74.72527472527473, "grad_norm": 1.0770678520202637, "learning_rate": 0.001, "loss": 0.4344, "step": 231200 }, { "epoch": 74.75759534583064, "grad_norm": 1.0608445405960083, "learning_rate": 0.001, "loss": 0.4426, "step": 231300 }, { "epoch": 74.78991596638656, "grad_norm": 1.0609241724014282, "learning_rate": 0.001, "loss": 0.4422, "step": 231400 }, { "epoch": 74.82223658694247, "grad_norm": 1.0566703081130981, "learning_rate": 0.001, "loss": 0.4437, "step": 231500 }, { "epoch": 74.85455720749839, "grad_norm": 1.1715798377990723, "learning_rate": 0.001, "loss": 0.4417, "step": 231600 }, { "epoch": 74.8868778280543, "grad_norm": 1.016530990600586, "learning_rate": 0.001, "loss": 0.4461, "step": 231700 }, { "epoch": 74.91919844861022, "grad_norm": 1.0436965227127075, "learning_rate": 0.001, "loss": 0.4541, "step": 231800 }, { "epoch": 74.95151906916612, "grad_norm": 1.0091532468795776, "learning_rate": 0.001, "loss": 0.4447, "step": 231900 }, { "epoch": 74.98383968972205, "grad_norm": 1.1452738046646118, "learning_rate": 0.001, "loss": 0.4532, "step": 232000 }, { "epoch": 75.01616031027795, "grad_norm": 1.1058101654052734, "learning_rate": 0.001, "loss": 0.4032, "step": 232100 }, { "epoch": 75.04848093083388, "grad_norm": 1.0272916555404663, "learning_rate": 0.001, "loss": 0.3715, "step": 232200 }, { "epoch": 75.08080155138978, "grad_norm": 1.074816346168518, "learning_rate": 0.001, "loss": 0.379, "step": 232300 }, { "epoch": 75.1131221719457, "grad_norm": 1.3209807872772217, "learning_rate": 0.001, "loss": 0.3858, "step": 232400 }, { "epoch": 75.14544279250161, "grad_norm": 1.026532769203186, "learning_rate": 0.001, "loss": 0.3928, "step": 232500 }, { "epoch": 75.17776341305753, "grad_norm": 1.197726845741272, "learning_rate": 0.001, "loss": 0.3888, "step": 232600 }, { "epoch": 75.21008403361344, "grad_norm": 1.358043909072876, "learning_rate": 0.001, "loss": 0.3954, "step": 232700 }, { "epoch": 75.24240465416936, "grad_norm": 0.9412125945091248, "learning_rate": 0.001, "loss": 0.3939, "step": 232800 }, { "epoch": 75.27472527472527, "grad_norm": 1.1666350364685059, "learning_rate": 0.001, "loss": 0.3958, "step": 232900 }, { "epoch": 75.30704589528119, "grad_norm": 1.049282193183899, "learning_rate": 0.001, "loss": 0.3985, "step": 233000 }, { "epoch": 75.3393665158371, "grad_norm": 0.9166626334190369, "learning_rate": 0.001, "loss": 0.3988, "step": 233100 }, { "epoch": 75.37168713639302, "grad_norm": 1.0819694995880127, "learning_rate": 0.001, "loss": 0.4113, "step": 233200 }, { "epoch": 75.40400775694893, "grad_norm": 1.1736875772476196, "learning_rate": 0.001, "loss": 0.4096, "step": 233300 }, { "epoch": 75.43632837750485, "grad_norm": 1.1497762203216553, "learning_rate": 0.001, "loss": 0.4136, "step": 233400 }, { "epoch": 75.46864899806076, "grad_norm": 1.1873536109924316, "learning_rate": 0.001, "loss": 0.413, "step": 233500 }, { "epoch": 75.50096961861668, "grad_norm": 1.3530832529067993, "learning_rate": 0.001, "loss": 0.4123, "step": 233600 }, { "epoch": 75.53329023917259, "grad_norm": 1.3079899549484253, "learning_rate": 0.001, "loss": 0.42, "step": 233700 }, { "epoch": 75.56561085972851, "grad_norm": 1.0940696001052856, "learning_rate": 0.001, "loss": 0.423, "step": 233800 }, { "epoch": 75.59793148028442, "grad_norm": 1.0679014921188354, "learning_rate": 0.001, "loss": 0.4245, "step": 233900 }, { "epoch": 75.63025210084034, "grad_norm": 0.9460535049438477, "learning_rate": 0.001, "loss": 0.4328, "step": 234000 }, { "epoch": 75.66257272139624, "grad_norm": 1.0160638093948364, "learning_rate": 0.001, "loss": 0.4307, "step": 234100 }, { "epoch": 75.69489334195217, "grad_norm": 1.0946762561798096, "learning_rate": 0.001, "loss": 0.4352, "step": 234200 }, { "epoch": 75.72721396250807, "grad_norm": 1.0878455638885498, "learning_rate": 0.001, "loss": 0.439, "step": 234300 }, { "epoch": 75.759534583064, "grad_norm": 1.2409273386001587, "learning_rate": 0.001, "loss": 0.4331, "step": 234400 }, { "epoch": 75.7918552036199, "grad_norm": 1.002909779548645, "learning_rate": 0.001, "loss": 0.4442, "step": 234500 }, { "epoch": 75.82417582417582, "grad_norm": 0.9150108098983765, "learning_rate": 0.001, "loss": 0.439, "step": 234600 }, { "epoch": 75.85649644473173, "grad_norm": 0.9707412123680115, "learning_rate": 0.001, "loss": 0.4447, "step": 234700 }, { "epoch": 75.88881706528765, "grad_norm": 1.134885549545288, "learning_rate": 0.001, "loss": 0.4374, "step": 234800 }, { "epoch": 75.92113768584356, "grad_norm": 1.3238224983215332, "learning_rate": 0.001, "loss": 0.4349, "step": 234900 }, { "epoch": 75.95345830639948, "grad_norm": 0.9404707551002502, "learning_rate": 0.001, "loss": 0.4379, "step": 235000 }, { "epoch": 75.98577892695539, "grad_norm": 1.0045686960220337, "learning_rate": 0.001, "loss": 0.4516, "step": 235100 }, { "epoch": 76.01809954751131, "grad_norm": 1.2287442684173584, "learning_rate": 0.001, "loss": 0.4096, "step": 235200 }, { "epoch": 76.05042016806723, "grad_norm": 1.0255287885665894, "learning_rate": 0.001, "loss": 0.3723, "step": 235300 }, { "epoch": 76.08274078862314, "grad_norm": 0.9578769207000732, "learning_rate": 0.001, "loss": 0.3749, "step": 235400 }, { "epoch": 76.11506140917906, "grad_norm": 1.1604915857315063, "learning_rate": 0.001, "loss": 0.3817, "step": 235500 }, { "epoch": 76.14738202973497, "grad_norm": 1.1190388202667236, "learning_rate": 0.001, "loss": 0.3778, "step": 235600 }, { "epoch": 76.17970265029089, "grad_norm": 1.2072975635528564, "learning_rate": 0.001, "loss": 0.3794, "step": 235700 }, { "epoch": 76.2120232708468, "grad_norm": 1.3905086517333984, "learning_rate": 0.001, "loss": 0.3937, "step": 235800 }, { "epoch": 76.24434389140272, "grad_norm": 1.1749638319015503, "learning_rate": 0.001, "loss": 0.3882, "step": 235900 }, { "epoch": 76.27666451195863, "grad_norm": 0.9612080454826355, "learning_rate": 0.001, "loss": 0.3962, "step": 236000 }, { "epoch": 76.30898513251455, "grad_norm": 0.9201638102531433, "learning_rate": 0.001, "loss": 0.3986, "step": 236100 }, { "epoch": 76.34130575307046, "grad_norm": 1.045946717262268, "learning_rate": 0.001, "loss": 0.3996, "step": 236200 }, { "epoch": 76.37362637362638, "grad_norm": 1.2873610258102417, "learning_rate": 0.001, "loss": 0.4, "step": 236300 }, { "epoch": 76.40594699418229, "grad_norm": 0.9165794253349304, "learning_rate": 0.001, "loss": 0.4071, "step": 236400 }, { "epoch": 76.43826761473821, "grad_norm": 1.4579963684082031, "learning_rate": 0.001, "loss": 0.4066, "step": 236500 }, { "epoch": 76.47058823529412, "grad_norm": 0.9426809549331665, "learning_rate": 0.001, "loss": 0.4066, "step": 236600 }, { "epoch": 76.50290885585004, "grad_norm": 1.0180877447128296, "learning_rate": 0.001, "loss": 0.4129, "step": 236700 }, { "epoch": 76.53522947640595, "grad_norm": 1.1259253025054932, "learning_rate": 0.001, "loss": 0.4181, "step": 236800 }, { "epoch": 76.56755009696187, "grad_norm": 1.0474591255187988, "learning_rate": 0.001, "loss": 0.4115, "step": 236900 }, { "epoch": 76.59987071751777, "grad_norm": 1.2533447742462158, "learning_rate": 0.001, "loss": 0.4166, "step": 237000 }, { "epoch": 76.6321913380737, "grad_norm": 1.0839040279388428, "learning_rate": 0.001, "loss": 0.4193, "step": 237100 }, { "epoch": 76.6645119586296, "grad_norm": 1.2991524934768677, "learning_rate": 0.001, "loss": 0.4216, "step": 237200 }, { "epoch": 76.69683257918552, "grad_norm": 1.1223410367965698, "learning_rate": 0.001, "loss": 0.4231, "step": 237300 }, { "epoch": 76.72915319974143, "grad_norm": 1.1693302392959595, "learning_rate": 0.001, "loss": 0.4241, "step": 237400 }, { "epoch": 76.76147382029735, "grad_norm": 1.0133192539215088, "learning_rate": 0.001, "loss": 0.4247, "step": 237500 }, { "epoch": 76.79379444085326, "grad_norm": 1.0101685523986816, "learning_rate": 0.001, "loss": 0.4311, "step": 237600 }, { "epoch": 76.82611506140918, "grad_norm": 1.1369035243988037, "learning_rate": 0.001, "loss": 0.4356, "step": 237700 }, { "epoch": 76.85843568196509, "grad_norm": 1.1009441614151, "learning_rate": 0.001, "loss": 0.43, "step": 237800 }, { "epoch": 76.89075630252101, "grad_norm": 0.9508324265480042, "learning_rate": 0.001, "loss": 0.4407, "step": 237900 }, { "epoch": 76.92307692307692, "grad_norm": 1.0266079902648926, "learning_rate": 0.001, "loss": 0.4396, "step": 238000 }, { "epoch": 76.95539754363284, "grad_norm": 1.8654358386993408, "learning_rate": 0.001, "loss": 0.4393, "step": 238100 }, { "epoch": 76.98771816418875, "grad_norm": 0.861159086227417, "learning_rate": 0.001, "loss": 0.446, "step": 238200 }, { "epoch": 77.02003878474467, "grad_norm": 1.1454296112060547, "learning_rate": 0.001, "loss": 0.3935, "step": 238300 }, { "epoch": 77.05235940530058, "grad_norm": 0.8687179088592529, "learning_rate": 0.001, "loss": 0.3672, "step": 238400 }, { "epoch": 77.0846800258565, "grad_norm": 0.9302710890769958, "learning_rate": 0.001, "loss": 0.3712, "step": 238500 }, { "epoch": 77.11700064641241, "grad_norm": 1.1745777130126953, "learning_rate": 0.001, "loss": 0.3718, "step": 238600 }, { "epoch": 77.14932126696833, "grad_norm": 1.3829610347747803, "learning_rate": 0.001, "loss": 0.3784, "step": 238700 }, { "epoch": 77.18164188752424, "grad_norm": 1.3880671262741089, "learning_rate": 0.001, "loss": 0.379, "step": 238800 }, { "epoch": 77.21396250808016, "grad_norm": 1.139510989189148, "learning_rate": 0.001, "loss": 0.3832, "step": 238900 }, { "epoch": 77.24628312863607, "grad_norm": 1.0458334684371948, "learning_rate": 0.001, "loss": 0.3868, "step": 239000 }, { "epoch": 77.27860374919199, "grad_norm": 1.2887505292892456, "learning_rate": 0.001, "loss": 0.3879, "step": 239100 }, { "epoch": 77.3109243697479, "grad_norm": 0.8756444454193115, "learning_rate": 0.001, "loss": 0.3905, "step": 239200 }, { "epoch": 77.34324499030382, "grad_norm": 1.1735554933547974, "learning_rate": 0.001, "loss": 0.3891, "step": 239300 }, { "epoch": 77.37556561085972, "grad_norm": 1.3803002834320068, "learning_rate": 0.001, "loss": 0.3909, "step": 239400 }, { "epoch": 77.40788623141565, "grad_norm": 1.0585838556289673, "learning_rate": 0.001, "loss": 0.3953, "step": 239500 }, { "epoch": 77.44020685197155, "grad_norm": 0.9552614092826843, "learning_rate": 0.001, "loss": 0.4021, "step": 239600 }, { "epoch": 77.47252747252747, "grad_norm": 1.0278576612472534, "learning_rate": 0.001, "loss": 0.3997, "step": 239700 }, { "epoch": 77.50484809308338, "grad_norm": 1.0613999366760254, "learning_rate": 0.001, "loss": 0.4071, "step": 239800 }, { "epoch": 77.5371687136393, "grad_norm": 0.9422594308853149, "learning_rate": 0.001, "loss": 0.4061, "step": 239900 }, { "epoch": 77.56948933419521, "grad_norm": 1.0477360486984253, "learning_rate": 0.001, "loss": 0.4054, "step": 240000 }, { "epoch": 77.60180995475113, "grad_norm": 1.31101655960083, "learning_rate": 0.001, "loss": 0.4103, "step": 240100 }, { "epoch": 77.63413057530704, "grad_norm": 1.1494314670562744, "learning_rate": 0.001, "loss": 0.4116, "step": 240200 }, { "epoch": 77.66645119586296, "grad_norm": 1.0546482801437378, "learning_rate": 0.001, "loss": 0.4189, "step": 240300 }, { "epoch": 77.69877181641887, "grad_norm": 1.0545334815979004, "learning_rate": 0.001, "loss": 0.4168, "step": 240400 }, { "epoch": 77.73109243697479, "grad_norm": 1.150909662246704, "learning_rate": 0.001, "loss": 0.4188, "step": 240500 }, { "epoch": 77.7634130575307, "grad_norm": 1.2516354322433472, "learning_rate": 0.001, "loss": 0.4231, "step": 240600 }, { "epoch": 77.79573367808662, "grad_norm": 1.109307885169983, "learning_rate": 0.001, "loss": 0.427, "step": 240700 }, { "epoch": 77.82805429864253, "grad_norm": 1.3773829936981201, "learning_rate": 0.001, "loss": 0.4268, "step": 240800 }, { "epoch": 77.86037491919845, "grad_norm": 1.2621300220489502, "learning_rate": 0.001, "loss": 0.4244, "step": 240900 }, { "epoch": 77.89269553975436, "grad_norm": 1.088753342628479, "learning_rate": 0.001, "loss": 0.4253, "step": 241000 }, { "epoch": 77.92501616031028, "grad_norm": 1.0021073818206787, "learning_rate": 0.001, "loss": 0.4306, "step": 241100 }, { "epoch": 77.95733678086619, "grad_norm": 1.2800052165985107, "learning_rate": 0.001, "loss": 0.4327, "step": 241200 }, { "epoch": 77.98965740142211, "grad_norm": 0.8955698609352112, "learning_rate": 0.001, "loss": 0.4316, "step": 241300 }, { "epoch": 78.02197802197803, "grad_norm": 1.0784353017807007, "learning_rate": 0.001, "loss": 0.3872, "step": 241400 }, { "epoch": 78.05429864253394, "grad_norm": 1.2403620481491089, "learning_rate": 0.001, "loss": 0.3612, "step": 241500 }, { "epoch": 78.08661926308986, "grad_norm": 0.8937690854072571, "learning_rate": 0.001, "loss": 0.3631, "step": 241600 }, { "epoch": 78.11893988364577, "grad_norm": 1.2409058809280396, "learning_rate": 0.001, "loss": 0.3704, "step": 241700 }, { "epoch": 78.15126050420169, "grad_norm": 1.0751912593841553, "learning_rate": 0.001, "loss": 0.3768, "step": 241800 }, { "epoch": 78.1835811247576, "grad_norm": 0.9923916459083557, "learning_rate": 0.001, "loss": 0.3765, "step": 241900 }, { "epoch": 78.21590174531352, "grad_norm": 1.2745343446731567, "learning_rate": 0.001, "loss": 0.3736, "step": 242000 }, { "epoch": 78.24822236586942, "grad_norm": 0.8710459470748901, "learning_rate": 0.001, "loss": 0.3796, "step": 242100 }, { "epoch": 78.28054298642535, "grad_norm": 0.9729889631271362, "learning_rate": 0.001, "loss": 0.3844, "step": 242200 }, { "epoch": 78.31286360698125, "grad_norm": 1.0507479906082153, "learning_rate": 0.001, "loss": 0.3858, "step": 242300 }, { "epoch": 78.34518422753717, "grad_norm": 1.1043941974639893, "learning_rate": 0.001, "loss": 0.386, "step": 242400 }, { "epoch": 78.37750484809308, "grad_norm": 1.1862475872039795, "learning_rate": 0.001, "loss": 0.3867, "step": 242500 }, { "epoch": 78.409825468649, "grad_norm": 0.8970211744308472, "learning_rate": 0.001, "loss": 0.3932, "step": 242600 }, { "epoch": 78.44214608920491, "grad_norm": 1.02936851978302, "learning_rate": 0.001, "loss": 0.3896, "step": 242700 }, { "epoch": 78.47446670976083, "grad_norm": 1.20807683467865, "learning_rate": 0.001, "loss": 0.3961, "step": 242800 }, { "epoch": 78.50678733031674, "grad_norm": 1.0068432092666626, "learning_rate": 0.001, "loss": 0.4008, "step": 242900 }, { "epoch": 78.53910795087266, "grad_norm": 1.0105209350585938, "learning_rate": 0.001, "loss": 0.4009, "step": 243000 }, { "epoch": 78.57142857142857, "grad_norm": 1.0913435220718384, "learning_rate": 0.001, "loss": 0.4038, "step": 243100 }, { "epoch": 78.60374919198449, "grad_norm": 1.0575194358825684, "learning_rate": 0.001, "loss": 0.4027, "step": 243200 }, { "epoch": 78.6360698125404, "grad_norm": 0.9809123873710632, "learning_rate": 0.001, "loss": 0.4069, "step": 243300 }, { "epoch": 78.66839043309632, "grad_norm": 1.1678017377853394, "learning_rate": 0.001, "loss": 0.405, "step": 243400 }, { "epoch": 78.70071105365223, "grad_norm": 0.9087402820587158, "learning_rate": 0.001, "loss": 0.4109, "step": 243500 }, { "epoch": 78.73303167420815, "grad_norm": 0.9380397796630859, "learning_rate": 0.001, "loss": 0.4078, "step": 243600 }, { "epoch": 78.76535229476406, "grad_norm": 1.1999925374984741, "learning_rate": 0.001, "loss": 0.423, "step": 243700 }, { "epoch": 78.79767291531998, "grad_norm": 0.9821566939353943, "learning_rate": 0.001, "loss": 0.4198, "step": 243800 }, { "epoch": 78.82999353587589, "grad_norm": 1.0138232707977295, "learning_rate": 0.001, "loss": 0.417, "step": 243900 }, { "epoch": 78.86231415643181, "grad_norm": 1.2587248086929321, "learning_rate": 0.001, "loss": 0.4242, "step": 244000 }, { "epoch": 78.89463477698771, "grad_norm": 1.1331390142440796, "learning_rate": 0.001, "loss": 0.4287, "step": 244100 }, { "epoch": 78.92695539754364, "grad_norm": 1.045154094696045, "learning_rate": 0.001, "loss": 0.4255, "step": 244200 }, { "epoch": 78.95927601809954, "grad_norm": 1.0481014251708984, "learning_rate": 0.001, "loss": 0.4284, "step": 244300 }, { "epoch": 78.99159663865547, "grad_norm": 1.1638810634613037, "learning_rate": 0.001, "loss": 0.4302, "step": 244400 }, { "epoch": 79.02391725921137, "grad_norm": 1.105881690979004, "learning_rate": 0.001, "loss": 0.3741, "step": 244500 }, { "epoch": 79.0562378797673, "grad_norm": 0.9390032887458801, "learning_rate": 0.001, "loss": 0.3569, "step": 244600 }, { "epoch": 79.0885585003232, "grad_norm": 1.1838675737380981, "learning_rate": 0.001, "loss": 0.3633, "step": 244700 }, { "epoch": 79.12087912087912, "grad_norm": 1.0419888496398926, "learning_rate": 0.001, "loss": 0.3634, "step": 244800 }, { "epoch": 79.15319974143503, "grad_norm": 1.0901824235916138, "learning_rate": 0.001, "loss": 0.3648, "step": 244900 }, { "epoch": 79.18552036199095, "grad_norm": 1.148579478263855, "learning_rate": 0.001, "loss": 0.3663, "step": 245000 }, { "epoch": 79.21784098254686, "grad_norm": 1.5151259899139404, "learning_rate": 0.001, "loss": 0.3684, "step": 245100 }, { "epoch": 79.25016160310278, "grad_norm": 0.9013495445251465, "learning_rate": 0.001, "loss": 0.3762, "step": 245200 }, { "epoch": 79.28248222365869, "grad_norm": 0.9006203413009644, "learning_rate": 0.001, "loss": 0.3805, "step": 245300 }, { "epoch": 79.31480284421461, "grad_norm": 1.0122857093811035, "learning_rate": 0.001, "loss": 0.378, "step": 245400 }, { "epoch": 79.34712346477052, "grad_norm": 1.1386504173278809, "learning_rate": 0.001, "loss": 0.3865, "step": 245500 }, { "epoch": 79.37944408532644, "grad_norm": 0.8747102618217468, "learning_rate": 0.001, "loss": 0.3852, "step": 245600 }, { "epoch": 79.41176470588235, "grad_norm": 0.8671668171882629, "learning_rate": 0.001, "loss": 0.385, "step": 245700 }, { "epoch": 79.44408532643827, "grad_norm": 0.9804284572601318, "learning_rate": 0.001, "loss": 0.391, "step": 245800 }, { "epoch": 79.47640594699418, "grad_norm": 1.2170729637145996, "learning_rate": 0.001, "loss": 0.3977, "step": 245900 }, { "epoch": 79.5087265675501, "grad_norm": 1.009521245956421, "learning_rate": 0.001, "loss": 0.3892, "step": 246000 }, { "epoch": 79.541047188106, "grad_norm": 1.055911660194397, "learning_rate": 0.001, "loss": 0.39, "step": 246100 }, { "epoch": 79.57336780866193, "grad_norm": 1.1798661947250366, "learning_rate": 0.001, "loss": 0.3984, "step": 246200 }, { "epoch": 79.60568842921784, "grad_norm": 0.9371033310890198, "learning_rate": 0.001, "loss": 0.3971, "step": 246300 }, { "epoch": 79.63800904977376, "grad_norm": 1.0621087551116943, "learning_rate": 0.001, "loss": 0.4043, "step": 246400 }, { "epoch": 79.67032967032966, "grad_norm": 1.1423753499984741, "learning_rate": 0.001, "loss": 0.4069, "step": 246500 }, { "epoch": 79.70265029088559, "grad_norm": 1.080574631690979, "learning_rate": 0.001, "loss": 0.4067, "step": 246600 }, { "epoch": 79.7349709114415, "grad_norm": 1.179984450340271, "learning_rate": 0.001, "loss": 0.4054, "step": 246700 }, { "epoch": 79.76729153199742, "grad_norm": 1.6539568901062012, "learning_rate": 0.001, "loss": 0.4032, "step": 246800 }, { "epoch": 79.79961215255332, "grad_norm": 1.0226161479949951, "learning_rate": 0.001, "loss": 0.4158, "step": 246900 }, { "epoch": 79.83193277310924, "grad_norm": 1.1376699209213257, "learning_rate": 0.001, "loss": 0.4133, "step": 247000 }, { "epoch": 79.86425339366515, "grad_norm": 0.9139329791069031, "learning_rate": 0.001, "loss": 0.4109, "step": 247100 }, { "epoch": 79.89657401422107, "grad_norm": 0.9461932182312012, "learning_rate": 0.001, "loss": 0.4207, "step": 247200 }, { "epoch": 79.92889463477698, "grad_norm": 1.2271596193313599, "learning_rate": 0.001, "loss": 0.4205, "step": 247300 }, { "epoch": 79.9612152553329, "grad_norm": 1.1056647300720215, "learning_rate": 0.001, "loss": 0.4244, "step": 247400 }, { "epoch": 79.99353587588882, "grad_norm": 1.2619762420654297, "learning_rate": 0.001, "loss": 0.4148, "step": 247500 }, { "epoch": 80.02585649644473, "grad_norm": 0.9466239213943481, "learning_rate": 0.001, "loss": 0.3615, "step": 247600 }, { "epoch": 80.05817711700065, "grad_norm": 1.2305399179458618, "learning_rate": 0.001, "loss": 0.3522, "step": 247700 }, { "epoch": 80.09049773755656, "grad_norm": 1.0746045112609863, "learning_rate": 0.001, "loss": 0.352, "step": 247800 }, { "epoch": 80.12281835811248, "grad_norm": 1.124253273010254, "learning_rate": 0.001, "loss": 0.3616, "step": 247900 }, { "epoch": 80.15513897866839, "grad_norm": 0.96457839012146, "learning_rate": 0.001, "loss": 0.3573, "step": 248000 }, { "epoch": 80.18745959922431, "grad_norm": 1.1608917713165283, "learning_rate": 0.001, "loss": 0.3609, "step": 248100 }, { "epoch": 80.21978021978022, "grad_norm": 1.196550965309143, "learning_rate": 0.001, "loss": 0.3666, "step": 248200 }, { "epoch": 80.25210084033614, "grad_norm": 1.440116286277771, "learning_rate": 0.001, "loss": 0.3692, "step": 248300 }, { "epoch": 80.28442146089205, "grad_norm": 1.460111141204834, "learning_rate": 0.001, "loss": 0.3773, "step": 248400 }, { "epoch": 80.31674208144797, "grad_norm": 0.9197182059288025, "learning_rate": 0.001, "loss": 0.3763, "step": 248500 }, { "epoch": 80.34906270200388, "grad_norm": 0.9269041419029236, "learning_rate": 0.001, "loss": 0.3748, "step": 248600 }, { "epoch": 80.3813833225598, "grad_norm": 1.0464839935302734, "learning_rate": 0.001, "loss": 0.3767, "step": 248700 }, { "epoch": 80.4137039431157, "grad_norm": 0.8667921423912048, "learning_rate": 0.001, "loss": 0.38, "step": 248800 }, { "epoch": 80.44602456367163, "grad_norm": 0.9000419974327087, "learning_rate": 0.001, "loss": 0.3846, "step": 248900 }, { "epoch": 80.47834518422754, "grad_norm": 0.910185694694519, "learning_rate": 0.001, "loss": 0.3833, "step": 249000 }, { "epoch": 80.51066580478346, "grad_norm": 0.9148023724555969, "learning_rate": 0.001, "loss": 0.3863, "step": 249100 }, { "epoch": 80.54298642533936, "grad_norm": 1.0311253070831299, "learning_rate": 0.001, "loss": 0.389, "step": 249200 }, { "epoch": 80.57530704589529, "grad_norm": 1.272690773010254, "learning_rate": 0.001, "loss": 0.3926, "step": 249300 }, { "epoch": 80.6076276664512, "grad_norm": 0.9611029028892517, "learning_rate": 0.001, "loss": 0.3866, "step": 249400 }, { "epoch": 80.63994828700712, "grad_norm": 1.238991141319275, "learning_rate": 0.001, "loss": 0.3915, "step": 249500 }, { "epoch": 80.67226890756302, "grad_norm": 1.0282546281814575, "learning_rate": 0.001, "loss": 0.3967, "step": 249600 }, { "epoch": 80.70458952811894, "grad_norm": 0.9589764475822449, "learning_rate": 0.001, "loss": 0.3998, "step": 249700 }, { "epoch": 80.73691014867485, "grad_norm": 0.9972920417785645, "learning_rate": 0.001, "loss": 0.4006, "step": 249800 }, { "epoch": 80.76923076923077, "grad_norm": 1.152413010597229, "learning_rate": 0.001, "loss": 0.4075, "step": 249900 }, { "epoch": 80.80155138978668, "grad_norm": 1.0290299654006958, "learning_rate": 0.001, "loss": 0.4054, "step": 250000 }, { "epoch": 80.8338720103426, "grad_norm": 1.0266101360321045, "learning_rate": 0.001, "loss": 0.4056, "step": 250100 }, { "epoch": 80.86619263089851, "grad_norm": 0.981960117816925, "learning_rate": 0.001, "loss": 0.4117, "step": 250200 }, { "epoch": 80.89851325145443, "grad_norm": 0.9510987997055054, "learning_rate": 0.001, "loss": 0.4076, "step": 250300 }, { "epoch": 80.93083387201034, "grad_norm": 1.1637722253799438, "learning_rate": 0.001, "loss": 0.4132, "step": 250400 }, { "epoch": 80.96315449256626, "grad_norm": 1.4615681171417236, "learning_rate": 0.001, "loss": 0.4215, "step": 250500 }, { "epoch": 80.99547511312217, "grad_norm": 1.005907416343689, "learning_rate": 0.001, "loss": 0.4102, "step": 250600 }, { "epoch": 81.02779573367809, "grad_norm": 0.8981792330741882, "learning_rate": 0.001, "loss": 0.3497, "step": 250700 }, { "epoch": 81.060116354234, "grad_norm": 1.0311108827590942, "learning_rate": 0.001, "loss": 0.3465, "step": 250800 }, { "epoch": 81.09243697478992, "grad_norm": 0.9169034361839294, "learning_rate": 0.001, "loss": 0.3501, "step": 250900 }, { "epoch": 81.12475759534583, "grad_norm": 0.8490038514137268, "learning_rate": 0.001, "loss": 0.3566, "step": 251000 }, { "epoch": 81.15707821590175, "grad_norm": 0.7982963919639587, "learning_rate": 0.001, "loss": 0.3602, "step": 251100 }, { "epoch": 81.18939883645766, "grad_norm": 1.3361057043075562, "learning_rate": 0.001, "loss": 0.3625, "step": 251200 }, { "epoch": 81.22171945701358, "grad_norm": 0.9430826902389526, "learning_rate": 0.001, "loss": 0.3585, "step": 251300 }, { "epoch": 81.25404007756948, "grad_norm": 0.9897859692573547, "learning_rate": 0.001, "loss": 0.3648, "step": 251400 }, { "epoch": 81.2863606981254, "grad_norm": 0.9883958101272583, "learning_rate": 0.001, "loss": 0.3672, "step": 251500 }, { "epoch": 81.31868131868131, "grad_norm": 1.2282551527023315, "learning_rate": 0.001, "loss": 0.3705, "step": 251600 }, { "epoch": 81.35100193923724, "grad_norm": 1.2052607536315918, "learning_rate": 0.001, "loss": 0.3677, "step": 251700 }, { "epoch": 81.38332255979314, "grad_norm": 0.9845570921897888, "learning_rate": 0.001, "loss": 0.3752, "step": 251800 }, { "epoch": 81.41564318034906, "grad_norm": 1.1060030460357666, "learning_rate": 0.001, "loss": 0.3731, "step": 251900 }, { "epoch": 81.44796380090497, "grad_norm": 1.0349748134613037, "learning_rate": 0.001, "loss": 0.3796, "step": 252000 }, { "epoch": 81.4802844214609, "grad_norm": 1.4404436349868774, "learning_rate": 0.001, "loss": 0.3904, "step": 252100 }, { "epoch": 81.5126050420168, "grad_norm": 1.0923142433166504, "learning_rate": 0.001, "loss": 0.3784, "step": 252200 }, { "epoch": 81.54492566257272, "grad_norm": 0.9293524026870728, "learning_rate": 0.001, "loss": 0.3849, "step": 252300 }, { "epoch": 81.57724628312863, "grad_norm": 0.9637797474861145, "learning_rate": 0.001, "loss": 0.3854, "step": 252400 }, { "epoch": 81.60956690368455, "grad_norm": 1.0226798057556152, "learning_rate": 0.001, "loss": 0.3924, "step": 252500 }, { "epoch": 81.64188752424046, "grad_norm": 1.540755033493042, "learning_rate": 0.001, "loss": 0.3862, "step": 252600 }, { "epoch": 81.67420814479638, "grad_norm": 1.1741937398910522, "learning_rate": 0.001, "loss": 0.3919, "step": 252700 }, { "epoch": 81.70652876535229, "grad_norm": 1.007423758506775, "learning_rate": 0.001, "loss": 0.3939, "step": 252800 }, { "epoch": 81.73884938590821, "grad_norm": 0.9153239727020264, "learning_rate": 0.001, "loss": 0.4014, "step": 252900 }, { "epoch": 81.77117000646412, "grad_norm": 0.9677464365959167, "learning_rate": 0.001, "loss": 0.3992, "step": 253000 }, { "epoch": 81.80349062702004, "grad_norm": 0.9234734773635864, "learning_rate": 0.001, "loss": 0.3978, "step": 253100 }, { "epoch": 81.83581124757595, "grad_norm": 1.1433368921279907, "learning_rate": 0.001, "loss": 0.401, "step": 253200 }, { "epoch": 81.86813186813187, "grad_norm": 1.0430803298950195, "learning_rate": 0.001, "loss": 0.4131, "step": 253300 }, { "epoch": 81.90045248868778, "grad_norm": 1.3255351781845093, "learning_rate": 0.001, "loss": 0.4062, "step": 253400 }, { "epoch": 81.9327731092437, "grad_norm": 1.1591497659683228, "learning_rate": 0.001, "loss": 0.4007, "step": 253500 }, { "epoch": 81.9650937297996, "grad_norm": 1.1188544034957886, "learning_rate": 0.001, "loss": 0.3986, "step": 253600 }, { "epoch": 81.99741435035553, "grad_norm": 1.067710518836975, "learning_rate": 0.001, "loss": 0.4101, "step": 253700 }, { "epoch": 82.02973497091145, "grad_norm": 1.005022406578064, "learning_rate": 0.001, "loss": 0.3446, "step": 253800 }, { "epoch": 82.06205559146736, "grad_norm": 1.0995275974273682, "learning_rate": 0.001, "loss": 0.3415, "step": 253900 }, { "epoch": 82.09437621202328, "grad_norm": 0.9774818420410156, "learning_rate": 0.001, "loss": 0.343, "step": 254000 }, { "epoch": 82.12669683257919, "grad_norm": 1.0783286094665527, "learning_rate": 0.001, "loss": 0.3449, "step": 254100 }, { "epoch": 82.1590174531351, "grad_norm": 0.9598420262336731, "learning_rate": 0.001, "loss": 0.3552, "step": 254200 }, { "epoch": 82.19133807369101, "grad_norm": 0.981161892414093, "learning_rate": 0.001, "loss": 0.3528, "step": 254300 }, { "epoch": 82.22365869424694, "grad_norm": 1.0798521041870117, "learning_rate": 0.001, "loss": 0.3631, "step": 254400 }, { "epoch": 82.25597931480284, "grad_norm": 0.96309894323349, "learning_rate": 0.001, "loss": 0.359, "step": 254500 }, { "epoch": 82.28829993535876, "grad_norm": 1.6235249042510986, "learning_rate": 0.001, "loss": 0.3613, "step": 254600 }, { "epoch": 82.32062055591467, "grad_norm": 1.117620587348938, "learning_rate": 0.001, "loss": 0.3641, "step": 254700 }, { "epoch": 82.3529411764706, "grad_norm": 1.0557173490524292, "learning_rate": 0.001, "loss": 0.371, "step": 254800 }, { "epoch": 82.3852617970265, "grad_norm": 0.9646346569061279, "learning_rate": 0.001, "loss": 0.3701, "step": 254900 }, { "epoch": 82.41758241758242, "grad_norm": 1.0868687629699707, "learning_rate": 0.001, "loss": 0.3726, "step": 255000 }, { "epoch": 82.44990303813833, "grad_norm": 1.2258265018463135, "learning_rate": 0.001, "loss": 0.3752, "step": 255100 }, { "epoch": 82.48222365869425, "grad_norm": 0.8767035007476807, "learning_rate": 0.001, "loss": 0.3779, "step": 255200 }, { "epoch": 82.51454427925016, "grad_norm": 0.9917929768562317, "learning_rate": 0.001, "loss": 0.3833, "step": 255300 }, { "epoch": 82.54686489980608, "grad_norm": 1.0180668830871582, "learning_rate": 0.001, "loss": 0.3828, "step": 255400 }, { "epoch": 82.57918552036199, "grad_norm": 1.0246410369873047, "learning_rate": 0.001, "loss": 0.378, "step": 255500 }, { "epoch": 82.61150614091791, "grad_norm": 1.4447978734970093, "learning_rate": 0.001, "loss": 0.3852, "step": 255600 }, { "epoch": 82.64382676147382, "grad_norm": 0.9803134799003601, "learning_rate": 0.001, "loss": 0.3796, "step": 255700 }, { "epoch": 82.67614738202974, "grad_norm": 0.8565698266029358, "learning_rate": 0.001, "loss": 0.3873, "step": 255800 }, { "epoch": 82.70846800258565, "grad_norm": 1.0969030857086182, "learning_rate": 0.001, "loss": 0.397, "step": 255900 }, { "epoch": 82.74078862314157, "grad_norm": 1.0858006477355957, "learning_rate": 0.001, "loss": 0.3916, "step": 256000 }, { "epoch": 82.77310924369748, "grad_norm": 1.1057883501052856, "learning_rate": 0.001, "loss": 0.3914, "step": 256100 }, { "epoch": 82.8054298642534, "grad_norm": 0.9593665599822998, "learning_rate": 0.001, "loss": 0.3901, "step": 256200 }, { "epoch": 82.8377504848093, "grad_norm": 1.169712781906128, "learning_rate": 0.001, "loss": 0.3901, "step": 256300 }, { "epoch": 82.87007110536523, "grad_norm": 1.0772912502288818, "learning_rate": 0.001, "loss": 0.3941, "step": 256400 }, { "epoch": 82.90239172592113, "grad_norm": 1.192619800567627, "learning_rate": 0.001, "loss": 0.3924, "step": 256500 }, { "epoch": 82.93471234647706, "grad_norm": 0.9830801486968994, "learning_rate": 0.001, "loss": 0.406, "step": 256600 }, { "epoch": 82.96703296703296, "grad_norm": 1.1633329391479492, "learning_rate": 0.001, "loss": 0.3966, "step": 256700 }, { "epoch": 82.99935358758889, "grad_norm": 1.1214252710342407, "learning_rate": 0.001, "loss": 0.397, "step": 256800 }, { "epoch": 83.03167420814479, "grad_norm": 1.221656322479248, "learning_rate": 0.001, "loss": 0.3318, "step": 256900 }, { "epoch": 83.06399482870071, "grad_norm": 0.9483466148376465, "learning_rate": 0.001, "loss": 0.3347, "step": 257000 }, { "epoch": 83.09631544925662, "grad_norm": 0.9217442870140076, "learning_rate": 0.001, "loss": 0.343, "step": 257100 }, { "epoch": 83.12863606981254, "grad_norm": 0.9654166102409363, "learning_rate": 0.001, "loss": 0.3514, "step": 257200 }, { "epoch": 83.16095669036845, "grad_norm": 1.6201155185699463, "learning_rate": 0.001, "loss": 0.3503, "step": 257300 }, { "epoch": 83.19327731092437, "grad_norm": 0.9150234460830688, "learning_rate": 0.001, "loss": 0.3526, "step": 257400 }, { "epoch": 83.22559793148028, "grad_norm": 1.2022377252578735, "learning_rate": 0.001, "loss": 0.3427, "step": 257500 }, { "epoch": 83.2579185520362, "grad_norm": 1.0958671569824219, "learning_rate": 0.001, "loss": 0.3528, "step": 257600 }, { "epoch": 83.29023917259211, "grad_norm": 0.9521307349205017, "learning_rate": 0.001, "loss": 0.3599, "step": 257700 }, { "epoch": 83.32255979314803, "grad_norm": 0.8636460304260254, "learning_rate": 0.001, "loss": 0.3637, "step": 257800 }, { "epoch": 83.35488041370394, "grad_norm": 0.9367690682411194, "learning_rate": 0.001, "loss": 0.3571, "step": 257900 }, { "epoch": 83.38720103425986, "grad_norm": 0.9551283717155457, "learning_rate": 0.001, "loss": 0.3642, "step": 258000 }, { "epoch": 83.41952165481577, "grad_norm": 1.123175859451294, "learning_rate": 0.001, "loss": 0.3684, "step": 258100 }, { "epoch": 83.45184227537169, "grad_norm": 1.1089591979980469, "learning_rate": 0.001, "loss": 0.3682, "step": 258200 }, { "epoch": 83.4841628959276, "grad_norm": 1.168323040008545, "learning_rate": 0.001, "loss": 0.3716, "step": 258300 }, { "epoch": 83.51648351648352, "grad_norm": 0.9511652588844299, "learning_rate": 0.001, "loss": 0.3777, "step": 258400 }, { "epoch": 83.54880413703943, "grad_norm": 1.0176140069961548, "learning_rate": 0.001, "loss": 0.375, "step": 258500 }, { "epoch": 83.58112475759535, "grad_norm": 0.8965360522270203, "learning_rate": 0.001, "loss": 0.3743, "step": 258600 }, { "epoch": 83.61344537815125, "grad_norm": 1.0880433320999146, "learning_rate": 0.001, "loss": 0.3786, "step": 258700 }, { "epoch": 83.64576599870718, "grad_norm": 1.1833451986312866, "learning_rate": 0.001, "loss": 0.3826, "step": 258800 }, { "epoch": 83.67808661926308, "grad_norm": 1.0985133647918701, "learning_rate": 0.001, "loss": 0.3871, "step": 258900 }, { "epoch": 83.710407239819, "grad_norm": 1.0580389499664307, "learning_rate": 0.001, "loss": 0.3929, "step": 259000 }, { "epoch": 83.74272786037491, "grad_norm": 0.9904229640960693, "learning_rate": 0.001, "loss": 0.3899, "step": 259100 }, { "epoch": 83.77504848093083, "grad_norm": 1.423741340637207, "learning_rate": 0.001, "loss": 0.3845, "step": 259200 }, { "epoch": 83.80736910148674, "grad_norm": 0.8469963073730469, "learning_rate": 0.001, "loss": 0.3884, "step": 259300 }, { "epoch": 83.83968972204266, "grad_norm": 1.0075535774230957, "learning_rate": 0.001, "loss": 0.3934, "step": 259400 }, { "epoch": 83.87201034259857, "grad_norm": 1.576718807220459, "learning_rate": 0.001, "loss": 0.3912, "step": 259500 }, { "epoch": 83.9043309631545, "grad_norm": 1.0754919052124023, "learning_rate": 0.001, "loss": 0.3925, "step": 259600 }, { "epoch": 83.9366515837104, "grad_norm": 0.9466648697853088, "learning_rate": 0.001, "loss": 0.3962, "step": 259700 }, { "epoch": 83.96897220426632, "grad_norm": 0.9969567060470581, "learning_rate": 0.001, "loss": 0.3969, "step": 259800 }, { "epoch": 84.00129282482224, "grad_norm": 0.9463942646980286, "learning_rate": 0.001, "loss": 0.3969, "step": 259900 }, { "epoch": 84.03361344537815, "grad_norm": 0.9426854252815247, "learning_rate": 0.001, "loss": 0.3333, "step": 260000 }, { "epoch": 84.06593406593407, "grad_norm": 1.4777032136917114, "learning_rate": 0.001, "loss": 0.3349, "step": 260100 }, { "epoch": 84.09825468648998, "grad_norm": 1.0034072399139404, "learning_rate": 0.001, "loss": 0.3315, "step": 260200 }, { "epoch": 84.1305753070459, "grad_norm": 1.5854597091674805, "learning_rate": 0.001, "loss": 0.3398, "step": 260300 }, { "epoch": 84.16289592760181, "grad_norm": 1.0782568454742432, "learning_rate": 0.001, "loss": 0.3423, "step": 260400 }, { "epoch": 84.19521654815773, "grad_norm": 1.258522629737854, "learning_rate": 0.001, "loss": 0.3486, "step": 260500 }, { "epoch": 84.22753716871364, "grad_norm": 1.0119706392288208, "learning_rate": 0.001, "loss": 0.3486, "step": 260600 }, { "epoch": 84.25985778926956, "grad_norm": 1.1616668701171875, "learning_rate": 0.001, "loss": 0.3483, "step": 260700 }, { "epoch": 84.29217840982547, "grad_norm": 1.0589044094085693, "learning_rate": 0.001, "loss": 0.355, "step": 260800 }, { "epoch": 84.32449903038139, "grad_norm": 1.0820260047912598, "learning_rate": 0.001, "loss": 0.3565, "step": 260900 }, { "epoch": 84.3568196509373, "grad_norm": 1.4587126970291138, "learning_rate": 0.001, "loss": 0.3585, "step": 261000 }, { "epoch": 84.38914027149322, "grad_norm": 0.9217873811721802, "learning_rate": 0.001, "loss": 0.3579, "step": 261100 }, { "epoch": 84.42146089204913, "grad_norm": 1.178114652633667, "learning_rate": 0.001, "loss": 0.3602, "step": 261200 }, { "epoch": 84.45378151260505, "grad_norm": 1.129176139831543, "learning_rate": 0.001, "loss": 0.3609, "step": 261300 }, { "epoch": 84.48610213316095, "grad_norm": 1.0064492225646973, "learning_rate": 0.001, "loss": 0.3644, "step": 261400 }, { "epoch": 84.51842275371688, "grad_norm": 1.3811231851577759, "learning_rate": 0.001, "loss": 0.3723, "step": 261500 }, { "epoch": 84.55074337427278, "grad_norm": 0.9018750190734863, "learning_rate": 0.001, "loss": 0.3691, "step": 261600 }, { "epoch": 84.5830639948287, "grad_norm": 0.8969240188598633, "learning_rate": 0.001, "loss": 0.3744, "step": 261700 }, { "epoch": 84.61538461538461, "grad_norm": 0.9575598239898682, "learning_rate": 0.001, "loss": 0.3757, "step": 261800 }, { "epoch": 84.64770523594053, "grad_norm": 1.1514880657196045, "learning_rate": 0.001, "loss": 0.37, "step": 261900 }, { "epoch": 84.68002585649644, "grad_norm": 1.2112834453582764, "learning_rate": 0.001, "loss": 0.3771, "step": 262000 }, { "epoch": 84.71234647705236, "grad_norm": 1.4919136762619019, "learning_rate": 0.001, "loss": 0.3742, "step": 262100 }, { "epoch": 84.74466709760827, "grad_norm": 1.0484683513641357, "learning_rate": 0.001, "loss": 0.3782, "step": 262200 }, { "epoch": 84.7769877181642, "grad_norm": 1.2120957374572754, "learning_rate": 0.001, "loss": 0.3845, "step": 262300 }, { "epoch": 84.8093083387201, "grad_norm": 1.3722985982894897, "learning_rate": 0.001, "loss": 0.388, "step": 262400 }, { "epoch": 84.84162895927602, "grad_norm": 1.127827525138855, "learning_rate": 0.001, "loss": 0.3911, "step": 262500 }, { "epoch": 84.87394957983193, "grad_norm": 1.1780726909637451, "learning_rate": 0.001, "loss": 0.3902, "step": 262600 }, { "epoch": 84.90627020038785, "grad_norm": 1.1607961654663086, "learning_rate": 0.001, "loss": 0.3874, "step": 262700 }, { "epoch": 84.93859082094376, "grad_norm": 1.4289777278900146, "learning_rate": 0.001, "loss": 0.3921, "step": 262800 }, { "epoch": 84.97091144149968, "grad_norm": 1.226544737815857, "learning_rate": 0.001, "loss": 0.3941, "step": 262900 }, { "epoch": 85.00323206205559, "grad_norm": 0.8671598434448242, "learning_rate": 0.001, "loss": 0.3886, "step": 263000 }, { "epoch": 85.03555268261151, "grad_norm": 1.6635525226593018, "learning_rate": 0.001, "loss": 0.3294, "step": 263100 }, { "epoch": 85.06787330316742, "grad_norm": 1.377448558807373, "learning_rate": 0.001, "loss": 0.3324, "step": 263200 }, { "epoch": 85.10019392372334, "grad_norm": 0.9825029969215393, "learning_rate": 0.001, "loss": 0.3341, "step": 263300 }, { "epoch": 85.13251454427925, "grad_norm": 0.9248387813568115, "learning_rate": 0.001, "loss": 0.3295, "step": 263400 }, { "epoch": 85.16483516483517, "grad_norm": 1.0447638034820557, "learning_rate": 0.001, "loss": 0.3377, "step": 263500 }, { "epoch": 85.19715578539108, "grad_norm": 0.8679701685905457, "learning_rate": 0.001, "loss": 0.3401, "step": 263600 }, { "epoch": 85.229476405947, "grad_norm": 0.9929383993148804, "learning_rate": 0.001, "loss": 0.3448, "step": 263700 }, { "epoch": 85.2617970265029, "grad_norm": 1.1555461883544922, "learning_rate": 0.001, "loss": 0.355, "step": 263800 }, { "epoch": 85.29411764705883, "grad_norm": 1.300236463546753, "learning_rate": 0.001, "loss": 0.344, "step": 263900 }, { "epoch": 85.32643826761473, "grad_norm": 1.0155036449432373, "learning_rate": 0.001, "loss": 0.3568, "step": 264000 }, { "epoch": 85.35875888817066, "grad_norm": 0.8369677066802979, "learning_rate": 0.001, "loss": 0.3529, "step": 264100 }, { "epoch": 85.39107950872656, "grad_norm": 0.9425675868988037, "learning_rate": 0.001, "loss": 0.356, "step": 264200 }, { "epoch": 85.42340012928248, "grad_norm": 1.0285272598266602, "learning_rate": 0.001, "loss": 0.3578, "step": 264300 }, { "epoch": 85.45572074983839, "grad_norm": 0.910561740398407, "learning_rate": 0.001, "loss": 0.3576, "step": 264400 }, { "epoch": 85.48804137039431, "grad_norm": 1.010042428970337, "learning_rate": 0.001, "loss": 0.3598, "step": 264500 }, { "epoch": 85.52036199095022, "grad_norm": 1.048146367073059, "learning_rate": 0.001, "loss": 0.369, "step": 264600 }, { "epoch": 85.55268261150614, "grad_norm": 0.9783318638801575, "learning_rate": 0.001, "loss": 0.3668, "step": 264700 }, { "epoch": 85.58500323206205, "grad_norm": 1.1085271835327148, "learning_rate": 0.001, "loss": 0.3683, "step": 264800 }, { "epoch": 85.61732385261797, "grad_norm": 1.1479791402816772, "learning_rate": 0.001, "loss": 0.37, "step": 264900 }, { "epoch": 85.64964447317388, "grad_norm": 1.2988147735595703, "learning_rate": 0.001, "loss": 0.3669, "step": 265000 }, { "epoch": 85.6819650937298, "grad_norm": 0.8706017136573792, "learning_rate": 0.001, "loss": 0.3723, "step": 265100 }, { "epoch": 85.71428571428571, "grad_norm": 1.0042610168457031, "learning_rate": 0.001, "loss": 0.3677, "step": 265200 }, { "epoch": 85.74660633484163, "grad_norm": 1.3180789947509766, "learning_rate": 0.001, "loss": 0.3762, "step": 265300 }, { "epoch": 85.77892695539754, "grad_norm": 0.9858996272087097, "learning_rate": 0.001, "loss": 0.3781, "step": 265400 }, { "epoch": 85.81124757595346, "grad_norm": 0.9402637481689453, "learning_rate": 0.001, "loss": 0.3813, "step": 265500 }, { "epoch": 85.84356819650937, "grad_norm": 1.2245409488677979, "learning_rate": 0.001, "loss": 0.3786, "step": 265600 }, { "epoch": 85.87588881706529, "grad_norm": 1.3883883953094482, "learning_rate": 0.001, "loss": 0.38, "step": 265700 }, { "epoch": 85.9082094376212, "grad_norm": 1.0936774015426636, "learning_rate": 0.001, "loss": 0.3868, "step": 265800 }, { "epoch": 85.94053005817712, "grad_norm": 0.9416587352752686, "learning_rate": 0.001, "loss": 0.3885, "step": 265900 }, { "epoch": 85.97285067873302, "grad_norm": 1.6277998685836792, "learning_rate": 0.001, "loss": 0.388, "step": 266000 }, { "epoch": 86.00517129928895, "grad_norm": 0.8930133581161499, "learning_rate": 0.001, "loss": 0.3789, "step": 266100 }, { "epoch": 86.03749191984487, "grad_norm": 0.8565099835395813, "learning_rate": 0.001, "loss": 0.3304, "step": 266200 }, { "epoch": 86.06981254040078, "grad_norm": 0.9187474846839905, "learning_rate": 0.001, "loss": 0.3256, "step": 266300 }, { "epoch": 86.1021331609567, "grad_norm": 0.8798098564147949, "learning_rate": 0.001, "loss": 0.3285, "step": 266400 }, { "epoch": 86.1344537815126, "grad_norm": 0.9984344244003296, "learning_rate": 0.001, "loss": 0.33, "step": 266500 }, { "epoch": 86.16677440206853, "grad_norm": 1.1285039186477661, "learning_rate": 0.001, "loss": 0.335, "step": 266600 }, { "epoch": 86.19909502262443, "grad_norm": 0.952877938747406, "learning_rate": 0.001, "loss": 0.3365, "step": 266700 }, { "epoch": 86.23141564318036, "grad_norm": 0.9790455102920532, "learning_rate": 0.001, "loss": 0.3411, "step": 266800 }, { "epoch": 86.26373626373626, "grad_norm": 1.0569692850112915, "learning_rate": 0.001, "loss": 0.3421, "step": 266900 }, { "epoch": 86.29605688429218, "grad_norm": 1.0188093185424805, "learning_rate": 0.001, "loss": 0.3479, "step": 267000 }, { "epoch": 86.32837750484809, "grad_norm": 1.0954725742340088, "learning_rate": 0.001, "loss": 0.3445, "step": 267100 }, { "epoch": 86.36069812540401, "grad_norm": 1.0758107900619507, "learning_rate": 0.001, "loss": 0.3418, "step": 267200 }, { "epoch": 86.39301874595992, "grad_norm": 1.3398795127868652, "learning_rate": 0.001, "loss": 0.3533, "step": 267300 }, { "epoch": 86.42533936651584, "grad_norm": 1.4030940532684326, "learning_rate": 0.001, "loss": 0.3567, "step": 267400 }, { "epoch": 86.45765998707175, "grad_norm": 1.3974443674087524, "learning_rate": 0.001, "loss": 0.3474, "step": 267500 }, { "epoch": 86.48998060762767, "grad_norm": 1.269712209701538, "learning_rate": 0.001, "loss": 0.3573, "step": 267600 }, { "epoch": 86.52230122818358, "grad_norm": 1.0381618738174438, "learning_rate": 0.001, "loss": 0.3589, "step": 267700 }, { "epoch": 86.5546218487395, "grad_norm": 1.1004698276519775, "learning_rate": 0.001, "loss": 0.361, "step": 267800 }, { "epoch": 86.58694246929541, "grad_norm": 1.2840595245361328, "learning_rate": 0.001, "loss": 0.3652, "step": 267900 }, { "epoch": 86.61926308985133, "grad_norm": 1.2200064659118652, "learning_rate": 0.001, "loss": 0.3631, "step": 268000 }, { "epoch": 86.65158371040724, "grad_norm": 1.0484951734542847, "learning_rate": 0.001, "loss": 0.3712, "step": 268100 }, { "epoch": 86.68390433096316, "grad_norm": 1.1336572170257568, "learning_rate": 0.001, "loss": 0.3678, "step": 268200 }, { "epoch": 86.71622495151907, "grad_norm": 1.3072932958602905, "learning_rate": 0.001, "loss": 0.3652, "step": 268300 }, { "epoch": 86.74854557207499, "grad_norm": 1.0461935997009277, "learning_rate": 0.001, "loss": 0.3763, "step": 268400 }, { "epoch": 86.7808661926309, "grad_norm": 0.9744913578033447, "learning_rate": 0.001, "loss": 0.3683, "step": 268500 }, { "epoch": 86.81318681318682, "grad_norm": 1.234160304069519, "learning_rate": 0.001, "loss": 0.3736, "step": 268600 }, { "epoch": 86.84550743374272, "grad_norm": 1.054231882095337, "learning_rate": 0.001, "loss": 0.3752, "step": 268700 }, { "epoch": 86.87782805429865, "grad_norm": 1.110755443572998, "learning_rate": 0.001, "loss": 0.3743, "step": 268800 }, { "epoch": 86.91014867485455, "grad_norm": 1.060897946357727, "learning_rate": 0.001, "loss": 0.3757, "step": 268900 }, { "epoch": 86.94246929541048, "grad_norm": 0.9313083291053772, "learning_rate": 0.001, "loss": 0.3764, "step": 269000 }, { "epoch": 86.97478991596638, "grad_norm": 0.9712097644805908, "learning_rate": 0.001, "loss": 0.3858, "step": 269100 }, { "epoch": 87.0071105365223, "grad_norm": 1.0305821895599365, "learning_rate": 0.001, "loss": 0.3741, "step": 269200 }, { "epoch": 87.03943115707821, "grad_norm": 0.8524965047836304, "learning_rate": 0.001, "loss": 0.3172, "step": 269300 }, { "epoch": 87.07175177763413, "grad_norm": 1.0251199007034302, "learning_rate": 0.001, "loss": 0.3195, "step": 269400 }, { "epoch": 87.10407239819004, "grad_norm": 0.9868242144584656, "learning_rate": 0.001, "loss": 0.3215, "step": 269500 }, { "epoch": 87.13639301874596, "grad_norm": 1.0545991659164429, "learning_rate": 0.001, "loss": 0.3323, "step": 269600 }, { "epoch": 87.16871363930187, "grad_norm": 1.1055914163589478, "learning_rate": 0.001, "loss": 0.3233, "step": 269700 }, { "epoch": 87.20103425985779, "grad_norm": 0.9079421162605286, "learning_rate": 0.001, "loss": 0.3331, "step": 269800 }, { "epoch": 87.2333548804137, "grad_norm": 1.231232762336731, "learning_rate": 0.001, "loss": 0.3367, "step": 269900 }, { "epoch": 87.26567550096962, "grad_norm": 0.9043568968772888, "learning_rate": 0.001, "loss": 0.3393, "step": 270000 }, { "epoch": 87.29799612152553, "grad_norm": 1.0445023775100708, "learning_rate": 0.001, "loss": 0.3383, "step": 270100 }, { "epoch": 87.33031674208145, "grad_norm": 0.9281661510467529, "learning_rate": 0.001, "loss": 0.3469, "step": 270200 }, { "epoch": 87.36263736263736, "grad_norm": 1.1577532291412354, "learning_rate": 0.001, "loss": 0.3481, "step": 270300 }, { "epoch": 87.39495798319328, "grad_norm": 1.3595645427703857, "learning_rate": 0.001, "loss": 0.3501, "step": 270400 }, { "epoch": 87.42727860374919, "grad_norm": 1.107136607170105, "learning_rate": 0.001, "loss": 0.3427, "step": 270500 }, { "epoch": 87.45959922430511, "grad_norm": 1.547667384147644, "learning_rate": 0.001, "loss": 0.3511, "step": 270600 }, { "epoch": 87.49191984486102, "grad_norm": 0.9796596169471741, "learning_rate": 0.001, "loss": 0.3535, "step": 270700 }, { "epoch": 87.52424046541694, "grad_norm": 1.5801600217819214, "learning_rate": 0.001, "loss": 0.3488, "step": 270800 }, { "epoch": 87.55656108597285, "grad_norm": 0.9296369552612305, "learning_rate": 0.001, "loss": 0.3577, "step": 270900 }, { "epoch": 87.58888170652877, "grad_norm": 1.1541513204574585, "learning_rate": 0.001, "loss": 0.3602, "step": 271000 }, { "epoch": 87.62120232708467, "grad_norm": 1.0234315395355225, "learning_rate": 0.001, "loss": 0.3648, "step": 271100 }, { "epoch": 87.6535229476406, "grad_norm": 1.0786499977111816, "learning_rate": 0.001, "loss": 0.3608, "step": 271200 }, { "epoch": 87.6858435681965, "grad_norm": 0.9756454825401306, "learning_rate": 0.001, "loss": 0.3691, "step": 271300 }, { "epoch": 87.71816418875243, "grad_norm": 1.168795108795166, "learning_rate": 0.001, "loss": 0.3677, "step": 271400 }, { "epoch": 87.75048480930833, "grad_norm": 0.976140558719635, "learning_rate": 0.001, "loss": 0.3681, "step": 271500 }, { "epoch": 87.78280542986425, "grad_norm": 1.1995179653167725, "learning_rate": 0.001, "loss": 0.3699, "step": 271600 }, { "epoch": 87.81512605042016, "grad_norm": 1.2804988622665405, "learning_rate": 0.001, "loss": 0.3676, "step": 271700 }, { "epoch": 87.84744667097608, "grad_norm": 1.0625967979431152, "learning_rate": 0.001, "loss": 0.3771, "step": 271800 }, { "epoch": 87.87976729153199, "grad_norm": 0.8920925855636597, "learning_rate": 0.001, "loss": 0.3717, "step": 271900 }, { "epoch": 87.91208791208791, "grad_norm": 1.0500991344451904, "learning_rate": 0.001, "loss": 0.3739, "step": 272000 }, { "epoch": 87.94440853264382, "grad_norm": 0.9764301776885986, "learning_rate": 0.001, "loss": 0.3787, "step": 272100 }, { "epoch": 87.97672915319974, "grad_norm": 1.2822630405426025, "learning_rate": 0.001, "loss": 0.3803, "step": 272200 }, { "epoch": 88.00904977375566, "grad_norm": 0.9201326370239258, "learning_rate": 0.001, "loss": 0.3562, "step": 272300 }, { "epoch": 88.04137039431157, "grad_norm": 1.0660690069198608, "learning_rate": 0.001, "loss": 0.3165, "step": 272400 }, { "epoch": 88.07369101486749, "grad_norm": 1.3865466117858887, "learning_rate": 0.001, "loss": 0.3156, "step": 272500 }, { "epoch": 88.1060116354234, "grad_norm": 0.8955172896385193, "learning_rate": 0.001, "loss": 0.3228, "step": 272600 }, { "epoch": 88.13833225597932, "grad_norm": 1.1990008354187012, "learning_rate": 0.001, "loss": 0.3264, "step": 272700 }, { "epoch": 88.17065287653523, "grad_norm": 0.9380447268486023, "learning_rate": 0.001, "loss": 0.3265, "step": 272800 }, { "epoch": 88.20297349709115, "grad_norm": 0.9149646162986755, "learning_rate": 0.001, "loss": 0.3269, "step": 272900 }, { "epoch": 88.23529411764706, "grad_norm": 0.9332559704780579, "learning_rate": 0.001, "loss": 0.3314, "step": 273000 }, { "epoch": 88.26761473820298, "grad_norm": 0.9615284204483032, "learning_rate": 0.001, "loss": 0.3296, "step": 273100 }, { "epoch": 88.29993535875889, "grad_norm": 1.1389895677566528, "learning_rate": 0.001, "loss": 0.3376, "step": 273200 }, { "epoch": 88.33225597931481, "grad_norm": 0.9941064119338989, "learning_rate": 0.001, "loss": 0.3385, "step": 273300 }, { "epoch": 88.36457659987072, "grad_norm": 0.8724578022956848, "learning_rate": 0.001, "loss": 0.3378, "step": 273400 }, { "epoch": 88.39689722042664, "grad_norm": 1.0726187229156494, "learning_rate": 0.001, "loss": 0.3408, "step": 273500 }, { "epoch": 88.42921784098255, "grad_norm": 0.9205648303031921, "learning_rate": 0.001, "loss": 0.3416, "step": 273600 }, { "epoch": 88.46153846153847, "grad_norm": 1.0518187284469604, "learning_rate": 0.001, "loss": 0.3456, "step": 273700 }, { "epoch": 88.49385908209437, "grad_norm": 0.9029635190963745, "learning_rate": 0.001, "loss": 0.3521, "step": 273800 }, { "epoch": 88.5261797026503, "grad_norm": 1.0454427003860474, "learning_rate": 0.001, "loss": 0.3474, "step": 273900 }, { "epoch": 88.5585003232062, "grad_norm": 1.0768498182296753, "learning_rate": 0.001, "loss": 0.3564, "step": 274000 }, { "epoch": 88.59082094376213, "grad_norm": 0.9860252141952515, "learning_rate": 0.001, "loss": 0.3537, "step": 274100 }, { "epoch": 88.62314156431803, "grad_norm": 1.0597411394119263, "learning_rate": 0.001, "loss": 0.3585, "step": 274200 }, { "epoch": 88.65546218487395, "grad_norm": 1.2499903440475464, "learning_rate": 0.001, "loss": 0.3595, "step": 274300 }, { "epoch": 88.68778280542986, "grad_norm": 1.041275978088379, "learning_rate": 0.001, "loss": 0.3604, "step": 274400 }, { "epoch": 88.72010342598578, "grad_norm": 1.0063154697418213, "learning_rate": 0.001, "loss": 0.359, "step": 274500 }, { "epoch": 88.75242404654169, "grad_norm": 1.1932463645935059, "learning_rate": 0.001, "loss": 0.364, "step": 274600 }, { "epoch": 88.78474466709761, "grad_norm": 1.0435807704925537, "learning_rate": 0.001, "loss": 0.3608, "step": 274700 }, { "epoch": 88.81706528765352, "grad_norm": 0.9908381104469299, "learning_rate": 0.001, "loss": 0.3705, "step": 274800 }, { "epoch": 88.84938590820944, "grad_norm": 1.0460364818572998, "learning_rate": 0.001, "loss": 0.3659, "step": 274900 }, { "epoch": 88.88170652876535, "grad_norm": 1.0796244144439697, "learning_rate": 0.001, "loss": 0.3677, "step": 275000 }, { "epoch": 88.91402714932127, "grad_norm": 1.1879616975784302, "learning_rate": 0.001, "loss": 0.3724, "step": 275100 }, { "epoch": 88.94634776987718, "grad_norm": 1.3666988611221313, "learning_rate": 0.001, "loss": 0.3704, "step": 275200 }, { "epoch": 88.9786683904331, "grad_norm": 1.2975006103515625, "learning_rate": 0.001, "loss": 0.3756, "step": 275300 }, { "epoch": 89.01098901098901, "grad_norm": 0.878056526184082, "learning_rate": 0.001, "loss": 0.3461, "step": 275400 }, { "epoch": 89.04330963154493, "grad_norm": 0.9479916095733643, "learning_rate": 0.001, "loss": 0.3133, "step": 275500 }, { "epoch": 89.07563025210084, "grad_norm": 1.1515830755233765, "learning_rate": 0.001, "loss": 0.3149, "step": 275600 }, { "epoch": 89.10795087265676, "grad_norm": 1.0100334882736206, "learning_rate": 0.001, "loss": 0.3209, "step": 275700 }, { "epoch": 89.14027149321267, "grad_norm": 0.8784673810005188, "learning_rate": 0.001, "loss": 0.3206, "step": 275800 }, { "epoch": 89.17259211376859, "grad_norm": 1.0764578580856323, "learning_rate": 0.001, "loss": 0.3261, "step": 275900 }, { "epoch": 89.2049127343245, "grad_norm": 0.9277750849723816, "learning_rate": 0.001, "loss": 0.3235, "step": 276000 }, { "epoch": 89.23723335488042, "grad_norm": 0.8363617658615112, "learning_rate": 0.001, "loss": 0.3251, "step": 276100 }, { "epoch": 89.26955397543632, "grad_norm": 0.8687469363212585, "learning_rate": 0.001, "loss": 0.3233, "step": 276200 }, { "epoch": 89.30187459599225, "grad_norm": 0.9667211771011353, "learning_rate": 0.001, "loss": 0.3333, "step": 276300 }, { "epoch": 89.33419521654815, "grad_norm": 0.9360036849975586, "learning_rate": 0.001, "loss": 0.3341, "step": 276400 }, { "epoch": 89.36651583710407, "grad_norm": 1.0721772909164429, "learning_rate": 0.001, "loss": 0.3369, "step": 276500 }, { "epoch": 89.39883645765998, "grad_norm": 0.9033608436584473, "learning_rate": 0.001, "loss": 0.336, "step": 276600 }, { "epoch": 89.4311570782159, "grad_norm": 1.2335203886032104, "learning_rate": 0.001, "loss": 0.3353, "step": 276700 }, { "epoch": 89.46347769877181, "grad_norm": 1.0744938850402832, "learning_rate": 0.001, "loss": 0.3479, "step": 276800 }, { "epoch": 89.49579831932773, "grad_norm": 1.0532876253128052, "learning_rate": 0.001, "loss": 0.338, "step": 276900 }, { "epoch": 89.52811893988364, "grad_norm": 0.8420795202255249, "learning_rate": 0.001, "loss": 0.3477, "step": 277000 }, { "epoch": 89.56043956043956, "grad_norm": 1.152453899383545, "learning_rate": 0.001, "loss": 0.3499, "step": 277100 }, { "epoch": 89.59276018099547, "grad_norm": 1.076563835144043, "learning_rate": 0.001, "loss": 0.3512, "step": 277200 }, { "epoch": 89.62508080155139, "grad_norm": 1.1771286725997925, "learning_rate": 0.001, "loss": 0.354, "step": 277300 }, { "epoch": 89.6574014221073, "grad_norm": 1.1800857782363892, "learning_rate": 0.001, "loss": 0.3483, "step": 277400 }, { "epoch": 89.68972204266322, "grad_norm": 0.9495158195495605, "learning_rate": 0.001, "loss": 0.3527, "step": 277500 }, { "epoch": 89.72204266321913, "grad_norm": 0.9570762515068054, "learning_rate": 0.001, "loss": 0.3616, "step": 277600 }, { "epoch": 89.75436328377505, "grad_norm": 1.1813184022903442, "learning_rate": 0.001, "loss": 0.3564, "step": 277700 }, { "epoch": 89.78668390433096, "grad_norm": 0.9939044713973999, "learning_rate": 0.001, "loss": 0.3563, "step": 277800 }, { "epoch": 89.81900452488688, "grad_norm": 0.8801103830337524, "learning_rate": 0.001, "loss": 0.3616, "step": 277900 }, { "epoch": 89.85132514544279, "grad_norm": 1.282175898551941, "learning_rate": 0.001, "loss": 0.3653, "step": 278000 }, { "epoch": 89.88364576599871, "grad_norm": 1.295422911643982, "learning_rate": 0.001, "loss": 0.3615, "step": 278100 }, { "epoch": 89.91596638655462, "grad_norm": 1.2482227087020874, "learning_rate": 0.001, "loss": 0.3631, "step": 278200 }, { "epoch": 89.94828700711054, "grad_norm": 0.8686507344245911, "learning_rate": 0.001, "loss": 0.3722, "step": 278300 }, { "epoch": 89.98060762766644, "grad_norm": 1.17640221118927, "learning_rate": 0.001, "loss": 0.3684, "step": 278400 }, { "epoch": 90.01292824822237, "grad_norm": 0.9840737581253052, "learning_rate": 0.001, "loss": 0.3405, "step": 278500 }, { "epoch": 90.04524886877829, "grad_norm": 1.0191493034362793, "learning_rate": 0.001, "loss": 0.3099, "step": 278600 }, { "epoch": 90.0775694893342, "grad_norm": 0.9464077353477478, "learning_rate": 0.001, "loss": 0.3125, "step": 278700 }, { "epoch": 90.10989010989012, "grad_norm": 0.9208871722221375, "learning_rate": 0.001, "loss": 0.3148, "step": 278800 }, { "epoch": 90.14221073044602, "grad_norm": 1.0500644445419312, "learning_rate": 0.001, "loss": 0.3133, "step": 278900 }, { "epoch": 90.17453135100195, "grad_norm": 1.0777854919433594, "learning_rate": 0.001, "loss": 0.3143, "step": 279000 }, { "epoch": 90.20685197155785, "grad_norm": 0.9212045073509216, "learning_rate": 0.001, "loss": 0.3229, "step": 279100 }, { "epoch": 90.23917259211377, "grad_norm": 1.263359785079956, "learning_rate": 0.001, "loss": 0.3238, "step": 279200 }, { "epoch": 90.27149321266968, "grad_norm": 0.9570851922035217, "learning_rate": 0.001, "loss": 0.3234, "step": 279300 }, { "epoch": 90.3038138332256, "grad_norm": 1.0342272520065308, "learning_rate": 0.001, "loss": 0.3278, "step": 279400 }, { "epoch": 90.33613445378151, "grad_norm": 1.056708574295044, "learning_rate": 0.001, "loss": 0.3313, "step": 279500 }, { "epoch": 90.36845507433743, "grad_norm": 0.9968493580818176, "learning_rate": 0.001, "loss": 0.3327, "step": 279600 }, { "epoch": 90.40077569489334, "grad_norm": 1.5855700969696045, "learning_rate": 0.001, "loss": 0.3346, "step": 279700 }, { "epoch": 90.43309631544926, "grad_norm": 1.2351268529891968, "learning_rate": 0.001, "loss": 0.3332, "step": 279800 }, { "epoch": 90.46541693600517, "grad_norm": 1.2913575172424316, "learning_rate": 0.001, "loss": 0.3368, "step": 279900 }, { "epoch": 90.49773755656109, "grad_norm": 0.9135908484458923, "learning_rate": 0.001, "loss": 0.343, "step": 280000 }, { "epoch": 90.530058177117, "grad_norm": 0.8392446637153625, "learning_rate": 0.001, "loss": 0.3397, "step": 280100 }, { "epoch": 90.56237879767292, "grad_norm": 0.9061764478683472, "learning_rate": 0.001, "loss": 0.3411, "step": 280200 }, { "epoch": 90.59469941822883, "grad_norm": 0.863675057888031, "learning_rate": 0.001, "loss": 0.3408, "step": 280300 }, { "epoch": 90.62702003878475, "grad_norm": 1.0142652988433838, "learning_rate": 0.001, "loss": 0.3483, "step": 280400 }, { "epoch": 90.65934065934066, "grad_norm": 0.8978894948959351, "learning_rate": 0.001, "loss": 0.3513, "step": 280500 }, { "epoch": 90.69166127989658, "grad_norm": 1.0866570472717285, "learning_rate": 0.001, "loss": 0.3469, "step": 280600 }, { "epoch": 90.72398190045249, "grad_norm": 0.9070130586624146, "learning_rate": 0.001, "loss": 0.3524, "step": 280700 }, { "epoch": 90.75630252100841, "grad_norm": 1.1825019121170044, "learning_rate": 0.001, "loss": 0.3533, "step": 280800 }, { "epoch": 90.78862314156432, "grad_norm": 0.9480785131454468, "learning_rate": 0.001, "loss": 0.3537, "step": 280900 }, { "epoch": 90.82094376212024, "grad_norm": 1.0649914741516113, "learning_rate": 0.001, "loss": 0.3535, "step": 281000 }, { "epoch": 90.85326438267614, "grad_norm": 1.1400140523910522, "learning_rate": 0.001, "loss": 0.3617, "step": 281100 }, { "epoch": 90.88558500323207, "grad_norm": 1.1364398002624512, "learning_rate": 0.001, "loss": 0.3613, "step": 281200 }, { "epoch": 90.91790562378797, "grad_norm": 1.1689413785934448, "learning_rate": 0.001, "loss": 0.3567, "step": 281300 }, { "epoch": 90.9502262443439, "grad_norm": 1.1273508071899414, "learning_rate": 0.001, "loss": 0.3608, "step": 281400 }, { "epoch": 90.9825468648998, "grad_norm": 0.9578779935836792, "learning_rate": 0.001, "loss": 0.3653, "step": 281500 }, { "epoch": 91.01486748545572, "grad_norm": 0.9322060942649841, "learning_rate": 0.001, "loss": 0.331, "step": 281600 }, { "epoch": 91.04718810601163, "grad_norm": 0.9285730123519897, "learning_rate": 0.001, "loss": 0.3054, "step": 281700 }, { "epoch": 91.07950872656755, "grad_norm": 0.9409992694854736, "learning_rate": 0.001, "loss": 0.3044, "step": 281800 }, { "epoch": 91.11182934712346, "grad_norm": 0.921698272228241, "learning_rate": 0.001, "loss": 0.3099, "step": 281900 }, { "epoch": 91.14414996767938, "grad_norm": 0.9547483921051025, "learning_rate": 0.001, "loss": 0.3145, "step": 282000 }, { "epoch": 91.17647058823529, "grad_norm": 0.8718846440315247, "learning_rate": 0.001, "loss": 0.317, "step": 282100 }, { "epoch": 91.20879120879121, "grad_norm": 1.1787347793579102, "learning_rate": 0.001, "loss": 0.3169, "step": 282200 }, { "epoch": 91.24111182934712, "grad_norm": 0.833748996257782, "learning_rate": 0.001, "loss": 0.3209, "step": 282300 }, { "epoch": 91.27343244990304, "grad_norm": 1.084109902381897, "learning_rate": 0.001, "loss": 0.3222, "step": 282400 }, { "epoch": 91.30575307045895, "grad_norm": 1.155814528465271, "learning_rate": 0.001, "loss": 0.3225, "step": 282500 }, { "epoch": 91.33807369101487, "grad_norm": 0.9923741817474365, "learning_rate": 0.001, "loss": 0.3281, "step": 282600 }, { "epoch": 91.37039431157078, "grad_norm": 1.216067910194397, "learning_rate": 0.001, "loss": 0.3304, "step": 282700 }, { "epoch": 91.4027149321267, "grad_norm": 0.9273979663848877, "learning_rate": 0.001, "loss": 0.3353, "step": 282800 }, { "epoch": 91.4350355526826, "grad_norm": 0.8830786347389221, "learning_rate": 0.001, "loss": 0.3329, "step": 282900 }, { "epoch": 91.46735617323853, "grad_norm": 0.9615721106529236, "learning_rate": 0.001, "loss": 0.333, "step": 283000 }, { "epoch": 91.49967679379444, "grad_norm": 1.0586540699005127, "learning_rate": 0.001, "loss": 0.3339, "step": 283100 }, { "epoch": 91.53199741435036, "grad_norm": 0.9249664545059204, "learning_rate": 0.001, "loss": 0.3288, "step": 283200 }, { "epoch": 91.56431803490626, "grad_norm": 1.1405055522918701, "learning_rate": 0.001, "loss": 0.3392, "step": 283300 }, { "epoch": 91.59663865546219, "grad_norm": 0.9498429894447327, "learning_rate": 0.001, "loss": 0.346, "step": 283400 }, { "epoch": 91.6289592760181, "grad_norm": 0.8667886257171631, "learning_rate": 0.001, "loss": 0.338, "step": 283500 }, { "epoch": 91.66127989657402, "grad_norm": 1.175717830657959, "learning_rate": 0.001, "loss": 0.3561, "step": 283600 }, { "epoch": 91.69360051712992, "grad_norm": 1.1780322790145874, "learning_rate": 0.001, "loss": 0.3492, "step": 283700 }, { "epoch": 91.72592113768584, "grad_norm": 1.344182014465332, "learning_rate": 0.001, "loss": 0.3447, "step": 283800 }, { "epoch": 91.75824175824175, "grad_norm": 1.0317049026489258, "learning_rate": 0.001, "loss": 0.3521, "step": 283900 }, { "epoch": 91.79056237879767, "grad_norm": 0.958740770816803, "learning_rate": 0.001, "loss": 0.354, "step": 284000 }, { "epoch": 91.82288299935358, "grad_norm": 0.935267984867096, "learning_rate": 0.001, "loss": 0.3551, "step": 284100 }, { "epoch": 91.8552036199095, "grad_norm": 1.3996332883834839, "learning_rate": 0.001, "loss": 0.3528, "step": 284200 }, { "epoch": 91.88752424046541, "grad_norm": 0.9520308375358582, "learning_rate": 0.001, "loss": 0.3575, "step": 284300 }, { "epoch": 91.91984486102133, "grad_norm": 1.102171540260315, "learning_rate": 0.001, "loss": 0.3603, "step": 284400 }, { "epoch": 91.95216548157724, "grad_norm": 1.0449070930480957, "learning_rate": 0.001, "loss": 0.3563, "step": 284500 }, { "epoch": 91.98448610213316, "grad_norm": 0.9982185363769531, "learning_rate": 0.001, "loss": 0.362, "step": 284600 }, { "epoch": 92.01680672268908, "grad_norm": 1.3165881633758545, "learning_rate": 0.001, "loss": 0.3255, "step": 284700 }, { "epoch": 92.04912734324499, "grad_norm": 0.8716535568237305, "learning_rate": 0.001, "loss": 0.2996, "step": 284800 }, { "epoch": 92.08144796380091, "grad_norm": 1.0054798126220703, "learning_rate": 0.001, "loss": 0.3056, "step": 284900 }, { "epoch": 92.11376858435682, "grad_norm": 1.1799583435058594, "learning_rate": 0.001, "loss": 0.3077, "step": 285000 }, { "epoch": 92.14608920491274, "grad_norm": 0.9408718943595886, "learning_rate": 0.001, "loss": 0.3105, "step": 285100 }, { "epoch": 92.17840982546865, "grad_norm": 1.0861705541610718, "learning_rate": 0.001, "loss": 0.3152, "step": 285200 }, { "epoch": 92.21073044602457, "grad_norm": 1.3419109582901, "learning_rate": 0.001, "loss": 0.3154, "step": 285300 }, { "epoch": 92.24305106658048, "grad_norm": 1.1945537328720093, "learning_rate": 0.001, "loss": 0.312, "step": 285400 }, { "epoch": 92.2753716871364, "grad_norm": 1.4180561304092407, "learning_rate": 0.001, "loss": 0.3204, "step": 285500 }, { "epoch": 92.3076923076923, "grad_norm": 1.034794807434082, "learning_rate": 0.001, "loss": 0.3199, "step": 285600 }, { "epoch": 92.34001292824823, "grad_norm": 0.967502236366272, "learning_rate": 0.001, "loss": 0.3208, "step": 285700 }, { "epoch": 92.37233354880414, "grad_norm": 1.2261031866073608, "learning_rate": 0.001, "loss": 0.3225, "step": 285800 }, { "epoch": 92.40465416936006, "grad_norm": 1.0543971061706543, "learning_rate": 0.001, "loss": 0.3279, "step": 285900 }, { "epoch": 92.43697478991596, "grad_norm": 0.9720335006713867, "learning_rate": 0.001, "loss": 0.3307, "step": 286000 }, { "epoch": 92.46929541047189, "grad_norm": 1.1377506256103516, "learning_rate": 0.001, "loss": 0.3312, "step": 286100 }, { "epoch": 92.5016160310278, "grad_norm": 1.4359102249145508, "learning_rate": 0.001, "loss": 0.3328, "step": 286200 }, { "epoch": 92.53393665158372, "grad_norm": 1.005310297012329, "learning_rate": 0.001, "loss": 0.3359, "step": 286300 }, { "epoch": 92.56625727213962, "grad_norm": 1.5165833234786987, "learning_rate": 0.001, "loss": 0.3286, "step": 286400 }, { "epoch": 92.59857789269554, "grad_norm": 1.3362587690353394, "learning_rate": 0.001, "loss": 0.3329, "step": 286500 }, { "epoch": 92.63089851325145, "grad_norm": 0.953940749168396, "learning_rate": 0.001, "loss": 0.3421, "step": 286600 }, { "epoch": 92.66321913380737, "grad_norm": 1.0208436250686646, "learning_rate": 0.001, "loss": 0.3421, "step": 286700 }, { "epoch": 92.69553975436328, "grad_norm": 1.5811911821365356, "learning_rate": 0.001, "loss": 0.3457, "step": 286800 }, { "epoch": 92.7278603749192, "grad_norm": 1.0778679847717285, "learning_rate": 0.001, "loss": 0.3446, "step": 286900 }, { "epoch": 92.76018099547511, "grad_norm": 1.2522433996200562, "learning_rate": 0.001, "loss": 0.3423, "step": 287000 }, { "epoch": 92.79250161603103, "grad_norm": 0.9745849967002869, "learning_rate": 0.001, "loss": 0.3495, "step": 287100 }, { "epoch": 92.82482223658694, "grad_norm": 0.982984721660614, "learning_rate": 0.001, "loss": 0.3465, "step": 287200 }, { "epoch": 92.85714285714286, "grad_norm": 1.1498178243637085, "learning_rate": 0.001, "loss": 0.3512, "step": 287300 }, { "epoch": 92.88946347769877, "grad_norm": 0.9689698219299316, "learning_rate": 0.001, "loss": 0.3538, "step": 287400 }, { "epoch": 92.92178409825469, "grad_norm": 1.2577341794967651, "learning_rate": 0.001, "loss": 0.3567, "step": 287500 }, { "epoch": 92.9541047188106, "grad_norm": 1.129622459411621, "learning_rate": 0.001, "loss": 0.3563, "step": 287600 }, { "epoch": 92.98642533936652, "grad_norm": 1.2102665901184082, "learning_rate": 0.001, "loss": 0.3534, "step": 287700 }, { "epoch": 93.01874595992243, "grad_norm": 1.2173608541488647, "learning_rate": 0.001, "loss": 0.3218, "step": 287800 }, { "epoch": 93.05106658047835, "grad_norm": 1.1179250478744507, "learning_rate": 0.001, "loss": 0.2979, "step": 287900 }, { "epoch": 93.08338720103426, "grad_norm": 1.0404188632965088, "learning_rate": 0.001, "loss": 0.302, "step": 288000 }, { "epoch": 93.11570782159018, "grad_norm": 1.2764333486557007, "learning_rate": 0.001, "loss": 0.302, "step": 288100 }, { "epoch": 93.14802844214609, "grad_norm": 0.8845721483230591, "learning_rate": 0.001, "loss": 0.3077, "step": 288200 }, { "epoch": 93.180349062702, "grad_norm": 1.092352032661438, "learning_rate": 0.001, "loss": 0.3061, "step": 288300 }, { "epoch": 93.21266968325791, "grad_norm": 0.9488524794578552, "learning_rate": 0.001, "loss": 0.3095, "step": 288400 }, { "epoch": 93.24499030381384, "grad_norm": 1.9193674325942993, "learning_rate": 0.001, "loss": 0.31, "step": 288500 }, { "epoch": 93.27731092436974, "grad_norm": 0.9536850452423096, "learning_rate": 0.001, "loss": 0.3202, "step": 288600 }, { "epoch": 93.30963154492567, "grad_norm": 1.0513343811035156, "learning_rate": 0.001, "loss": 0.3156, "step": 288700 }, { "epoch": 93.34195216548157, "grad_norm": 0.8580809235572815, "learning_rate": 0.001, "loss": 0.3199, "step": 288800 }, { "epoch": 93.3742727860375, "grad_norm": 1.6308412551879883, "learning_rate": 0.001, "loss": 0.3242, "step": 288900 }, { "epoch": 93.4065934065934, "grad_norm": 1.0085711479187012, "learning_rate": 0.001, "loss": 0.3226, "step": 289000 }, { "epoch": 93.43891402714932, "grad_norm": 1.0596351623535156, "learning_rate": 0.001, "loss": 0.3208, "step": 289100 }, { "epoch": 93.47123464770523, "grad_norm": 1.1402593851089478, "learning_rate": 0.001, "loss": 0.3272, "step": 289200 }, { "epoch": 93.50355526826115, "grad_norm": 1.0539191961288452, "learning_rate": 0.001, "loss": 0.3289, "step": 289300 }, { "epoch": 93.53587588881706, "grad_norm": 0.9605076313018799, "learning_rate": 0.001, "loss": 0.3296, "step": 289400 }, { "epoch": 93.56819650937298, "grad_norm": 1.2236053943634033, "learning_rate": 0.001, "loss": 0.3362, "step": 289500 }, { "epoch": 93.60051712992889, "grad_norm": 1.0947476625442505, "learning_rate": 0.001, "loss": 0.3331, "step": 289600 }, { "epoch": 93.63283775048481, "grad_norm": 0.8871232867240906, "learning_rate": 0.001, "loss": 0.3306, "step": 289700 }, { "epoch": 93.66515837104072, "grad_norm": 0.9305219054222107, "learning_rate": 0.001, "loss": 0.3369, "step": 289800 }, { "epoch": 93.69747899159664, "grad_norm": 1.4813601970672607, "learning_rate": 0.001, "loss": 0.3341, "step": 289900 }, { "epoch": 93.72979961215255, "grad_norm": 0.8980185985565186, "learning_rate": 0.001, "loss": 0.34, "step": 290000 }, { "epoch": 93.76212023270847, "grad_norm": 1.057469129562378, "learning_rate": 0.001, "loss": 0.3419, "step": 290100 }, { "epoch": 93.79444085326438, "grad_norm": 1.3161203861236572, "learning_rate": 0.001, "loss": 0.346, "step": 290200 }, { "epoch": 93.8267614738203, "grad_norm": 1.0926547050476074, "learning_rate": 0.001, "loss": 0.344, "step": 290300 }, { "epoch": 93.8590820943762, "grad_norm": 1.0470707416534424, "learning_rate": 0.001, "loss": 0.3455, "step": 290400 }, { "epoch": 93.89140271493213, "grad_norm": 1.342071771621704, "learning_rate": 0.001, "loss": 0.345, "step": 290500 }, { "epoch": 93.92372333548803, "grad_norm": 0.9363331198692322, "learning_rate": 0.001, "loss": 0.3515, "step": 290600 }, { "epoch": 93.95604395604396, "grad_norm": 1.0006422996520996, "learning_rate": 0.001, "loss": 0.351, "step": 290700 }, { "epoch": 93.98836457659988, "grad_norm": 0.9673388600349426, "learning_rate": 0.001, "loss": 0.3482, "step": 290800 }, { "epoch": 94.02068519715579, "grad_norm": 0.9804301261901855, "learning_rate": 0.001, "loss": 0.3186, "step": 290900 }, { "epoch": 94.0530058177117, "grad_norm": 0.9330466985702515, "learning_rate": 0.001, "loss": 0.2916, "step": 291000 }, { "epoch": 94.08532643826761, "grad_norm": 1.381067156791687, "learning_rate": 0.001, "loss": 0.2954, "step": 291100 }, { "epoch": 94.11764705882354, "grad_norm": 1.119249939918518, "learning_rate": 0.001, "loss": 0.3027, "step": 291200 }, { "epoch": 94.14996767937944, "grad_norm": 1.0015678405761719, "learning_rate": 0.001, "loss": 0.2998, "step": 291300 }, { "epoch": 94.18228829993537, "grad_norm": 0.9264289140701294, "learning_rate": 0.001, "loss": 0.303, "step": 291400 }, { "epoch": 94.21460892049127, "grad_norm": 0.9031481146812439, "learning_rate": 0.001, "loss": 0.3112, "step": 291500 }, { "epoch": 94.2469295410472, "grad_norm": 0.8585326075553894, "learning_rate": 0.001, "loss": 0.313, "step": 291600 }, { "epoch": 94.2792501616031, "grad_norm": 0.9791240692138672, "learning_rate": 0.001, "loss": 0.3095, "step": 291700 }, { "epoch": 94.31157078215902, "grad_norm": 0.847224235534668, "learning_rate": 0.001, "loss": 0.3144, "step": 291800 }, { "epoch": 94.34389140271493, "grad_norm": 1.1473792791366577, "learning_rate": 0.001, "loss": 0.3163, "step": 291900 }, { "epoch": 94.37621202327085, "grad_norm": 1.40758216381073, "learning_rate": 0.001, "loss": 0.3196, "step": 292000 }, { "epoch": 94.40853264382676, "grad_norm": 1.0558946132659912, "learning_rate": 0.001, "loss": 0.3191, "step": 292100 }, { "epoch": 94.44085326438268, "grad_norm": 1.1685112714767456, "learning_rate": 0.001, "loss": 0.3174, "step": 292200 }, { "epoch": 94.47317388493859, "grad_norm": 1.1187825202941895, "learning_rate": 0.001, "loss": 0.3197, "step": 292300 }, { "epoch": 94.50549450549451, "grad_norm": 0.9858559370040894, "learning_rate": 0.001, "loss": 0.3247, "step": 292400 }, { "epoch": 94.53781512605042, "grad_norm": 1.0946744680404663, "learning_rate": 0.001, "loss": 0.3273, "step": 292500 }, { "epoch": 94.57013574660634, "grad_norm": 0.9937974214553833, "learning_rate": 0.001, "loss": 0.3243, "step": 292600 }, { "epoch": 94.60245636716225, "grad_norm": 1.1599940061569214, "learning_rate": 0.001, "loss": 0.3254, "step": 292700 }, { "epoch": 94.63477698771817, "grad_norm": 1.0597692728042603, "learning_rate": 0.001, "loss": 0.3285, "step": 292800 }, { "epoch": 94.66709760827408, "grad_norm": 1.1922874450683594, "learning_rate": 0.001, "loss": 0.3341, "step": 292900 }, { "epoch": 94.69941822883, "grad_norm": 1.3713363409042358, "learning_rate": 0.001, "loss": 0.3383, "step": 293000 }, { "epoch": 94.7317388493859, "grad_norm": 1.1120933294296265, "learning_rate": 0.001, "loss": 0.3401, "step": 293100 }, { "epoch": 94.76405946994183, "grad_norm": 1.0789011716842651, "learning_rate": 0.001, "loss": 0.3395, "step": 293200 }, { "epoch": 94.79638009049773, "grad_norm": 0.9565038681030273, "learning_rate": 0.001, "loss": 0.3385, "step": 293300 }, { "epoch": 94.82870071105366, "grad_norm": 0.9890508055686951, "learning_rate": 0.001, "loss": 0.3452, "step": 293400 }, { "epoch": 94.86102133160956, "grad_norm": 0.8314007520675659, "learning_rate": 0.001, "loss": 0.3409, "step": 293500 }, { "epoch": 94.89334195216549, "grad_norm": 1.344644546508789, "learning_rate": 0.001, "loss": 0.3476, "step": 293600 }, { "epoch": 94.9256625727214, "grad_norm": 1.1226747035980225, "learning_rate": 0.001, "loss": 0.345, "step": 293700 }, { "epoch": 94.95798319327731, "grad_norm": 1.1787598133087158, "learning_rate": 0.001, "loss": 0.3415, "step": 293800 }, { "epoch": 94.99030381383322, "grad_norm": 1.1146658658981323, "learning_rate": 0.001, "loss": 0.3479, "step": 293900 }, { "epoch": 95.02262443438914, "grad_norm": 1.0750230550765991, "learning_rate": 0.001, "loss": 0.3109, "step": 294000 }, { "epoch": 95.05494505494505, "grad_norm": 0.879135012626648, "learning_rate": 0.001, "loss": 0.2963, "step": 294100 }, { "epoch": 95.08726567550097, "grad_norm": 1.0691251754760742, "learning_rate": 0.001, "loss": 0.2966, "step": 294200 }, { "epoch": 95.11958629605688, "grad_norm": 0.9954388737678528, "learning_rate": 0.001, "loss": 0.2993, "step": 294300 }, { "epoch": 95.1519069166128, "grad_norm": 1.1846879720687866, "learning_rate": 0.001, "loss": 0.2953, "step": 294400 }, { "epoch": 95.18422753716871, "grad_norm": 0.8292128443717957, "learning_rate": 0.001, "loss": 0.3041, "step": 294500 }, { "epoch": 95.21654815772463, "grad_norm": 1.0676506757736206, "learning_rate": 0.001, "loss": 0.31, "step": 294600 }, { "epoch": 95.24886877828054, "grad_norm": 1.1302704811096191, "learning_rate": 0.001, "loss": 0.3073, "step": 294700 }, { "epoch": 95.28118939883646, "grad_norm": 1.098841905593872, "learning_rate": 0.001, "loss": 0.3075, "step": 294800 }, { "epoch": 95.31351001939237, "grad_norm": 0.9133958220481873, "learning_rate": 0.001, "loss": 0.3092, "step": 294900 }, { "epoch": 95.34583063994829, "grad_norm": 0.8570176362991333, "learning_rate": 0.001, "loss": 0.3105, "step": 295000 }, { "epoch": 95.3781512605042, "grad_norm": 1.0471367835998535, "learning_rate": 0.001, "loss": 0.316, "step": 295100 }, { "epoch": 95.41047188106012, "grad_norm": 0.9666006565093994, "learning_rate": 0.001, "loss": 0.3158, "step": 295200 }, { "epoch": 95.44279250161603, "grad_norm": 1.2274963855743408, "learning_rate": 0.001, "loss": 0.3173, "step": 295300 }, { "epoch": 95.47511312217195, "grad_norm": 0.9744833707809448, "learning_rate": 0.001, "loss": 0.3245, "step": 295400 }, { "epoch": 95.50743374272786, "grad_norm": 0.959595799446106, "learning_rate": 0.001, "loss": 0.3228, "step": 295500 }, { "epoch": 95.53975436328378, "grad_norm": 0.9851042032241821, "learning_rate": 0.001, "loss": 0.3243, "step": 295600 }, { "epoch": 95.57207498383968, "grad_norm": 1.0527063608169556, "learning_rate": 0.001, "loss": 0.3284, "step": 295700 }, { "epoch": 95.6043956043956, "grad_norm": 0.9141400456428528, "learning_rate": 0.001, "loss": 0.3288, "step": 295800 }, { "epoch": 95.63671622495151, "grad_norm": 1.5334450006484985, "learning_rate": 0.001, "loss": 0.3326, "step": 295900 }, { "epoch": 95.66903684550743, "grad_norm": 1.1169499158859253, "learning_rate": 0.001, "loss": 0.3313, "step": 296000 }, { "epoch": 95.70135746606334, "grad_norm": 1.2132196426391602, "learning_rate": 0.001, "loss": 0.3315, "step": 296100 }, { "epoch": 95.73367808661926, "grad_norm": 1.1573938131332397, "learning_rate": 0.001, "loss": 0.3358, "step": 296200 }, { "epoch": 95.76599870717517, "grad_norm": 1.1136852502822876, "learning_rate": 0.001, "loss": 0.3302, "step": 296300 }, { "epoch": 95.7983193277311, "grad_norm": 1.1400196552276611, "learning_rate": 0.001, "loss": 0.3386, "step": 296400 }, { "epoch": 95.830639948287, "grad_norm": 1.1053751707077026, "learning_rate": 0.001, "loss": 0.3374, "step": 296500 }, { "epoch": 95.86296056884292, "grad_norm": 1.2936042547225952, "learning_rate": 0.001, "loss": 0.3413, "step": 296600 }, { "epoch": 95.89528118939883, "grad_norm": 0.8937700390815735, "learning_rate": 0.001, "loss": 0.3343, "step": 296700 }, { "epoch": 95.92760180995475, "grad_norm": 1.1477892398834229, "learning_rate": 0.001, "loss": 0.3415, "step": 296800 }, { "epoch": 95.95992243051066, "grad_norm": 1.2016029357910156, "learning_rate": 0.001, "loss": 0.34, "step": 296900 }, { "epoch": 95.99224305106658, "grad_norm": 1.0441241264343262, "learning_rate": 0.001, "loss": 0.3475, "step": 297000 }, { "epoch": 96.0245636716225, "grad_norm": 0.8638437986373901, "learning_rate": 0.001, "loss": 0.301, "step": 297100 }, { "epoch": 96.05688429217841, "grad_norm": 1.0192385911941528, "learning_rate": 0.001, "loss": 0.2899, "step": 297200 }, { "epoch": 96.08920491273433, "grad_norm": 0.9291768074035645, "learning_rate": 0.001, "loss": 0.29, "step": 297300 }, { "epoch": 96.12152553329024, "grad_norm": 0.9278668761253357, "learning_rate": 0.001, "loss": 0.2971, "step": 297400 }, { "epoch": 96.15384615384616, "grad_norm": 0.9582951068878174, "learning_rate": 0.001, "loss": 0.3014, "step": 297500 }, { "epoch": 96.18616677440207, "grad_norm": 1.2500112056732178, "learning_rate": 0.001, "loss": 0.3023, "step": 297600 }, { "epoch": 96.21848739495799, "grad_norm": 0.8730352520942688, "learning_rate": 0.001, "loss": 0.2995, "step": 297700 }, { "epoch": 96.2508080155139, "grad_norm": 0.900300145149231, "learning_rate": 0.001, "loss": 0.299, "step": 297800 }, { "epoch": 96.28312863606982, "grad_norm": 0.9397518038749695, "learning_rate": 0.001, "loss": 0.3034, "step": 297900 }, { "epoch": 96.31544925662573, "grad_norm": 0.9644562005996704, "learning_rate": 0.001, "loss": 0.3062, "step": 298000 }, { "epoch": 96.34776987718165, "grad_norm": 1.52510404586792, "learning_rate": 0.001, "loss": 0.2838, "step": 298100 }, { "epoch": 96.38009049773756, "grad_norm": 1.4102787971496582, "learning_rate": 0.001, "loss": 0.2922, "step": 298200 }, { "epoch": 96.41241111829348, "grad_norm": 0.8548278212547302, "learning_rate": 0.001, "loss": 0.2934, "step": 298300 }, { "epoch": 96.44473173884938, "grad_norm": 0.8908146619796753, "learning_rate": 0.001, "loss": 0.2956, "step": 298400 }, { "epoch": 96.4770523594053, "grad_norm": 1.165833830833435, "learning_rate": 0.001, "loss": 0.3002, "step": 298500 }, { "epoch": 96.50937297996121, "grad_norm": 1.04105806350708, "learning_rate": 0.001, "loss": 0.3022, "step": 298600 }, { "epoch": 96.54169360051714, "grad_norm": 1.0093927383422852, "learning_rate": 0.001, "loss": 0.3025, "step": 298700 }, { "epoch": 96.57401422107304, "grad_norm": 0.9814240336418152, "learning_rate": 0.001, "loss": 0.3059, "step": 298800 }, { "epoch": 96.60633484162896, "grad_norm": 0.8546968698501587, "learning_rate": 0.001, "loss": 0.3126, "step": 298900 }, { "epoch": 96.63865546218487, "grad_norm": 0.8697537183761597, "learning_rate": 0.001, "loss": 0.3036, "step": 299000 }, { "epoch": 96.6709760827408, "grad_norm": 1.0340416431427002, "learning_rate": 0.001, "loss": 0.3095, "step": 299100 }, { "epoch": 96.7032967032967, "grad_norm": 1.0208220481872559, "learning_rate": 0.001, "loss": 0.311, "step": 299200 }, { "epoch": 96.73561732385262, "grad_norm": 1.004384994506836, "learning_rate": 0.001, "loss": 0.3122, "step": 299300 }, { "epoch": 96.76793794440853, "grad_norm": 0.9546629190444946, "learning_rate": 0.001, "loss": 0.3144, "step": 299400 }, { "epoch": 96.80025856496445, "grad_norm": 1.420586347579956, "learning_rate": 0.001, "loss": 0.3149, "step": 299500 }, { "epoch": 96.83257918552036, "grad_norm": 0.8971379399299622, "learning_rate": 0.001, "loss": 0.3213, "step": 299600 }, { "epoch": 96.86489980607628, "grad_norm": 1.1719610691070557, "learning_rate": 0.001, "loss": 0.3295, "step": 299700 }, { "epoch": 96.89722042663219, "grad_norm": 1.066396713256836, "learning_rate": 0.001, "loss": 0.3296, "step": 299800 }, { "epoch": 96.92954104718811, "grad_norm": 0.9001867771148682, "learning_rate": 0.001, "loss": 0.3289, "step": 299900 }, { "epoch": 96.96186166774402, "grad_norm": 0.8381643891334534, "learning_rate": 0.001, "loss": 0.3236, "step": 300000 }, { "epoch": 96.99418228829994, "grad_norm": 0.9989371299743652, "learning_rate": 0.001, "loss": 0.3249, "step": 300100 }, { "epoch": 97.02650290885585, "grad_norm": 0.9245850443840027, "learning_rate": 0.001, "loss": 0.2981, "step": 300200 }, { "epoch": 97.05882352941177, "grad_norm": 1.1345840692520142, "learning_rate": 0.001, "loss": 0.2944, "step": 300300 }, { "epoch": 97.09114414996768, "grad_norm": 0.8973127007484436, "learning_rate": 0.001, "loss": 0.2918, "step": 300400 }, { "epoch": 97.1234647705236, "grad_norm": 0.9607701301574707, "learning_rate": 0.001, "loss": 0.2955, "step": 300500 }, { "epoch": 97.1557853910795, "grad_norm": 1.043542504310608, "learning_rate": 0.001, "loss": 0.2974, "step": 300600 }, { "epoch": 97.18810601163543, "grad_norm": 0.9277297258377075, "learning_rate": 0.001, "loss": 0.3008, "step": 300700 }, { "epoch": 97.22042663219133, "grad_norm": 1.0357109308242798, "learning_rate": 0.001, "loss": 0.3063, "step": 300800 }, { "epoch": 97.25274725274726, "grad_norm": 0.9745941758155823, "learning_rate": 0.001, "loss": 0.3052, "step": 300900 }, { "epoch": 97.28506787330316, "grad_norm": 0.9069507718086243, "learning_rate": 0.001, "loss": 0.3089, "step": 301000 }, { "epoch": 97.31738849385908, "grad_norm": 1.103066086769104, "learning_rate": 0.001, "loss": 0.3056, "step": 301100 }, { "epoch": 97.34970911441499, "grad_norm": 0.94358891248703, "learning_rate": 0.001, "loss": 0.3079, "step": 301200 }, { "epoch": 97.38202973497091, "grad_norm": 1.0480751991271973, "learning_rate": 0.001, "loss": 0.307, "step": 301300 }, { "epoch": 97.41435035552682, "grad_norm": 1.1027411222457886, "learning_rate": 0.001, "loss": 0.3104, "step": 301400 }, { "epoch": 97.44667097608274, "grad_norm": 1.1246782541275024, "learning_rate": 0.001, "loss": 0.3173, "step": 301500 }, { "epoch": 97.47899159663865, "grad_norm": 1.0335659980773926, "learning_rate": 0.001, "loss": 0.3208, "step": 301600 }, { "epoch": 97.51131221719457, "grad_norm": 1.1103721857070923, "learning_rate": 0.001, "loss": 0.3229, "step": 301700 }, { "epoch": 97.54363283775048, "grad_norm": 1.0705045461654663, "learning_rate": 0.001, "loss": 0.319, "step": 301800 }, { "epoch": 97.5759534583064, "grad_norm": 1.1745333671569824, "learning_rate": 0.001, "loss": 0.3197, "step": 301900 }, { "epoch": 97.60827407886231, "grad_norm": 0.9382982850074768, "learning_rate": 0.001, "loss": 0.3166, "step": 302000 }, { "epoch": 97.64059469941823, "grad_norm": 0.8743950128555298, "learning_rate": 0.001, "loss": 0.3205, "step": 302100 }, { "epoch": 97.67291531997414, "grad_norm": 0.9783182740211487, "learning_rate": 0.001, "loss": 0.3276, "step": 302200 }, { "epoch": 97.70523594053006, "grad_norm": 0.7884121537208557, "learning_rate": 0.001, "loss": 0.3304, "step": 302300 }, { "epoch": 97.73755656108597, "grad_norm": 1.0552650690078735, "learning_rate": 0.001, "loss": 0.3276, "step": 302400 }, { "epoch": 97.76987718164189, "grad_norm": 0.821494996547699, "learning_rate": 0.001, "loss": 0.3348, "step": 302500 }, { "epoch": 97.8021978021978, "grad_norm": 1.3517026901245117, "learning_rate": 0.001, "loss": 0.3255, "step": 302600 }, { "epoch": 97.83451842275372, "grad_norm": 0.9896811842918396, "learning_rate": 0.001, "loss": 0.3308, "step": 302700 }, { "epoch": 97.86683904330962, "grad_norm": 0.9154704213142395, "learning_rate": 0.001, "loss": 0.3314, "step": 302800 }, { "epoch": 97.89915966386555, "grad_norm": 1.1953586339950562, "learning_rate": 0.001, "loss": 0.3354, "step": 302900 }, { "epoch": 97.93148028442145, "grad_norm": 1.0497210025787354, "learning_rate": 0.001, "loss": 0.3348, "step": 303000 }, { "epoch": 97.96380090497738, "grad_norm": 0.9717264175415039, "learning_rate": 0.001, "loss": 0.3358, "step": 303100 }, { "epoch": 97.99612152553328, "grad_norm": 1.0108492374420166, "learning_rate": 0.001, "loss": 0.3374, "step": 303200 }, { "epoch": 98.0284421460892, "grad_norm": 1.0328601598739624, "learning_rate": 0.001, "loss": 0.2868, "step": 303300 }, { "epoch": 98.06076276664513, "grad_norm": 1.0747592449188232, "learning_rate": 0.001, "loss": 0.2853, "step": 303400 }, { "epoch": 98.09308338720103, "grad_norm": 1.1372816562652588, "learning_rate": 0.001, "loss": 0.2847, "step": 303500 }, { "epoch": 98.12540400775696, "grad_norm": 0.9705296158790588, "learning_rate": 0.001, "loss": 0.2886, "step": 303600 }, { "epoch": 98.15772462831286, "grad_norm": 1.2798770666122437, "learning_rate": 0.001, "loss": 0.2916, "step": 303700 }, { "epoch": 98.19004524886878, "grad_norm": 1.0893833637237549, "learning_rate": 0.001, "loss": 0.2946, "step": 303800 }, { "epoch": 98.22236586942469, "grad_norm": 1.0411652326583862, "learning_rate": 0.001, "loss": 0.2939, "step": 303900 }, { "epoch": 98.25468648998061, "grad_norm": 0.918574333190918, "learning_rate": 0.001, "loss": 0.2924, "step": 304000 }, { "epoch": 98.28700711053652, "grad_norm": 1.0690749883651733, "learning_rate": 0.001, "loss": 0.2972, "step": 304100 }, { "epoch": 98.31932773109244, "grad_norm": 1.015520691871643, "learning_rate": 0.001, "loss": 0.2974, "step": 304200 }, { "epoch": 98.35164835164835, "grad_norm": 0.8617237210273743, "learning_rate": 0.001, "loss": 0.3061, "step": 304300 }, { "epoch": 98.38396897220427, "grad_norm": 0.9893969893455505, "learning_rate": 0.001, "loss": 0.3068, "step": 304400 }, { "epoch": 98.41628959276018, "grad_norm": 0.9186530113220215, "learning_rate": 0.001, "loss": 0.302, "step": 304500 }, { "epoch": 98.4486102133161, "grad_norm": 0.818869411945343, "learning_rate": 0.001, "loss": 0.317, "step": 304600 }, { "epoch": 98.48093083387201, "grad_norm": 1.182711124420166, "learning_rate": 0.001, "loss": 0.3051, "step": 304700 }, { "epoch": 98.51325145442793, "grad_norm": 0.9321359395980835, "learning_rate": 0.001, "loss": 0.317, "step": 304800 }, { "epoch": 98.54557207498384, "grad_norm": 0.9476949572563171, "learning_rate": 0.001, "loss": 0.314, "step": 304900 }, { "epoch": 98.57789269553976, "grad_norm": 1.0242886543273926, "learning_rate": 0.001, "loss": 0.3157, "step": 305000 }, { "epoch": 98.61021331609567, "grad_norm": 0.9900296330451965, "learning_rate": 0.001, "loss": 0.3186, "step": 305100 }, { "epoch": 98.64253393665159, "grad_norm": 0.932622492313385, "learning_rate": 0.001, "loss": 0.3192, "step": 305200 }, { "epoch": 98.6748545572075, "grad_norm": 1.31930673122406, "learning_rate": 0.001, "loss": 0.3208, "step": 305300 }, { "epoch": 98.70717517776342, "grad_norm": 1.0266984701156616, "learning_rate": 0.001, "loss": 0.3177, "step": 305400 }, { "epoch": 98.73949579831933, "grad_norm": 1.1642436981201172, "learning_rate": 0.001, "loss": 0.3283, "step": 305500 }, { "epoch": 98.77181641887525, "grad_norm": 1.2960577011108398, "learning_rate": 0.001, "loss": 0.331, "step": 305600 }, { "epoch": 98.80413703943115, "grad_norm": 0.9979022145271301, "learning_rate": 0.001, "loss": 0.3243, "step": 305700 }, { "epoch": 98.83645765998708, "grad_norm": 0.9198813438415527, "learning_rate": 0.001, "loss": 0.3251, "step": 305800 }, { "epoch": 98.86877828054298, "grad_norm": 0.9677019119262695, "learning_rate": 0.001, "loss": 0.3259, "step": 305900 }, { "epoch": 98.9010989010989, "grad_norm": 1.3916834592819214, "learning_rate": 0.001, "loss": 0.3252, "step": 306000 }, { "epoch": 98.93341952165481, "grad_norm": 0.9493709206581116, "learning_rate": 0.001, "loss": 0.3317, "step": 306100 }, { "epoch": 98.96574014221073, "grad_norm": 1.203032374382019, "learning_rate": 0.001, "loss": 0.3304, "step": 306200 }, { "epoch": 98.99806076276664, "grad_norm": 0.9089868664741516, "learning_rate": 0.001, "loss": 0.333, "step": 306300 }, { "epoch": 99.03038138332256, "grad_norm": 0.9423157572746277, "learning_rate": 0.001, "loss": 0.2819, "step": 306400 }, { "epoch": 99.06270200387847, "grad_norm": 0.8564140200614929, "learning_rate": 0.001, "loss": 0.2825, "step": 306500 }, { "epoch": 99.09502262443439, "grad_norm": 0.927879810333252, "learning_rate": 0.001, "loss": 0.2838, "step": 306600 }, { "epoch": 99.1273432449903, "grad_norm": 0.994034469127655, "learning_rate": 0.001, "loss": 0.284, "step": 306700 }, { "epoch": 99.15966386554622, "grad_norm": 1.0891473293304443, "learning_rate": 0.001, "loss": 0.2877, "step": 306800 }, { "epoch": 99.19198448610213, "grad_norm": 0.8748770952224731, "learning_rate": 0.001, "loss": 0.2938, "step": 306900 }, { "epoch": 99.22430510665805, "grad_norm": 0.7188429236412048, "learning_rate": 0.001, "loss": 0.2928, "step": 307000 }, { "epoch": 99.25662572721396, "grad_norm": 0.8335596919059753, "learning_rate": 0.001, "loss": 0.2933, "step": 307100 }, { "epoch": 99.28894634776988, "grad_norm": 1.0737080574035645, "learning_rate": 0.001, "loss": 0.3014, "step": 307200 }, { "epoch": 99.32126696832579, "grad_norm": 0.8751986622810364, "learning_rate": 0.001, "loss": 0.2998, "step": 307300 }, { "epoch": 99.35358758888171, "grad_norm": 0.8792111873626709, "learning_rate": 0.001, "loss": 0.2979, "step": 307400 }, { "epoch": 99.38590820943762, "grad_norm": 0.8787132501602173, "learning_rate": 0.001, "loss": 0.309, "step": 307500 }, { "epoch": 99.41822882999354, "grad_norm": 0.9008506536483765, "learning_rate": 0.001, "loss": 0.3032, "step": 307600 }, { "epoch": 99.45054945054945, "grad_norm": 1.0287768840789795, "learning_rate": 0.001, "loss": 0.3021, "step": 307700 }, { "epoch": 99.48287007110537, "grad_norm": 0.9312304854393005, "learning_rate": 0.001, "loss": 0.3126, "step": 307800 }, { "epoch": 99.51519069166127, "grad_norm": 1.1244314908981323, "learning_rate": 0.001, "loss": 0.3064, "step": 307900 }, { "epoch": 99.5475113122172, "grad_norm": 0.9401556253433228, "learning_rate": 0.001, "loss": 0.3086, "step": 308000 }, { "epoch": 99.5798319327731, "grad_norm": 0.8846582174301147, "learning_rate": 0.001, "loss": 0.31, "step": 308100 }, { "epoch": 99.61215255332903, "grad_norm": 1.1068730354309082, "learning_rate": 0.001, "loss": 0.3103, "step": 308200 }, { "epoch": 99.64447317388493, "grad_norm": 0.9638748168945312, "learning_rate": 0.001, "loss": 0.315, "step": 308300 }, { "epoch": 99.67679379444085, "grad_norm": 0.9338170289993286, "learning_rate": 0.001, "loss": 0.3211, "step": 308400 }, { "epoch": 99.70911441499676, "grad_norm": 1.134093999862671, "learning_rate": 0.001, "loss": 0.3171, "step": 308500 }, { "epoch": 99.74143503555268, "grad_norm": 0.8674108982086182, "learning_rate": 0.001, "loss": 0.3199, "step": 308600 }, { "epoch": 99.77375565610859, "grad_norm": 1.1616936922073364, "learning_rate": 0.001, "loss": 0.3192, "step": 308700 }, { "epoch": 99.80607627666451, "grad_norm": 1.20745849609375, "learning_rate": 0.001, "loss": 0.323, "step": 308800 }, { "epoch": 99.83839689722042, "grad_norm": 0.912433922290802, "learning_rate": 0.001, "loss": 0.324, "step": 308900 }, { "epoch": 99.87071751777634, "grad_norm": 0.9443680047988892, "learning_rate": 0.001, "loss": 0.3264, "step": 309000 }, { "epoch": 99.90303813833225, "grad_norm": 1.096807837486267, "learning_rate": 0.001, "loss": 0.3257, "step": 309100 }, { "epoch": 99.93535875888817, "grad_norm": 0.8975643515586853, "learning_rate": 0.001, "loss": 0.3247, "step": 309200 }, { "epoch": 99.96767937944408, "grad_norm": 1.0424891710281372, "learning_rate": 0.001, "loss": 0.3254, "step": 309300 }, { "epoch": 100.0, "grad_norm": 1.1122369766235352, "learning_rate": 0.001, "loss": 0.32, "step": 309400 }, { "epoch": 100.0, "step": 309400, "total_flos": 1.3416077993472e+18, "train_loss": 0.00946761073955198, "train_runtime": 2056.1068, "train_samples_per_second": 4814.925, "train_steps_per_second": 150.479 } ], "logging_steps": 100, "max_steps": 309400, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3416077993472e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }