{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 20000, "global_step": 309400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03232062055591468, "grad_norm": 72.69493865966797, "learning_rate": 9.900000000000002e-06, "loss": 4.6926, "step": 100 }, { "epoch": 0.06464124111182935, "grad_norm": 23.135353088378906, "learning_rate": 1.9900000000000003e-05, "loss": 4.2515, "step": 200 }, { "epoch": 0.09696186166774402, "grad_norm": 11.328283309936523, "learning_rate": 2.9900000000000002e-05, "loss": 4.0864, "step": 300 }, { "epoch": 0.1292824822236587, "grad_norm": 2.6657352447509766, "learning_rate": 3.99e-05, "loss": 4.0225, "step": 400 }, { "epoch": 0.16160310277957338, "grad_norm": 2.3429675102233887, "learning_rate": 4.99e-05, "loss": 3.983, "step": 500 }, { "epoch": 0.19392372333548805, "grad_norm": 1.887925148010254, "learning_rate": 5.9900000000000006e-05, "loss": 3.9677, "step": 600 }, { "epoch": 0.22624434389140272, "grad_norm": 2.880443811416626, "learning_rate": 6.99e-05, "loss": 3.9613, "step": 700 }, { "epoch": 0.2585649644473174, "grad_norm": 2.1577000617980957, "learning_rate": 7.99e-05, "loss": 3.9546, "step": 800 }, { "epoch": 0.2908855850032321, "grad_norm": 1.9247859716415405, "learning_rate": 8.989999999999999e-05, "loss": 3.9181, "step": 900 }, { "epoch": 0.32320620555914675, "grad_norm": 2.573133945465088, "learning_rate": 9.99e-05, "loss": 3.9221, "step": 1000 }, { "epoch": 0.3555268261150614, "grad_norm": 1.9989290237426758, "learning_rate": 0.0001099, "loss": 3.8933, "step": 1100 }, { "epoch": 0.3878474466709761, "grad_norm": 2.2814531326293945, "learning_rate": 0.00011990000000000001, "loss": 3.8719, "step": 1200 }, { "epoch": 0.42016806722689076, "grad_norm": 2.458432197570801, "learning_rate": 0.00012989999999999999, "loss": 3.8443, "step": 1300 }, { "epoch": 0.45248868778280543, "grad_norm": 2.0653417110443115, "learning_rate": 0.0001399, "loss": 3.8193, "step": 1400 }, { "epoch": 0.4848093083387201, "grad_norm": 2.419416666030884, "learning_rate": 0.0001499, "loss": 3.7737, "step": 1500 }, { "epoch": 0.5171299288946348, "grad_norm": 2.163240909576416, "learning_rate": 0.00015989999999999998, "loss": 3.7631, "step": 1600 }, { "epoch": 0.5494505494505495, "grad_norm": 2.2348344326019287, "learning_rate": 0.0001699, "loss": 3.772, "step": 1700 }, { "epoch": 0.5817711700064642, "grad_norm": 2.0993492603302, "learning_rate": 0.0001799, "loss": 3.7321, "step": 1800 }, { "epoch": 0.6140917905623788, "grad_norm": 2.437217950820923, "learning_rate": 0.0001899, "loss": 3.7403, "step": 1900 }, { "epoch": 0.6464124111182935, "grad_norm": 2.181126356124878, "learning_rate": 0.0001999, "loss": 3.6949, "step": 2000 }, { "epoch": 0.6787330316742082, "grad_norm": 2.3080005645751953, "learning_rate": 0.0002099, "loss": 3.6878, "step": 2100 }, { "epoch": 0.7110536522301228, "grad_norm": 2.473737955093384, "learning_rate": 0.0002199, "loss": 3.6674, "step": 2200 }, { "epoch": 0.7433742727860375, "grad_norm": 2.345834970474243, "learning_rate": 0.0002299, "loss": 3.6612, "step": 2300 }, { "epoch": 0.7756948933419522, "grad_norm": 2.674607753753662, "learning_rate": 0.0002399, "loss": 3.6347, "step": 2400 }, { "epoch": 0.8080155138978669, "grad_norm": 2.227604389190674, "learning_rate": 0.0002499, "loss": 3.6095, "step": 2500 }, { "epoch": 0.8403361344537815, "grad_norm": 2.51373553276062, "learning_rate": 0.00025990000000000003, "loss": 3.6329, "step": 2600 }, { "epoch": 0.8726567550096962, "grad_norm": 2.00618314743042, "learning_rate": 0.0002699, "loss": 3.5774, "step": 2700 }, { "epoch": 0.9049773755656109, "grad_norm": 2.3344666957855225, "learning_rate": 0.0002799, "loss": 3.5739, "step": 2800 }, { "epoch": 0.9372979961215255, "grad_norm": 5.624629974365234, "learning_rate": 0.0002899, "loss": 3.5559, "step": 2900 }, { "epoch": 0.9696186166774402, "grad_norm": 2.345763683319092, "learning_rate": 0.0002999, "loss": 3.5348, "step": 3000 }, { "epoch": 1.0019392372333549, "grad_norm": 1.3896344900131226, "learning_rate": 0.0003099, "loss": 3.5449, "step": 3100 }, { "epoch": 1.0342598577892697, "grad_norm": 1.5742976665496826, "learning_rate": 0.0003199, "loss": 3.4849, "step": 3200 }, { "epoch": 1.0665804783451842, "grad_norm": 2.1341753005981445, "learning_rate": 0.00032990000000000005, "loss": 3.4951, "step": 3300 }, { "epoch": 1.098901098901099, "grad_norm": 1.2879307270050049, "learning_rate": 0.00033989999999999997, "loss": 3.4841, "step": 3400 }, { "epoch": 1.1312217194570136, "grad_norm": 1.4358031749725342, "learning_rate": 0.0003499, "loss": 3.4535, "step": 3500 }, { "epoch": 1.1635423400129283, "grad_norm": 1.1226449012756348, "learning_rate": 0.0003599, "loss": 3.4809, "step": 3600 }, { "epoch": 1.195862960568843, "grad_norm": 1.422311544418335, "learning_rate": 0.0003699, "loss": 3.471, "step": 3700 }, { "epoch": 1.2281835811247577, "grad_norm": 1.0241137742996216, "learning_rate": 0.0003799, "loss": 3.461, "step": 3800 }, { "epoch": 1.2605042016806722, "grad_norm": 1.6753121614456177, "learning_rate": 0.00038990000000000004, "loss": 3.4675, "step": 3900 }, { "epoch": 1.292824822236587, "grad_norm": 1.6611377000808716, "learning_rate": 0.00039989999999999996, "loss": 3.4455, "step": 4000 }, { "epoch": 1.3251454427925016, "grad_norm": 1.438313603401184, "learning_rate": 0.0004099, "loss": 3.4321, "step": 4100 }, { "epoch": 1.3574660633484164, "grad_norm": 1.1133291721343994, "learning_rate": 0.0004199, "loss": 3.438, "step": 4200 }, { "epoch": 1.389786683904331, "grad_norm": 1.128862738609314, "learning_rate": 0.0004299, "loss": 3.4298, "step": 4300 }, { "epoch": 1.4221073044602457, "grad_norm": 1.1996052265167236, "learning_rate": 0.0004399, "loss": 3.4292, "step": 4400 }, { "epoch": 1.4544279250161603, "grad_norm": 1.7385873794555664, "learning_rate": 0.00044990000000000004, "loss": 3.4226, "step": 4500 }, { "epoch": 1.486748545572075, "grad_norm": 1.5057859420776367, "learning_rate": 0.0004599, "loss": 3.4023, "step": 4600 }, { "epoch": 1.5190691661279896, "grad_norm": 1.4652315378189087, "learning_rate": 0.0004699, "loss": 3.4124, "step": 4700 }, { "epoch": 1.5513897866839044, "grad_norm": 1.0963859558105469, "learning_rate": 0.0004799, "loss": 3.3926, "step": 4800 }, { "epoch": 1.5837104072398192, "grad_norm": 1.2108802795410156, "learning_rate": 0.0004899, "loss": 3.4172, "step": 4900 }, { "epoch": 1.6160310277957337, "grad_norm": 1.786129355430603, "learning_rate": 0.0004999000000000001, "loss": 3.4146, "step": 5000 }, { "epoch": 1.6483516483516483, "grad_norm": 63.912750244140625, "learning_rate": 0.0005099, "loss": 3.3999, "step": 5100 }, { "epoch": 1.680672268907563, "grad_norm": 1.1686525344848633, "learning_rate": 0.0005199, "loss": 3.397, "step": 5200 }, { "epoch": 1.7129928894634778, "grad_norm": 1.364325761795044, "learning_rate": 0.0005299, "loss": 3.3958, "step": 5300 }, { "epoch": 1.7453135100193924, "grad_norm": 1.1780201196670532, "learning_rate": 0.0005399000000000001, "loss": 3.4028, "step": 5400 }, { "epoch": 1.777634130575307, "grad_norm": 1.065304160118103, "learning_rate": 0.0005499000000000001, "loss": 3.3808, "step": 5500 }, { "epoch": 1.8099547511312217, "grad_norm": 1.0597527027130127, "learning_rate": 0.0005599, "loss": 3.3706, "step": 5600 }, { "epoch": 1.8422753716871365, "grad_norm": 1.3476094007492065, "learning_rate": 0.0005698999999999999, "loss": 3.3688, "step": 5700 }, { "epoch": 1.874595992243051, "grad_norm": 2.879058599472046, "learning_rate": 0.0005799, "loss": 3.4072, "step": 5800 }, { "epoch": 1.9069166127989656, "grad_norm": 1.1472113132476807, "learning_rate": 0.0005899, "loss": 3.3886, "step": 5900 }, { "epoch": 1.9392372333548804, "grad_norm": 1.2379556894302368, "learning_rate": 0.0005999, "loss": 3.3785, "step": 6000 }, { "epoch": 1.9715578539107952, "grad_norm": 1.6195952892303467, "learning_rate": 0.0006099, "loss": 3.3841, "step": 6100 }, { "epoch": 2.0038784744667097, "grad_norm": 0.9655342102050781, "learning_rate": 0.0006199, "loss": 3.3795, "step": 6200 }, { "epoch": 2.0361990950226243, "grad_norm": 1.3918567895889282, "learning_rate": 0.0006299000000000001, "loss": 3.3073, "step": 6300 }, { "epoch": 2.0685197155785393, "grad_norm": 8.419848442077637, "learning_rate": 0.0006399, "loss": 3.3047, "step": 6400 }, { "epoch": 2.100840336134454, "grad_norm": 1.0073812007904053, "learning_rate": 0.0006499, "loss": 3.3054, "step": 6500 }, { "epoch": 2.1331609566903684, "grad_norm": 1.4536677598953247, "learning_rate": 0.0006599, "loss": 3.3051, "step": 6600 }, { "epoch": 2.165481577246283, "grad_norm": 1.4026859998703003, "learning_rate": 0.0006699000000000001, "loss": 3.3085, "step": 6700 }, { "epoch": 2.197802197802198, "grad_norm": 0.9917745590209961, "learning_rate": 0.0006799, "loss": 3.2987, "step": 6800 }, { "epoch": 2.2301228183581125, "grad_norm": 0.9140070676803589, "learning_rate": 0.0006899, "loss": 3.2913, "step": 6900 }, { "epoch": 2.262443438914027, "grad_norm": 1.244673728942871, "learning_rate": 0.0006998999999999999, "loss": 3.3051, "step": 7000 }, { "epoch": 2.2947640594699417, "grad_norm": 1.1538276672363281, "learning_rate": 0.0007099, "loss": 3.2893, "step": 7100 }, { "epoch": 2.3270846800258567, "grad_norm": 1.0714364051818848, "learning_rate": 0.0007199, "loss": 3.3056, "step": 7200 }, { "epoch": 2.3594053005817712, "grad_norm": 1.0838427543640137, "learning_rate": 0.0007299, "loss": 3.2883, "step": 7300 }, { "epoch": 2.391725921137686, "grad_norm": 1.9484409093856812, "learning_rate": 0.0007399, "loss": 3.2912, "step": 7400 }, { "epoch": 2.4240465416936003, "grad_norm": 0.8263738751411438, "learning_rate": 0.0007499000000000001, "loss": 3.3058, "step": 7500 }, { "epoch": 2.4563671622495153, "grad_norm": 1.0178207159042358, "learning_rate": 0.0007599, "loss": 3.3047, "step": 7600 }, { "epoch": 2.48868778280543, "grad_norm": 1.4200830459594727, "learning_rate": 0.0007699, "loss": 3.2972, "step": 7700 }, { "epoch": 2.5210084033613445, "grad_norm": 1.1275110244750977, "learning_rate": 0.0007799, "loss": 3.2755, "step": 7800 }, { "epoch": 2.553329023917259, "grad_norm": 4.5084757804870605, "learning_rate": 0.0007899000000000001, "loss": 3.263, "step": 7900 }, { "epoch": 2.585649644473174, "grad_norm": 1.0436567068099976, "learning_rate": 0.0007999000000000001, "loss": 3.3055, "step": 8000 }, { "epoch": 2.6179702650290886, "grad_norm": 1.3180861473083496, "learning_rate": 0.0008099, "loss": 3.3036, "step": 8100 }, { "epoch": 2.650290885585003, "grad_norm": 30.407550811767578, "learning_rate": 0.0008198999999999999, "loss": 3.2697, "step": 8200 }, { "epoch": 2.682611506140918, "grad_norm": 1.135068416595459, "learning_rate": 0.0008299, "loss": 3.2863, "step": 8300 }, { "epoch": 2.7149321266968327, "grad_norm": 1.2667639255523682, "learning_rate": 0.0008399, "loss": 3.2935, "step": 8400 }, { "epoch": 2.7472527472527473, "grad_norm": 1.0069563388824463, "learning_rate": 0.0008499, "loss": 3.2845, "step": 8500 }, { "epoch": 2.779573367808662, "grad_norm": 1.5233217477798462, "learning_rate": 0.0008599, "loss": 3.2746, "step": 8600 }, { "epoch": 2.8118939883645764, "grad_norm": 1.8134593963623047, "learning_rate": 0.0008699000000000001, "loss": 3.2626, "step": 8700 }, { "epoch": 2.8442146089204914, "grad_norm": 0.9406353831291199, "learning_rate": 0.0008799000000000001, "loss": 3.2745, "step": 8800 }, { "epoch": 2.876535229476406, "grad_norm": 1.378232717514038, "learning_rate": 0.0008899, "loss": 3.2848, "step": 8900 }, { "epoch": 2.9088558500323205, "grad_norm": 0.962679386138916, "learning_rate": 0.0008999, "loss": 3.2617, "step": 9000 }, { "epoch": 2.9411764705882355, "grad_norm": 1.2002023458480835, "learning_rate": 0.0009099, "loss": 3.283, "step": 9100 }, { "epoch": 2.97349709114415, "grad_norm": 1.681128978729248, "learning_rate": 0.0009199000000000001, "loss": 3.2808, "step": 9200 }, { "epoch": 3.0058177117000646, "grad_norm": 0.9787313342094421, "learning_rate": 0.0009299, "loss": 3.271, "step": 9300 }, { "epoch": 3.038138332255979, "grad_norm": 1.0612725019454956, "learning_rate": 0.0009399, "loss": 3.18, "step": 9400 }, { "epoch": 3.070458952811894, "grad_norm": 1.0846830606460571, "learning_rate": 0.0009498999999999999, "loss": 3.1897, "step": 9500 }, { "epoch": 3.1027795733678087, "grad_norm": 1.2514480352401733, "learning_rate": 0.0009599, "loss": 3.1937, "step": 9600 }, { "epoch": 3.1351001939237233, "grad_norm": 1.2659525871276855, "learning_rate": 0.0009699, "loss": 3.1909, "step": 9700 }, { "epoch": 3.167420814479638, "grad_norm": 0.9239290952682495, "learning_rate": 0.0009799, "loss": 3.1885, "step": 9800 }, { "epoch": 3.199741435035553, "grad_norm": 1.0374817848205566, "learning_rate": 0.0009899, "loss": 3.1727, "step": 9900 }, { "epoch": 3.2320620555914674, "grad_norm": 1.1210569143295288, "learning_rate": 0.0009999, "loss": 3.1989, "step": 10000 }, { "epoch": 3.264382676147382, "grad_norm": 1.262765645980835, "learning_rate": 0.001, "loss": 3.217, "step": 10100 }, { "epoch": 3.2967032967032965, "grad_norm": 1.2551616430282593, "learning_rate": 0.001, "loss": 3.2026, "step": 10200 }, { "epoch": 3.3290239172592115, "grad_norm": 0.8465852737426758, "learning_rate": 0.001, "loss": 3.1966, "step": 10300 }, { "epoch": 3.361344537815126, "grad_norm": 0.9038814306259155, "learning_rate": 0.001, "loss": 3.1873, "step": 10400 }, { "epoch": 3.3936651583710407, "grad_norm": 1.6032987833023071, "learning_rate": 0.001, "loss": 3.1883, "step": 10500 }, { "epoch": 3.425985778926955, "grad_norm": 4.427276611328125, "learning_rate": 0.001, "loss": 3.211, "step": 10600 }, { "epoch": 3.45830639948287, "grad_norm": 2.015230894088745, "learning_rate": 0.001, "loss": 3.2117, "step": 10700 }, { "epoch": 3.490627020038785, "grad_norm": 0.8382908701896667, "learning_rate": 0.001, "loss": 3.1997, "step": 10800 }, { "epoch": 3.5229476405946993, "grad_norm": 0.8825782537460327, "learning_rate": 0.001, "loss": 3.1928, "step": 10900 }, { "epoch": 3.555268261150614, "grad_norm": 1.9547151327133179, "learning_rate": 0.001, "loss": 3.1821, "step": 11000 }, { "epoch": 3.587588881706529, "grad_norm": 1.0269750356674194, "learning_rate": 0.001, "loss": 3.193, "step": 11100 }, { "epoch": 3.6199095022624435, "grad_norm": 1.2436543703079224, "learning_rate": 0.001, "loss": 3.1928, "step": 11200 }, { "epoch": 3.652230122818358, "grad_norm": 1.1015961170196533, "learning_rate": 0.001, "loss": 3.2153, "step": 11300 }, { "epoch": 3.684550743374273, "grad_norm": 1.053785800933838, "learning_rate": 0.001, "loss": 3.1885, "step": 11400 }, { "epoch": 3.7168713639301876, "grad_norm": 1.1661300659179688, "learning_rate": 0.001, "loss": 3.1657, "step": 11500 }, { "epoch": 3.749191984486102, "grad_norm": 0.9985815286636353, "learning_rate": 0.001, "loss": 3.1788, "step": 11600 }, { "epoch": 3.7815126050420167, "grad_norm": 0.9899930953979492, "learning_rate": 0.001, "loss": 3.1866, "step": 11700 }, { "epoch": 3.8138332255979313, "grad_norm": 1.0321563482284546, "learning_rate": 0.001, "loss": 3.1832, "step": 11800 }, { "epoch": 3.8461538461538463, "grad_norm": 1.0053410530090332, "learning_rate": 0.001, "loss": 3.1694, "step": 11900 }, { "epoch": 3.878474466709761, "grad_norm": 1.15951406955719, "learning_rate": 0.001, "loss": 3.1818, "step": 12000 }, { "epoch": 3.9107950872656754, "grad_norm": 1.1773817539215088, "learning_rate": 0.001, "loss": 3.1678, "step": 12100 }, { "epoch": 3.9431157078215904, "grad_norm": 0.9126469492912292, "learning_rate": 0.001, "loss": 3.1617, "step": 12200 }, { "epoch": 3.975436328377505, "grad_norm": 1.4895422458648682, "learning_rate": 0.001, "loss": 3.1937, "step": 12300 }, { "epoch": 4.0077569489334195, "grad_norm": 1.5335532426834106, "learning_rate": 0.001, "loss": 3.1699, "step": 12400 }, { "epoch": 4.040077569489334, "grad_norm": 0.8907751441001892, "learning_rate": 0.001, "loss": 3.0591, "step": 12500 }, { "epoch": 4.072398190045249, "grad_norm": 1.0643270015716553, "learning_rate": 0.001, "loss": 3.0627, "step": 12600 }, { "epoch": 4.104718810601163, "grad_norm": 1.6155329942703247, "learning_rate": 0.001, "loss": 3.0746, "step": 12700 }, { "epoch": 4.137039431157079, "grad_norm": 2.3686649799346924, "learning_rate": 0.001, "loss": 3.0695, "step": 12800 }, { "epoch": 4.169360051712993, "grad_norm": 0.955572783946991, "learning_rate": 0.001, "loss": 3.0727, "step": 12900 }, { "epoch": 4.201680672268908, "grad_norm": 1.466068148612976, "learning_rate": 0.001, "loss": 3.0651, "step": 13000 }, { "epoch": 4.234001292824822, "grad_norm": 1.0525376796722412, "learning_rate": 0.001, "loss": 3.0836, "step": 13100 }, { "epoch": 4.266321913380737, "grad_norm": 1.3606575727462769, "learning_rate": 0.001, "loss": 3.0703, "step": 13200 }, { "epoch": 4.298642533936651, "grad_norm": 0.9707290530204773, "learning_rate": 0.001, "loss": 3.0918, "step": 13300 }, { "epoch": 4.330963154492566, "grad_norm": 0.8722020983695984, "learning_rate": 0.001, "loss": 3.0852, "step": 13400 }, { "epoch": 4.3632837750484805, "grad_norm": 0.9314419031143188, "learning_rate": 0.001, "loss": 3.1113, "step": 13500 }, { "epoch": 4.395604395604396, "grad_norm": 0.9096336364746094, "learning_rate": 0.001, "loss": 3.0726, "step": 13600 }, { "epoch": 4.4279250161603105, "grad_norm": 1.0619395971298218, "learning_rate": 0.001, "loss": 3.0897, "step": 13700 }, { "epoch": 4.460245636716225, "grad_norm": 1.046873927116394, "learning_rate": 0.001, "loss": 3.0952, "step": 13800 }, { "epoch": 4.49256625727214, "grad_norm": 1.1861430406570435, "learning_rate": 0.001, "loss": 3.0967, "step": 13900 }, { "epoch": 4.524886877828054, "grad_norm": 1.0336763858795166, "learning_rate": 0.001, "loss": 3.089, "step": 14000 }, { "epoch": 4.557207498383969, "grad_norm": 0.9000735878944397, "learning_rate": 0.001, "loss": 3.0633, "step": 14100 }, { "epoch": 4.589528118939883, "grad_norm": 1.0032758712768555, "learning_rate": 0.001, "loss": 3.0821, "step": 14200 }, { "epoch": 4.621848739495798, "grad_norm": 1.038820743560791, "learning_rate": 0.001, "loss": 3.0841, "step": 14300 }, { "epoch": 4.654169360051713, "grad_norm": 1.0038261413574219, "learning_rate": 0.001, "loss": 3.0766, "step": 14400 }, { "epoch": 4.686489980607628, "grad_norm": 1.0693703889846802, "learning_rate": 0.001, "loss": 3.0799, "step": 14500 }, { "epoch": 4.7188106011635425, "grad_norm": 252.37478637695312, "learning_rate": 0.001, "loss": 3.092, "step": 14600 }, { "epoch": 4.751131221719457, "grad_norm": 1.0643776655197144, "learning_rate": 0.001, "loss": 3.0819, "step": 14700 }, { "epoch": 4.783451842275372, "grad_norm": 1.0738087892532349, "learning_rate": 0.001, "loss": 3.0926, "step": 14800 }, { "epoch": 4.815772462831286, "grad_norm": 1.2091130018234253, "learning_rate": 0.001, "loss": 3.0869, "step": 14900 }, { "epoch": 4.848093083387201, "grad_norm": 1.18547785282135, "learning_rate": 0.001, "loss": 3.047, "step": 15000 }, { "epoch": 4.880413703943116, "grad_norm": 0.8726029396057129, "learning_rate": 0.001, "loss": 3.0705, "step": 15100 }, { "epoch": 4.912734324499031, "grad_norm": 0.7934514284133911, "learning_rate": 0.001, "loss": 3.0795, "step": 15200 }, { "epoch": 4.945054945054945, "grad_norm": 0.7607043981552124, "learning_rate": 0.001, "loss": 3.0832, "step": 15300 }, { "epoch": 4.97737556561086, "grad_norm": 2.6883575916290283, "learning_rate": 0.001, "loss": 3.0787, "step": 15400 }, { "epoch": 5.009696186166774, "grad_norm": 1.5376274585723877, "learning_rate": 0.001, "loss": 3.0592, "step": 15500 }, { "epoch": 5.042016806722689, "grad_norm": 1.2259726524353027, "learning_rate": 0.001, "loss": 2.9604, "step": 15600 }, { "epoch": 5.0743374272786035, "grad_norm": 1.015201210975647, "learning_rate": 0.001, "loss": 2.964, "step": 15700 }, { "epoch": 5.106658047834518, "grad_norm": 1.4067490100860596, "learning_rate": 0.001, "loss": 2.9772, "step": 15800 }, { "epoch": 5.1389786683904335, "grad_norm": 1.2442225217819214, "learning_rate": 0.001, "loss": 2.9628, "step": 15900 }, { "epoch": 5.171299288946348, "grad_norm": 0.9024932980537415, "learning_rate": 0.001, "loss": 2.9917, "step": 16000 }, { "epoch": 5.203619909502263, "grad_norm": 2.2738070487976074, "learning_rate": 0.001, "loss": 2.973, "step": 16100 }, { "epoch": 5.235940530058177, "grad_norm": 0.9074735045433044, "learning_rate": 0.001, "loss": 2.9723, "step": 16200 }, { "epoch": 5.268261150614092, "grad_norm": 0.9674960374832153, "learning_rate": 0.001, "loss": 2.9836, "step": 16300 }, { "epoch": 5.300581771170006, "grad_norm": 1.4998130798339844, "learning_rate": 0.001, "loss": 2.9768, "step": 16400 }, { "epoch": 5.332902391725921, "grad_norm": 1.1380162239074707, "learning_rate": 0.001, "loss": 3.0141, "step": 16500 }, { "epoch": 5.365223012281835, "grad_norm": 0.9653527140617371, "learning_rate": 0.001, "loss": 3.0056, "step": 16600 }, { "epoch": 5.397543632837751, "grad_norm": 0.940452516078949, "learning_rate": 0.001, "loss": 2.99, "step": 16700 }, { "epoch": 5.429864253393665, "grad_norm": 0.9358466863632202, "learning_rate": 0.001, "loss": 3.0069, "step": 16800 }, { "epoch": 5.46218487394958, "grad_norm": 4.019763946533203, "learning_rate": 0.001, "loss": 3.005, "step": 16900 }, { "epoch": 5.4945054945054945, "grad_norm": 0.9941039085388184, "learning_rate": 0.001, "loss": 2.984, "step": 17000 }, { "epoch": 5.526826115061409, "grad_norm": 0.9077835083007812, "learning_rate": 0.001, "loss": 3.0108, "step": 17100 }, { "epoch": 5.559146735617324, "grad_norm": 1.0444730520248413, "learning_rate": 0.001, "loss": 2.9954, "step": 17200 }, { "epoch": 5.591467356173238, "grad_norm": 0.8790830969810486, "learning_rate": 0.001, "loss": 2.978, "step": 17300 }, { "epoch": 5.623787976729153, "grad_norm": 1.1472781896591187, "learning_rate": 0.001, "loss": 3.0051, "step": 17400 }, { "epoch": 5.656108597285068, "grad_norm": 0.9749560356140137, "learning_rate": 0.001, "loss": 2.9734, "step": 17500 }, { "epoch": 5.688429217840983, "grad_norm": 0.9285848140716553, "learning_rate": 0.001, "loss": 2.9993, "step": 17600 }, { "epoch": 5.720749838396897, "grad_norm": 0.7411068081855774, "learning_rate": 0.001, "loss": 2.9916, "step": 17700 }, { "epoch": 5.753070458952812, "grad_norm": 1.0168943405151367, "learning_rate": 0.001, "loss": 3.0097, "step": 17800 }, { "epoch": 5.785391079508726, "grad_norm": 0.7873669862747192, "learning_rate": 0.001, "loss": 3.003, "step": 17900 }, { "epoch": 5.817711700064641, "grad_norm": 0.9932910799980164, "learning_rate": 0.001, "loss": 2.9799, "step": 18000 }, { "epoch": 5.850032320620556, "grad_norm": 0.8969007134437561, "learning_rate": 0.001, "loss": 2.992, "step": 18100 }, { "epoch": 5.882352941176471, "grad_norm": 0.9084499478340149, "learning_rate": 0.001, "loss": 2.998, "step": 18200 }, { "epoch": 5.914673561732386, "grad_norm": 0.8776857852935791, "learning_rate": 0.001, "loss": 3.018, "step": 18300 }, { "epoch": 5.9469941822883, "grad_norm": 1.0515482425689697, "learning_rate": 0.001, "loss": 3.0213, "step": 18400 }, { "epoch": 5.979314802844215, "grad_norm": 1.1112173795700073, "learning_rate": 0.001, "loss": 3.0052, "step": 18500 }, { "epoch": 6.011635423400129, "grad_norm": 2.20530104637146, "learning_rate": 0.001, "loss": 2.9506, "step": 18600 }, { "epoch": 6.043956043956044, "grad_norm": 1.1793222427368164, "learning_rate": 0.001, "loss": 2.9143, "step": 18700 }, { "epoch": 6.076276664511958, "grad_norm": 0.9854251146316528, "learning_rate": 0.001, "loss": 2.9002, "step": 18800 }, { "epoch": 6.108597285067873, "grad_norm": 1.0617247819900513, "learning_rate": 0.001, "loss": 2.9038, "step": 18900 }, { "epoch": 6.140917905623788, "grad_norm": 0.8956011533737183, "learning_rate": 0.001, "loss": 2.8884, "step": 19000 }, { "epoch": 6.173238526179703, "grad_norm": 0.8987821936607361, "learning_rate": 0.001, "loss": 2.9108, "step": 19100 }, { "epoch": 6.2055591467356175, "grad_norm": 0.9038986563682556, "learning_rate": 0.001, "loss": 2.8917, "step": 19200 }, { "epoch": 6.237879767291532, "grad_norm": 1.015396237373352, "learning_rate": 0.001, "loss": 2.9112, "step": 19300 }, { "epoch": 6.270200387847447, "grad_norm": 0.901792049407959, "learning_rate": 0.001, "loss": 2.903, "step": 19400 }, { "epoch": 6.302521008403361, "grad_norm": 1.155234456062317, "learning_rate": 0.001, "loss": 2.912, "step": 19500 }, { "epoch": 6.334841628959276, "grad_norm": 0.8977019786834717, "learning_rate": 0.001, "loss": 2.9215, "step": 19600 }, { "epoch": 6.36716224951519, "grad_norm": 0.9986981749534607, "learning_rate": 0.001, "loss": 2.9189, "step": 19700 }, { "epoch": 6.399482870071106, "grad_norm": 1438.670654296875, "learning_rate": 0.001, "loss": 2.9012, "step": 19800 }, { "epoch": 6.43180349062702, "grad_norm": 1.1558160781860352, "learning_rate": 0.001, "loss": 2.9289, "step": 19900 }, { "epoch": 6.464124111182935, "grad_norm": 1.1606899499893188, "learning_rate": 0.001, "loss": 2.907, "step": 20000 }, { "epoch": 6.496444731738849, "grad_norm": 5.5601606369018555, "learning_rate": 0.001, "loss": 2.9271, "step": 20100 }, { "epoch": 6.528765352294764, "grad_norm": 1.1128451824188232, "learning_rate": 0.001, "loss": 2.9233, "step": 20200 }, { "epoch": 6.5610859728506785, "grad_norm": 1.1404609680175781, "learning_rate": 0.001, "loss": 2.9082, "step": 20300 }, { "epoch": 6.593406593406593, "grad_norm": 0.9506986737251282, "learning_rate": 0.001, "loss": 2.9442, "step": 20400 }, { "epoch": 6.625727213962508, "grad_norm": 2.65427565574646, "learning_rate": 0.001, "loss": 2.9238, "step": 20500 }, { "epoch": 6.658047834518423, "grad_norm": 0.9996835589408875, "learning_rate": 0.001, "loss": 2.9457, "step": 20600 }, { "epoch": 6.690368455074338, "grad_norm": 1.134716510772705, "learning_rate": 0.001, "loss": 2.9437, "step": 20700 }, { "epoch": 6.722689075630252, "grad_norm": 0.9267463684082031, "learning_rate": 0.001, "loss": 2.9292, "step": 20800 }, { "epoch": 6.755009696186167, "grad_norm": 0.9719293713569641, "learning_rate": 0.001, "loss": 2.9398, "step": 20900 }, { "epoch": 6.787330316742081, "grad_norm": 1.184998631477356, "learning_rate": 0.001, "loss": 2.9291, "step": 21000 }, { "epoch": 6.819650937297996, "grad_norm": 0.7128123044967651, "learning_rate": 0.001, "loss": 2.9187, "step": 21100 }, { "epoch": 6.85197155785391, "grad_norm": 1.079878330230713, "learning_rate": 0.001, "loss": 2.9517, "step": 21200 }, { "epoch": 6.884292178409826, "grad_norm": 1.0560288429260254, "learning_rate": 0.001, "loss": 2.9341, "step": 21300 }, { "epoch": 6.91661279896574, "grad_norm": 1.0774500370025635, "learning_rate": 0.001, "loss": 2.9286, "step": 21400 }, { "epoch": 6.948933419521655, "grad_norm": 0.9224665760993958, "learning_rate": 0.001, "loss": 2.9208, "step": 21500 }, { "epoch": 6.98125404007757, "grad_norm": 0.9871198534965515, "learning_rate": 0.001, "loss": 2.9373, "step": 21600 }, { "epoch": 7.013574660633484, "grad_norm": 1.299688696861267, "learning_rate": 0.001, "loss": 2.8893, "step": 21700 }, { "epoch": 7.045895281189399, "grad_norm": 1.1260560750961304, "learning_rate": 0.001, "loss": 2.8234, "step": 21800 }, { "epoch": 7.078215901745313, "grad_norm": 1.0508748292922974, "learning_rate": 0.001, "loss": 2.8097, "step": 21900 }, { "epoch": 7.110536522301228, "grad_norm": 1.3072657585144043, "learning_rate": 0.001, "loss": 2.8389, "step": 22000 }, { "epoch": 7.142857142857143, "grad_norm": 1.1243537664413452, "learning_rate": 0.001, "loss": 2.8245, "step": 22100 }, { "epoch": 7.175177763413058, "grad_norm": 1.061802864074707, "learning_rate": 0.001, "loss": 2.8225, "step": 22200 }, { "epoch": 7.207498383968972, "grad_norm": 2.5482921600341797, "learning_rate": 0.001, "loss": 2.8204, "step": 22300 }, { "epoch": 7.239819004524887, "grad_norm": 1.027732491493225, "learning_rate": 0.001, "loss": 2.8452, "step": 22400 }, { "epoch": 7.2721396250808015, "grad_norm": 0.9640640616416931, "learning_rate": 0.001, "loss": 2.8482, "step": 22500 }, { "epoch": 7.304460245636716, "grad_norm": 1.094141960144043, "learning_rate": 0.001, "loss": 2.8336, "step": 22600 }, { "epoch": 7.336780866192631, "grad_norm": 0.9503917098045349, "learning_rate": 0.001, "loss": 2.8513, "step": 22700 }, { "epoch": 7.369101486748546, "grad_norm": 1.3805400133132935, "learning_rate": 0.001, "loss": 2.8395, "step": 22800 }, { "epoch": 7.401422107304461, "grad_norm": 1.1830805540084839, "learning_rate": 0.001, "loss": 2.8373, "step": 22900 }, { "epoch": 7.433742727860375, "grad_norm": 3.162130832672119, "learning_rate": 0.001, "loss": 2.8813, "step": 23000 }, { "epoch": 7.46606334841629, "grad_norm": 1.2394567728042603, "learning_rate": 0.001, "loss": 2.8736, "step": 23100 }, { "epoch": 7.498383968972204, "grad_norm": 1.3402519226074219, "learning_rate": 0.001, "loss": 2.8861, "step": 23200 }, { "epoch": 7.530704589528119, "grad_norm": 1.2871930599212646, "learning_rate": 0.001, "loss": 2.8679, "step": 23300 }, { "epoch": 7.563025210084033, "grad_norm": 1.3503236770629883, "learning_rate": 0.001, "loss": 2.883, "step": 23400 }, { "epoch": 7.595345830639948, "grad_norm": 1.0640761852264404, "learning_rate": 0.001, "loss": 2.8829, "step": 23500 }, { "epoch": 7.6276664511958625, "grad_norm": 1.1721611022949219, "learning_rate": 0.001, "loss": 2.8622, "step": 23600 }, { "epoch": 7.659987071751778, "grad_norm": 1.4284518957138062, "learning_rate": 0.001, "loss": 2.8824, "step": 23700 }, { "epoch": 7.6923076923076925, "grad_norm": 0.8941483497619629, "learning_rate": 0.001, "loss": 2.8916, "step": 23800 }, { "epoch": 7.724628312863607, "grad_norm": 1.0220887660980225, "learning_rate": 0.001, "loss": 2.8648, "step": 23900 }, { "epoch": 7.756948933419522, "grad_norm": 1.0884076356887817, "learning_rate": 0.001, "loss": 2.8879, "step": 24000 }, { "epoch": 7.789269553975436, "grad_norm": 2.3624370098114014, "learning_rate": 0.001, "loss": 2.8882, "step": 24100 }, { "epoch": 7.821590174531351, "grad_norm": 1.3170629739761353, "learning_rate": 0.001, "loss": 2.8795, "step": 24200 }, { "epoch": 7.853910795087265, "grad_norm": 1.104962706565857, "learning_rate": 0.001, "loss": 2.8914, "step": 24300 }, { "epoch": 7.886231415643181, "grad_norm": 44.59548568725586, "learning_rate": 0.001, "loss": 2.8754, "step": 24400 }, { "epoch": 7.918552036199095, "grad_norm": 1.1567580699920654, "learning_rate": 0.001, "loss": 2.8968, "step": 24500 }, { "epoch": 7.95087265675501, "grad_norm": 1.4284254312515259, "learning_rate": 0.001, "loss": 2.8623, "step": 24600 }, { "epoch": 7.983193277310924, "grad_norm": 1.011319875717163, "learning_rate": 0.001, "loss": 2.8714, "step": 24700 }, { "epoch": 8.015513897866839, "grad_norm": 1.562037706375122, "learning_rate": 0.001, "loss": 2.809, "step": 24800 }, { "epoch": 8.047834518422754, "grad_norm": 1.4347589015960693, "learning_rate": 0.001, "loss": 2.7666, "step": 24900 }, { "epoch": 8.080155138978668, "grad_norm": 1.0763415098190308, "learning_rate": 0.001, "loss": 2.771, "step": 25000 }, { "epoch": 8.112475759534583, "grad_norm": 1.4732086658477783, "learning_rate": 0.001, "loss": 2.7833, "step": 25100 }, { "epoch": 8.144796380090497, "grad_norm": 1.2230743169784546, "learning_rate": 0.001, "loss": 2.7798, "step": 25200 }, { "epoch": 8.177117000646412, "grad_norm": 1.3164935111999512, "learning_rate": 0.001, "loss": 2.7901, "step": 25300 }, { "epoch": 8.209437621202326, "grad_norm": 1.651221752166748, "learning_rate": 0.001, "loss": 2.7823, "step": 25400 }, { "epoch": 8.241758241758241, "grad_norm": 1.7760975360870361, "learning_rate": 0.001, "loss": 2.7796, "step": 25500 }, { "epoch": 8.274078862314157, "grad_norm": 1.6292158365249634, "learning_rate": 0.001, "loss": 2.8132, "step": 25600 }, { "epoch": 8.306399482870072, "grad_norm": 1.5424258708953857, "learning_rate": 0.001, "loss": 2.8147, "step": 25700 }, { "epoch": 8.338720103425986, "grad_norm": 1.454942226409912, "learning_rate": 0.001, "loss": 2.8129, "step": 25800 }, { "epoch": 8.371040723981901, "grad_norm": 1.2529276609420776, "learning_rate": 0.001, "loss": 2.7698, "step": 25900 }, { "epoch": 8.403361344537815, "grad_norm": 1.2751734256744385, "learning_rate": 0.001, "loss": 2.8021, "step": 26000 }, { "epoch": 8.43568196509373, "grad_norm": 1.1924549341201782, "learning_rate": 0.001, "loss": 2.8018, "step": 26100 }, { "epoch": 8.468002585649645, "grad_norm": 1.8647762537002563, "learning_rate": 0.001, "loss": 2.8107, "step": 26200 }, { "epoch": 8.50032320620556, "grad_norm": 1.6496800184249878, "learning_rate": 0.001, "loss": 2.837, "step": 26300 }, { "epoch": 8.532643826761474, "grad_norm": 1.042580008506775, "learning_rate": 0.001, "loss": 2.8345, "step": 26400 }, { "epoch": 8.564964447317388, "grad_norm": 1.2232996225357056, "learning_rate": 0.001, "loss": 2.7988, "step": 26500 }, { "epoch": 8.597285067873303, "grad_norm": 1.518761157989502, "learning_rate": 0.001, "loss": 2.8227, "step": 26600 }, { "epoch": 8.629605688429217, "grad_norm": 1.4686894416809082, "learning_rate": 0.001, "loss": 2.8108, "step": 26700 }, { "epoch": 8.661926308985132, "grad_norm": 1.3486344814300537, "learning_rate": 0.001, "loss": 2.8269, "step": 26800 }, { "epoch": 8.694246929541046, "grad_norm": 1.5729914903640747, "learning_rate": 0.001, "loss": 2.8321, "step": 26900 }, { "epoch": 8.726567550096961, "grad_norm": 1.305612325668335, "learning_rate": 0.001, "loss": 2.8338, "step": 27000 }, { "epoch": 8.758888170652877, "grad_norm": 1.666892170906067, "learning_rate": 0.001, "loss": 2.8105, "step": 27100 }, { "epoch": 8.791208791208792, "grad_norm": 1.133399486541748, "learning_rate": 0.001, "loss": 2.8201, "step": 27200 }, { "epoch": 8.823529411764707, "grad_norm": 1.2935371398925781, "learning_rate": 0.001, "loss": 2.8318, "step": 27300 }, { "epoch": 8.855850032320621, "grad_norm": 1.4941588640213013, "learning_rate": 0.001, "loss": 2.8321, "step": 27400 }, { "epoch": 8.888170652876536, "grad_norm": 1.3890061378479004, "learning_rate": 0.001, "loss": 2.8419, "step": 27500 }, { "epoch": 8.92049127343245, "grad_norm": 1.5180507898330688, "learning_rate": 0.001, "loss": 2.8405, "step": 27600 }, { "epoch": 8.952811893988365, "grad_norm": 1.3410158157348633, "learning_rate": 0.001, "loss": 2.8431, "step": 27700 }, { "epoch": 8.98513251454428, "grad_norm": 1.2363744974136353, "learning_rate": 0.001, "loss": 2.8369, "step": 27800 }, { "epoch": 9.017453135100194, "grad_norm": 2.7910056114196777, "learning_rate": 0.001, "loss": 2.7654, "step": 27900 }, { "epoch": 9.049773755656108, "grad_norm": 1.4430453777313232, "learning_rate": 0.001, "loss": 2.6972, "step": 28000 }, { "epoch": 9.082094376212023, "grad_norm": 1.5520833730697632, "learning_rate": 0.001, "loss": 2.736, "step": 28100 }, { "epoch": 9.114414996767938, "grad_norm": 2.4241955280303955, "learning_rate": 0.001, "loss": 2.6983, "step": 28200 }, { "epoch": 9.146735617323852, "grad_norm": 2.456726551055908, "learning_rate": 0.001, "loss": 2.7524, "step": 28300 }, { "epoch": 9.179056237879767, "grad_norm": 1.9772100448608398, "learning_rate": 0.001, "loss": 2.736, "step": 28400 }, { "epoch": 9.211376858435681, "grad_norm": 1.5703206062316895, "learning_rate": 0.001, "loss": 2.7425, "step": 28500 }, { "epoch": 9.243697478991596, "grad_norm": 2.0594890117645264, "learning_rate": 0.001, "loss": 2.7448, "step": 28600 }, { "epoch": 9.276018099547512, "grad_norm": 1.5000182390213013, "learning_rate": 0.001, "loss": 2.7747, "step": 28700 }, { "epoch": 9.308338720103427, "grad_norm": 1.3302167654037476, "learning_rate": 0.001, "loss": 2.7544, "step": 28800 }, { "epoch": 9.340659340659341, "grad_norm": 1.160788893699646, "learning_rate": 0.001, "loss": 2.7528, "step": 28900 }, { "epoch": 9.372979961215256, "grad_norm": 1.709227204322815, "learning_rate": 0.001, "loss": 2.7599, "step": 29000 }, { "epoch": 9.40530058177117, "grad_norm": 3.8329715728759766, "learning_rate": 0.001, "loss": 2.7545, "step": 29100 }, { "epoch": 9.437621202327085, "grad_norm": 2.1938230991363525, "learning_rate": 0.001, "loss": 2.7766, "step": 29200 }, { "epoch": 9.469941822883, "grad_norm": 1.3510043621063232, "learning_rate": 0.001, "loss": 2.7754, "step": 29300 }, { "epoch": 9.502262443438914, "grad_norm": 1.2608709335327148, "learning_rate": 0.001, "loss": 2.7588, "step": 29400 }, { "epoch": 9.534583063994829, "grad_norm": 6.222628116607666, "learning_rate": 0.001, "loss": 2.7808, "step": 29500 }, { "epoch": 9.566903684550743, "grad_norm": 1.161131501197815, "learning_rate": 0.001, "loss": 2.7694, "step": 29600 }, { "epoch": 9.599224305106658, "grad_norm": 1.5423818826675415, "learning_rate": 0.001, "loss": 2.7865, "step": 29700 }, { "epoch": 9.631544925662572, "grad_norm": 1.2014741897583008, "learning_rate": 0.001, "loss": 2.7795, "step": 29800 }, { "epoch": 9.663865546218487, "grad_norm": 1.2147090435028076, "learning_rate": 0.001, "loss": 2.7723, "step": 29900 }, { "epoch": 9.696186166774401, "grad_norm": 1.5985743999481201, "learning_rate": 0.001, "loss": 2.7672, "step": 30000 }, { "epoch": 9.728506787330316, "grad_norm": 1.4221839904785156, "learning_rate": 0.001, "loss": 2.777, "step": 30100 }, { "epoch": 9.760827407886232, "grad_norm": 1.517334222793579, "learning_rate": 0.001, "loss": 2.7784, "step": 30200 }, { "epoch": 9.793148028442147, "grad_norm": 2.060960054397583, "learning_rate": 0.001, "loss": 2.7811, "step": 30300 }, { "epoch": 9.825468648998061, "grad_norm": 1.1917989253997803, "learning_rate": 0.001, "loss": 2.7838, "step": 30400 }, { "epoch": 9.857789269553976, "grad_norm": 1.433867335319519, "learning_rate": 0.001, "loss": 2.7739, "step": 30500 }, { "epoch": 9.89010989010989, "grad_norm": 6.551234722137451, "learning_rate": 0.001, "loss": 2.7902, "step": 30600 }, { "epoch": 9.922430510665805, "grad_norm": 1.8671542406082153, "learning_rate": 0.001, "loss": 2.8048, "step": 30700 }, { "epoch": 9.95475113122172, "grad_norm": 1.410808801651001, "learning_rate": 0.001, "loss": 2.7932, "step": 30800 }, { "epoch": 9.987071751777634, "grad_norm": 2.106566905975342, "learning_rate": 0.001, "loss": 2.7982, "step": 30900 }, { "epoch": 10.019392372333549, "grad_norm": 1.4149757623672485, "learning_rate": 0.001, "loss": 2.7199, "step": 31000 }, { "epoch": 10.051712992889463, "grad_norm": 1.1046713590621948, "learning_rate": 0.001, "loss": 2.6804, "step": 31100 }, { "epoch": 10.084033613445378, "grad_norm": 1.539957880973816, "learning_rate": 0.001, "loss": 2.6986, "step": 31200 }, { "epoch": 10.116354234001292, "grad_norm": 1.1501537561416626, "learning_rate": 0.001, "loss": 2.7089, "step": 31300 }, { "epoch": 10.148674854557207, "grad_norm": 12.145599365234375, "learning_rate": 0.001, "loss": 2.6841, "step": 31400 }, { "epoch": 10.180995475113122, "grad_norm": 0.9656225442886353, "learning_rate": 0.001, "loss": 2.7048, "step": 31500 }, { "epoch": 10.213316095669036, "grad_norm": 1.4126918315887451, "learning_rate": 0.001, "loss": 2.6959, "step": 31600 }, { "epoch": 10.24563671622495, "grad_norm": 1.1594783067703247, "learning_rate": 0.001, "loss": 2.7009, "step": 31700 }, { "epoch": 10.277957336780867, "grad_norm": 1.078967809677124, "learning_rate": 0.001, "loss": 2.7095, "step": 31800 }, { "epoch": 10.310277957336782, "grad_norm": 1.0235683917999268, "learning_rate": 0.001, "loss": 2.7064, "step": 31900 }, { "epoch": 10.342598577892696, "grad_norm": 1.1078089475631714, "learning_rate": 0.001, "loss": 2.7044, "step": 32000 }, { "epoch": 10.37491919844861, "grad_norm": 1.3842604160308838, "learning_rate": 0.001, "loss": 2.7156, "step": 32100 }, { "epoch": 10.407239819004525, "grad_norm": 1.0882638692855835, "learning_rate": 0.001, "loss": 2.7479, "step": 32200 }, { "epoch": 10.43956043956044, "grad_norm": 0.9538421034812927, "learning_rate": 0.001, "loss": 2.7225, "step": 32300 }, { "epoch": 10.471881060116354, "grad_norm": 1.0541894435882568, "learning_rate": 0.001, "loss": 2.7374, "step": 32400 }, { "epoch": 10.504201680672269, "grad_norm": 1.0659579038619995, "learning_rate": 0.001, "loss": 2.7293, "step": 32500 }, { "epoch": 10.536522301228183, "grad_norm": 1.5823651552200317, "learning_rate": 0.001, "loss": 2.7261, "step": 32600 }, { "epoch": 10.568842921784098, "grad_norm": 1.222914218902588, "learning_rate": 0.001, "loss": 2.7326, "step": 32700 }, { "epoch": 10.601163542340013, "grad_norm": 12.506160736083984, "learning_rate": 0.001, "loss": 2.7235, "step": 32800 }, { "epoch": 10.633484162895927, "grad_norm": 1.0963003635406494, "learning_rate": 0.001, "loss": 2.7425, "step": 32900 }, { "epoch": 10.665804783451842, "grad_norm": 0.7521632313728333, "learning_rate": 0.001, "loss": 2.7119, "step": 33000 }, { "epoch": 10.698125404007756, "grad_norm": 1.0782854557037354, "learning_rate": 0.001, "loss": 2.7351, "step": 33100 }, { "epoch": 10.73044602456367, "grad_norm": 0.8940654993057251, "learning_rate": 0.001, "loss": 2.7497, "step": 33200 }, { "epoch": 10.762766645119587, "grad_norm": 1.0389740467071533, "learning_rate": 0.001, "loss": 2.7551, "step": 33300 }, { "epoch": 10.795087265675502, "grad_norm": 1.0971581935882568, "learning_rate": 0.001, "loss": 2.7354, "step": 33400 }, { "epoch": 10.827407886231416, "grad_norm": 0.9104992747306824, "learning_rate": 0.001, "loss": 2.7599, "step": 33500 }, { "epoch": 10.85972850678733, "grad_norm": 1.4746216535568237, "learning_rate": 0.001, "loss": 2.7506, "step": 33600 }, { "epoch": 10.892049127343245, "grad_norm": 2.0504276752471924, "learning_rate": 0.001, "loss": 2.7569, "step": 33700 }, { "epoch": 10.92436974789916, "grad_norm": 0.9838640689849854, "learning_rate": 0.001, "loss": 2.7295, "step": 33800 }, { "epoch": 10.956690368455074, "grad_norm": 0.9727325439453125, "learning_rate": 0.001, "loss": 2.7456, "step": 33900 }, { "epoch": 10.989010989010989, "grad_norm": 1.271849513053894, "learning_rate": 0.001, "loss": 2.7364, "step": 34000 }, { "epoch": 11.021331609566904, "grad_norm": 1.2966394424438477, "learning_rate": 0.001, "loss": 2.7027, "step": 34100 }, { "epoch": 11.053652230122818, "grad_norm": 1.1168221235275269, "learning_rate": 0.001, "loss": 2.6491, "step": 34200 }, { "epoch": 11.085972850678733, "grad_norm": 1.237612247467041, "learning_rate": 0.001, "loss": 2.6415, "step": 34300 }, { "epoch": 11.118293471234647, "grad_norm": 1.1559176445007324, "learning_rate": 0.001, "loss": 2.6619, "step": 34400 }, { "epoch": 11.150614091790562, "grad_norm": 1.7797229290008545, "learning_rate": 0.001, "loss": 2.6483, "step": 34500 }, { "epoch": 11.182934712346476, "grad_norm": 1.0046466588974, "learning_rate": 0.001, "loss": 2.6604, "step": 34600 }, { "epoch": 11.215255332902391, "grad_norm": 1.1106253862380981, "learning_rate": 0.001, "loss": 2.6674, "step": 34700 }, { "epoch": 11.247575953458306, "grad_norm": 0.9139506816864014, "learning_rate": 0.001, "loss": 2.6447, "step": 34800 }, { "epoch": 11.279896574014222, "grad_norm": 0.979452908039093, "learning_rate": 0.001, "loss": 2.6616, "step": 34900 }, { "epoch": 11.312217194570136, "grad_norm": 1.4314377307891846, "learning_rate": 0.001, "loss": 2.6492, "step": 35000 }, { "epoch": 11.344537815126051, "grad_norm": 0.9578601717948914, "learning_rate": 0.001, "loss": 2.659, "step": 35100 }, { "epoch": 11.376858435681966, "grad_norm": 0.8165574073791504, "learning_rate": 0.001, "loss": 2.6615, "step": 35200 }, { "epoch": 11.40917905623788, "grad_norm": 0.9311456680297852, "learning_rate": 0.001, "loss": 2.6738, "step": 35300 }, { "epoch": 11.441499676793795, "grad_norm": 1.1456940174102783, "learning_rate": 0.001, "loss": 2.6671, "step": 35400 }, { "epoch": 11.47382029734971, "grad_norm": 1.0223768949508667, "learning_rate": 0.001, "loss": 2.6909, "step": 35500 }, { "epoch": 11.506140917905624, "grad_norm": 1.0260502099990845, "learning_rate": 0.001, "loss": 2.7128, "step": 35600 }, { "epoch": 11.538461538461538, "grad_norm": 1.0336605310440063, "learning_rate": 0.001, "loss": 2.6813, "step": 35700 }, { "epoch": 11.570782159017453, "grad_norm": 1.2250432968139648, "learning_rate": 0.001, "loss": 2.7014, "step": 35800 }, { "epoch": 11.603102779573367, "grad_norm": 1.2731081247329712, "learning_rate": 0.001, "loss": 2.6795, "step": 35900 }, { "epoch": 11.635423400129282, "grad_norm": 0.9055103659629822, "learning_rate": 0.001, "loss": 2.6804, "step": 36000 }, { "epoch": 11.667744020685197, "grad_norm": 1.1321115493774414, "learning_rate": 0.001, "loss": 2.7095, "step": 36100 }, { "epoch": 11.700064641241111, "grad_norm": 0.9652146100997925, "learning_rate": 0.001, "loss": 2.6824, "step": 36200 }, { "epoch": 11.732385261797026, "grad_norm": 0.7905705571174622, "learning_rate": 0.001, "loss": 2.6913, "step": 36300 }, { "epoch": 11.764705882352942, "grad_norm": 1.2499932050704956, "learning_rate": 0.001, "loss": 2.6865, "step": 36400 }, { "epoch": 11.797026502908857, "grad_norm": 1.3597419261932373, "learning_rate": 0.001, "loss": 2.6859, "step": 36500 }, { "epoch": 11.829347123464771, "grad_norm": 1.1623271703720093, "learning_rate": 0.001, "loss": 2.725, "step": 36600 }, { "epoch": 11.861667744020686, "grad_norm": 2.189985752105713, "learning_rate": 0.001, "loss": 2.7077, "step": 36700 }, { "epoch": 11.8939883645766, "grad_norm": 1.2867103815078735, "learning_rate": 0.001, "loss": 2.6944, "step": 36800 }, { "epoch": 11.926308985132515, "grad_norm": 1.086223840713501, "learning_rate": 0.001, "loss": 2.7213, "step": 36900 }, { "epoch": 11.95862960568843, "grad_norm": 1.2899161577224731, "learning_rate": 0.001, "loss": 2.6918, "step": 37000 }, { "epoch": 11.990950226244344, "grad_norm": 1.1615432500839233, "learning_rate": 0.001, "loss": 2.7156, "step": 37100 }, { "epoch": 12.023270846800258, "grad_norm": 1.1235369443893433, "learning_rate": 0.001, "loss": 2.6163, "step": 37200 }, { "epoch": 12.055591467356173, "grad_norm": 0.8783882260322571, "learning_rate": 0.001, "loss": 2.5824, "step": 37300 }, { "epoch": 12.087912087912088, "grad_norm": 1.1633951663970947, "learning_rate": 0.001, "loss": 2.5888, "step": 37400 }, { "epoch": 12.120232708468002, "grad_norm": 1.2664191722869873, "learning_rate": 0.001, "loss": 2.5853, "step": 37500 }, { "epoch": 12.152553329023917, "grad_norm": 1.117937684059143, "learning_rate": 0.001, "loss": 2.5975, "step": 37600 }, { "epoch": 12.184873949579831, "grad_norm": 1.054268479347229, "learning_rate": 0.001, "loss": 2.6144, "step": 37700 }, { "epoch": 12.217194570135746, "grad_norm": 0.9705707430839539, "learning_rate": 0.001, "loss": 2.6327, "step": 37800 }, { "epoch": 12.24951519069166, "grad_norm": 0.9434573650360107, "learning_rate": 0.001, "loss": 2.6308, "step": 37900 }, { "epoch": 12.281835811247577, "grad_norm": 1.2516924142837524, "learning_rate": 0.001, "loss": 2.6318, "step": 38000 }, { "epoch": 12.314156431803491, "grad_norm": 1.1667375564575195, "learning_rate": 0.001, "loss": 2.6246, "step": 38100 }, { "epoch": 12.346477052359406, "grad_norm": 2.7181923389434814, "learning_rate": 0.001, "loss": 2.6298, "step": 38200 }, { "epoch": 12.37879767291532, "grad_norm": 1.3722175359725952, "learning_rate": 0.001, "loss": 2.6055, "step": 38300 }, { "epoch": 12.411118293471235, "grad_norm": 1.4176762104034424, "learning_rate": 0.001, "loss": 2.6461, "step": 38400 }, { "epoch": 12.44343891402715, "grad_norm": 1.107043743133545, "learning_rate": 0.001, "loss": 2.6511, "step": 38500 }, { "epoch": 12.475759534583064, "grad_norm": 1.1418050527572632, "learning_rate": 0.001, "loss": 2.6352, "step": 38600 }, { "epoch": 12.508080155138979, "grad_norm": 0.8514591455459595, "learning_rate": 0.001, "loss": 2.6463, "step": 38700 }, { "epoch": 12.540400775694893, "grad_norm": 1.0669589042663574, "learning_rate": 0.001, "loss": 2.6392, "step": 38800 }, { "epoch": 12.572721396250808, "grad_norm": 0.9883720278739929, "learning_rate": 0.001, "loss": 2.6437, "step": 38900 }, { "epoch": 12.605042016806722, "grad_norm": 1.3261536359786987, "learning_rate": 0.001, "loss": 2.653, "step": 39000 }, { "epoch": 12.637362637362637, "grad_norm": 1.0962554216384888, "learning_rate": 0.001, "loss": 2.6623, "step": 39100 }, { "epoch": 12.669683257918551, "grad_norm": 1.2624469995498657, "learning_rate": 0.001, "loss": 2.6543, "step": 39200 }, { "epoch": 12.702003878474466, "grad_norm": 1.0717207193374634, "learning_rate": 0.001, "loss": 2.6439, "step": 39300 }, { "epoch": 12.73432449903038, "grad_norm": 1.0760866403579712, "learning_rate": 0.001, "loss": 2.6904, "step": 39400 }, { "epoch": 12.766645119586297, "grad_norm": 1.204099416732788, "learning_rate": 0.001, "loss": 2.6673, "step": 39500 }, { "epoch": 12.798965740142211, "grad_norm": 0.9972160458564758, "learning_rate": 0.001, "loss": 2.656, "step": 39600 }, { "epoch": 12.831286360698126, "grad_norm": 16.532602310180664, "learning_rate": 0.001, "loss": 2.6829, "step": 39700 }, { "epoch": 12.86360698125404, "grad_norm": 0.9585914015769958, "learning_rate": 0.001, "loss": 2.6614, "step": 39800 }, { "epoch": 12.895927601809955, "grad_norm": 1.0661193132400513, "learning_rate": 0.001, "loss": 2.6706, "step": 39900 }, { "epoch": 12.92824822236587, "grad_norm": 0.9563218355178833, "learning_rate": 0.001, "loss": 2.6694, "step": 40000 }, { "epoch": 12.960568842921784, "grad_norm": 1.1475189924240112, "learning_rate": 0.001, "loss": 2.6791, "step": 40100 }, { "epoch": 12.992889463477699, "grad_norm": 1.0078437328338623, "learning_rate": 0.001, "loss": 2.6723, "step": 40200 }, { "epoch": 13.025210084033613, "grad_norm": 1.1758180856704712, "learning_rate": 0.001, "loss": 2.5716, "step": 40300 }, { "epoch": 13.057530704589528, "grad_norm": 1.3015313148498535, "learning_rate": 0.001, "loss": 2.5678, "step": 40400 }, { "epoch": 13.089851325145442, "grad_norm": 0.9321695566177368, "learning_rate": 0.001, "loss": 2.5821, "step": 40500 }, { "epoch": 13.122171945701357, "grad_norm": 1.0390539169311523, "learning_rate": 0.001, "loss": 2.5582, "step": 40600 }, { "epoch": 13.154492566257272, "grad_norm": 1.00677490234375, "learning_rate": 0.001, "loss": 2.5672, "step": 40700 }, { "epoch": 13.186813186813186, "grad_norm": 1.592461109161377, "learning_rate": 0.001, "loss": 2.5889, "step": 40800 }, { "epoch": 13.2191338073691, "grad_norm": 0.909713625907898, "learning_rate": 0.001, "loss": 2.5838, "step": 40900 }, { "epoch": 13.251454427925015, "grad_norm": 1.420849323272705, "learning_rate": 0.001, "loss": 2.5678, "step": 41000 }, { "epoch": 13.283775048480932, "grad_norm": 1.22563898563385, "learning_rate": 0.001, "loss": 2.5739, "step": 41100 }, { "epoch": 13.316095669036846, "grad_norm": 1.220168113708496, "learning_rate": 0.001, "loss": 2.5886, "step": 41200 }, { "epoch": 13.34841628959276, "grad_norm": 0.7951250672340393, "learning_rate": 0.001, "loss": 2.5795, "step": 41300 }, { "epoch": 13.380736910148675, "grad_norm": 1.0727436542510986, "learning_rate": 0.001, "loss": 2.5959, "step": 41400 }, { "epoch": 13.41305753070459, "grad_norm": 0.9477673768997192, "learning_rate": 0.001, "loss": 2.6006, "step": 41500 }, { "epoch": 13.445378151260504, "grad_norm": 1.163881540298462, "learning_rate": 0.001, "loss": 2.608, "step": 41600 }, { "epoch": 13.477698771816419, "grad_norm": 0.9824815392494202, "learning_rate": 0.001, "loss": 2.599, "step": 41700 }, { "epoch": 13.510019392372334, "grad_norm": 1.89625883102417, "learning_rate": 0.001, "loss": 2.6207, "step": 41800 }, { "epoch": 13.542340012928248, "grad_norm": 0.9242108464241028, "learning_rate": 0.001, "loss": 2.598, "step": 41900 }, { "epoch": 13.574660633484163, "grad_norm": 0.8262276649475098, "learning_rate": 0.001, "loss": 2.5978, "step": 42000 }, { "epoch": 13.606981254040077, "grad_norm": 0.9891680479049683, "learning_rate": 0.001, "loss": 2.6167, "step": 42100 }, { "epoch": 13.639301874595992, "grad_norm": 0.8426249027252197, "learning_rate": 0.001, "loss": 2.6065, "step": 42200 }, { "epoch": 13.671622495151906, "grad_norm": 1.1443393230438232, "learning_rate": 0.001, "loss": 2.6156, "step": 42300 }, { "epoch": 13.70394311570782, "grad_norm": 1.0155730247497559, "learning_rate": 0.001, "loss": 2.5964, "step": 42400 }, { "epoch": 13.736263736263737, "grad_norm": 0.8924771547317505, "learning_rate": 0.001, "loss": 2.6141, "step": 42500 }, { "epoch": 13.768584356819652, "grad_norm": 0.9005572199821472, "learning_rate": 0.001, "loss": 2.6254, "step": 42600 }, { "epoch": 13.800904977375566, "grad_norm": 1.0440888404846191, "learning_rate": 0.001, "loss": 2.612, "step": 42700 }, { "epoch": 13.83322559793148, "grad_norm": 1.1966723203659058, "learning_rate": 0.001, "loss": 2.6332, "step": 42800 }, { "epoch": 13.865546218487395, "grad_norm": 0.9007824063301086, "learning_rate": 0.001, "loss": 2.6243, "step": 42900 }, { "epoch": 13.89786683904331, "grad_norm": 0.9014492034912109, "learning_rate": 0.001, "loss": 2.6502, "step": 43000 }, { "epoch": 13.930187459599225, "grad_norm": 1.1441199779510498, "learning_rate": 0.001, "loss": 2.6327, "step": 43100 }, { "epoch": 13.96250808015514, "grad_norm": 0.9134061336517334, "learning_rate": 0.001, "loss": 2.6267, "step": 43200 }, { "epoch": 13.994828700711054, "grad_norm": 1.1688714027404785, "learning_rate": 0.001, "loss": 2.6273, "step": 43300 }, { "epoch": 14.027149321266968, "grad_norm": 0.8692650198936462, "learning_rate": 0.001, "loss": 2.5239, "step": 43400 }, { "epoch": 14.059469941822883, "grad_norm": 1.0588010549545288, "learning_rate": 0.001, "loss": 2.5065, "step": 43500 }, { "epoch": 14.091790562378797, "grad_norm": 1.2241944074630737, "learning_rate": 0.001, "loss": 2.5223, "step": 43600 }, { "epoch": 14.124111182934712, "grad_norm": 1.0896589756011963, "learning_rate": 0.001, "loss": 2.5267, "step": 43700 }, { "epoch": 14.156431803490626, "grad_norm": 4.487442970275879, "learning_rate": 0.001, "loss": 2.5183, "step": 43800 }, { "epoch": 14.188752424046541, "grad_norm": 1.220746397972107, "learning_rate": 0.001, "loss": 2.5448, "step": 43900 }, { "epoch": 14.221073044602456, "grad_norm": 0.9159670472145081, "learning_rate": 0.001, "loss": 2.5406, "step": 44000 }, { "epoch": 14.25339366515837, "grad_norm": 1.1351267099380493, "learning_rate": 0.001, "loss": 2.5199, "step": 44100 }, { "epoch": 14.285714285714286, "grad_norm": 1.135184645652771, "learning_rate": 0.001, "loss": 2.528, "step": 44200 }, { "epoch": 14.318034906270201, "grad_norm": 1.3520567417144775, "learning_rate": 0.001, "loss": 2.5614, "step": 44300 }, { "epoch": 14.350355526826116, "grad_norm": 0.9926235675811768, "learning_rate": 0.001, "loss": 2.5471, "step": 44400 }, { "epoch": 14.38267614738203, "grad_norm": 1.00084388256073, "learning_rate": 0.001, "loss": 2.5571, "step": 44500 }, { "epoch": 14.414996767937945, "grad_norm": 1.0103403329849243, "learning_rate": 0.001, "loss": 2.563, "step": 44600 }, { "epoch": 14.44731738849386, "grad_norm": 0.967103898525238, "learning_rate": 0.001, "loss": 2.5461, "step": 44700 }, { "epoch": 14.479638009049774, "grad_norm": 0.884563684463501, "learning_rate": 0.001, "loss": 2.5593, "step": 44800 }, { "epoch": 14.511958629605688, "grad_norm": 1.0966639518737793, "learning_rate": 0.001, "loss": 2.5802, "step": 44900 }, { "epoch": 14.544279250161603, "grad_norm": 1.0496076345443726, "learning_rate": 0.001, "loss": 2.557, "step": 45000 }, { "epoch": 14.576599870717518, "grad_norm": 1.2999776601791382, "learning_rate": 0.001, "loss": 2.5653, "step": 45100 }, { "epoch": 14.608920491273432, "grad_norm": 5.081854820251465, "learning_rate": 0.001, "loss": 2.562, "step": 45200 }, { "epoch": 14.641241111829347, "grad_norm": 1.0345853567123413, "learning_rate": 0.001, "loss": 2.574, "step": 45300 }, { "epoch": 14.673561732385261, "grad_norm": 0.9832631945610046, "learning_rate": 0.001, "loss": 2.5621, "step": 45400 }, { "epoch": 14.705882352941176, "grad_norm": 2.3717122077941895, "learning_rate": 0.001, "loss": 2.5693, "step": 45500 }, { "epoch": 14.738202973497092, "grad_norm": 0.8935089111328125, "learning_rate": 0.001, "loss": 2.5827, "step": 45600 }, { "epoch": 14.770523594053007, "grad_norm": 3.5782103538513184, "learning_rate": 0.001, "loss": 2.593, "step": 45700 }, { "epoch": 14.802844214608921, "grad_norm": 0.9158485531806946, "learning_rate": 0.001, "loss": 2.5692, "step": 45800 }, { "epoch": 14.835164835164836, "grad_norm": 1.114057183265686, "learning_rate": 0.001, "loss": 2.5966, "step": 45900 }, { "epoch": 14.86748545572075, "grad_norm": 1.069358468055725, "learning_rate": 0.001, "loss": 2.6012, "step": 46000 }, { "epoch": 14.899806076276665, "grad_norm": 0.8641564249992371, "learning_rate": 0.001, "loss": 2.5826, "step": 46100 }, { "epoch": 14.93212669683258, "grad_norm": 0.84532630443573, "learning_rate": 0.001, "loss": 2.6051, "step": 46200 }, { "epoch": 14.964447317388494, "grad_norm": 1.03289794921875, "learning_rate": 0.001, "loss": 2.5857, "step": 46300 }, { "epoch": 14.996767937944409, "grad_norm": 1.284227728843689, "learning_rate": 0.001, "loss": 2.598, "step": 46400 }, { "epoch": 15.029088558500323, "grad_norm": 1.4091312885284424, "learning_rate": 0.001, "loss": 2.4898, "step": 46500 }, { "epoch": 15.061409179056238, "grad_norm": 1.0931891202926636, "learning_rate": 0.001, "loss": 2.4753, "step": 46600 }, { "epoch": 15.093729799612152, "grad_norm": 1.5152266025543213, "learning_rate": 0.001, "loss": 2.5056, "step": 46700 }, { "epoch": 15.126050420168067, "grad_norm": 1.9954066276550293, "learning_rate": 0.001, "loss": 2.4675, "step": 46800 }, { "epoch": 15.158371040723981, "grad_norm": 1.0176713466644287, "learning_rate": 0.001, "loss": 2.4798, "step": 46900 }, { "epoch": 15.190691661279896, "grad_norm": 1.0484946966171265, "learning_rate": 0.001, "loss": 2.4814, "step": 47000 }, { "epoch": 15.22301228183581, "grad_norm": 1.1364214420318604, "learning_rate": 0.001, "loss": 2.4909, "step": 47100 }, { "epoch": 15.255332902391725, "grad_norm": 0.843647301197052, "learning_rate": 0.001, "loss": 2.5254, "step": 47200 }, { "epoch": 15.287653522947641, "grad_norm": 1.0442661046981812, "learning_rate": 0.001, "loss": 2.4996, "step": 47300 }, { "epoch": 15.319974143503556, "grad_norm": 1.1026116609573364, "learning_rate": 0.001, "loss": 2.5112, "step": 47400 }, { "epoch": 15.35229476405947, "grad_norm": 1.0261306762695312, "learning_rate": 0.001, "loss": 2.5319, "step": 47500 }, { "epoch": 15.384615384615385, "grad_norm": 1.0595712661743164, "learning_rate": 0.001, "loss": 2.5023, "step": 47600 }, { "epoch": 15.4169360051713, "grad_norm": 0.9965971112251282, "learning_rate": 0.001, "loss": 2.5155, "step": 47700 }, { "epoch": 15.449256625727214, "grad_norm": 1.0712156295776367, "learning_rate": 0.001, "loss": 2.5204, "step": 47800 }, { "epoch": 15.481577246283129, "grad_norm": 1.0345665216445923, "learning_rate": 0.001, "loss": 2.5161, "step": 47900 }, { "epoch": 15.513897866839043, "grad_norm": 1.0655579566955566, "learning_rate": 0.001, "loss": 2.5388, "step": 48000 }, { "epoch": 15.546218487394958, "grad_norm": 0.9177590012550354, "learning_rate": 0.001, "loss": 2.5338, "step": 48100 }, { "epoch": 15.578539107950872, "grad_norm": 1.2494372129440308, "learning_rate": 0.001, "loss": 2.5208, "step": 48200 }, { "epoch": 15.610859728506787, "grad_norm": 1.279309630393982, "learning_rate": 0.001, "loss": 2.5318, "step": 48300 }, { "epoch": 15.643180349062701, "grad_norm": 1.2379471063613892, "learning_rate": 0.001, "loss": 2.547, "step": 48400 }, { "epoch": 15.675500969618616, "grad_norm": 2.951427698135376, "learning_rate": 0.001, "loss": 2.5488, "step": 48500 }, { "epoch": 15.70782159017453, "grad_norm": 1.290220856666565, "learning_rate": 0.001, "loss": 2.533, "step": 48600 }, { "epoch": 15.740142210730447, "grad_norm": 1.1895333528518677, "learning_rate": 0.001, "loss": 2.5444, "step": 48700 }, { "epoch": 15.772462831286362, "grad_norm": 2.34897518157959, "learning_rate": 0.001, "loss": 2.5489, "step": 48800 }, { "epoch": 15.804783451842276, "grad_norm": 1.3422526121139526, "learning_rate": 0.001, "loss": 2.5439, "step": 48900 }, { "epoch": 15.83710407239819, "grad_norm": 4.121556758880615, "learning_rate": 0.001, "loss": 2.5613, "step": 49000 }, { "epoch": 15.869424692954105, "grad_norm": 1.2812308073043823, "learning_rate": 0.001, "loss": 2.5426, "step": 49100 }, { "epoch": 15.90174531351002, "grad_norm": 1.0165331363677979, "learning_rate": 0.001, "loss": 2.5463, "step": 49200 }, { "epoch": 15.934065934065934, "grad_norm": 1.50202476978302, "learning_rate": 0.001, "loss": 2.5549, "step": 49300 }, { "epoch": 15.966386554621849, "grad_norm": 0.862257182598114, "learning_rate": 0.001, "loss": 2.5587, "step": 49400 }, { "epoch": 15.998707175177763, "grad_norm": 2.2072973251342773, "learning_rate": 0.001, "loss": 2.5526, "step": 49500 }, { "epoch": 16.031027795733678, "grad_norm": 1.0963802337646484, "learning_rate": 0.001, "loss": 2.4422, "step": 49600 }, { "epoch": 16.063348416289593, "grad_norm": 1.2042686939239502, "learning_rate": 0.001, "loss": 2.4421, "step": 49700 }, { "epoch": 16.095669036845507, "grad_norm": 1.6315501928329468, "learning_rate": 0.001, "loss": 2.4412, "step": 49800 }, { "epoch": 16.12798965740142, "grad_norm": 2.00921630859375, "learning_rate": 0.001, "loss": 2.4665, "step": 49900 }, { "epoch": 16.160310277957336, "grad_norm": 1.055575966835022, "learning_rate": 0.001, "loss": 2.452, "step": 50000 }, { "epoch": 16.19263089851325, "grad_norm": 1.1581629514694214, "learning_rate": 0.001, "loss": 2.4615, "step": 50100 }, { "epoch": 16.224951519069165, "grad_norm": 1.7904787063598633, "learning_rate": 0.001, "loss": 2.4714, "step": 50200 }, { "epoch": 16.25727213962508, "grad_norm": 1.4589978456497192, "learning_rate": 0.001, "loss": 2.4683, "step": 50300 }, { "epoch": 16.289592760180994, "grad_norm": 1.5155261754989624, "learning_rate": 0.001, "loss": 2.47, "step": 50400 }, { "epoch": 16.32191338073691, "grad_norm": 1.252715826034546, "learning_rate": 0.001, "loss": 2.4642, "step": 50500 }, { "epoch": 16.354234001292824, "grad_norm": 1.4739776849746704, "learning_rate": 0.001, "loss": 2.4842, "step": 50600 }, { "epoch": 16.386554621848738, "grad_norm": 1.3864160776138306, "learning_rate": 0.001, "loss": 2.4822, "step": 50700 }, { "epoch": 16.418875242404653, "grad_norm": 1.8719546794891357, "learning_rate": 0.001, "loss": 2.4923, "step": 50800 }, { "epoch": 16.451195862960567, "grad_norm": 1.2018576860427856, "learning_rate": 0.001, "loss": 2.4878, "step": 50900 }, { "epoch": 16.483516483516482, "grad_norm": 1.3293647766113281, "learning_rate": 0.001, "loss": 2.5061, "step": 51000 }, { "epoch": 16.5158371040724, "grad_norm": 1.0113029479980469, "learning_rate": 0.001, "loss": 2.4841, "step": 51100 }, { "epoch": 16.548157724628314, "grad_norm": 1.2614195346832275, "learning_rate": 0.001, "loss": 2.4905, "step": 51200 }, { "epoch": 16.58047834518423, "grad_norm": 1.2076184749603271, "learning_rate": 0.001, "loss": 2.514, "step": 51300 }, { "epoch": 16.612798965740144, "grad_norm": 1.0827510356903076, "learning_rate": 0.001, "loss": 2.4936, "step": 51400 }, { "epoch": 16.645119586296058, "grad_norm": 1.4097312688827515, "learning_rate": 0.001, "loss": 2.4964, "step": 51500 }, { "epoch": 16.677440206851973, "grad_norm": 1.2123239040374756, "learning_rate": 0.001, "loss": 2.5064, "step": 51600 }, { "epoch": 16.709760827407887, "grad_norm": 1.6266664266586304, "learning_rate": 0.001, "loss": 2.5138, "step": 51700 }, { "epoch": 16.742081447963802, "grad_norm": 2.1003901958465576, "learning_rate": 0.001, "loss": 2.5102, "step": 51800 }, { "epoch": 16.774402068519716, "grad_norm": 1.2779039144515991, "learning_rate": 0.001, "loss": 2.5151, "step": 51900 }, { "epoch": 16.80672268907563, "grad_norm": 2.2204203605651855, "learning_rate": 0.001, "loss": 2.5308, "step": 52000 }, { "epoch": 16.839043309631545, "grad_norm": 1.3629536628723145, "learning_rate": 0.001, "loss": 2.5066, "step": 52100 }, { "epoch": 16.87136393018746, "grad_norm": 1.068474292755127, "learning_rate": 0.001, "loss": 2.5069, "step": 52200 }, { "epoch": 16.903684550743375, "grad_norm": 1.5404812097549438, "learning_rate": 0.001, "loss": 2.5187, "step": 52300 }, { "epoch": 16.93600517129929, "grad_norm": 1.736141324043274, "learning_rate": 0.001, "loss": 2.5116, "step": 52400 }, { "epoch": 16.968325791855204, "grad_norm": 1.3849798440933228, "learning_rate": 0.001, "loss": 2.5445, "step": 52500 }, { "epoch": 17.00064641241112, "grad_norm": 1.5114665031433105, "learning_rate": 0.001, "loss": 2.5246, "step": 52600 }, { "epoch": 17.032967032967033, "grad_norm": 1.2839502096176147, "learning_rate": 0.001, "loss": 2.4067, "step": 52700 }, { "epoch": 17.065287653522947, "grad_norm": 1.2795181274414062, "learning_rate": 0.001, "loss": 2.4142, "step": 52800 }, { "epoch": 17.097608274078862, "grad_norm": 1.0442157983779907, "learning_rate": 0.001, "loss": 2.4037, "step": 52900 }, { "epoch": 17.129928894634777, "grad_norm": 1.378151535987854, "learning_rate": 0.001, "loss": 2.4203, "step": 53000 }, { "epoch": 17.16224951519069, "grad_norm": 1.5451349020004272, "learning_rate": 0.001, "loss": 2.4442, "step": 53100 }, { "epoch": 17.194570135746606, "grad_norm": 1.6236653327941895, "learning_rate": 0.001, "loss": 2.4324, "step": 53200 }, { "epoch": 17.22689075630252, "grad_norm": 1.5340590476989746, "learning_rate": 0.001, "loss": 2.4547, "step": 53300 }, { "epoch": 17.259211376858435, "grad_norm": 1.5516611337661743, "learning_rate": 0.001, "loss": 2.4306, "step": 53400 }, { "epoch": 17.29153199741435, "grad_norm": 1.6382969617843628, "learning_rate": 0.001, "loss": 2.4354, "step": 53500 }, { "epoch": 17.323852617970264, "grad_norm": 1.3518773317337036, "learning_rate": 0.001, "loss": 2.4373, "step": 53600 }, { "epoch": 17.35617323852618, "grad_norm": 1.695602297782898, "learning_rate": 0.001, "loss": 2.4504, "step": 53700 }, { "epoch": 17.388493859082093, "grad_norm": 1.5255744457244873, "learning_rate": 0.001, "loss": 2.456, "step": 53800 }, { "epoch": 17.420814479638008, "grad_norm": 1.6949161291122437, "learning_rate": 0.001, "loss": 2.4675, "step": 53900 }, { "epoch": 17.453135100193922, "grad_norm": 1.2067433595657349, "learning_rate": 0.001, "loss": 2.457, "step": 54000 }, { "epoch": 17.485455720749837, "grad_norm": 1.4657877683639526, "learning_rate": 0.001, "loss": 2.4693, "step": 54100 }, { "epoch": 17.517776341305755, "grad_norm": 1.708070993423462, "learning_rate": 0.001, "loss": 2.4641, "step": 54200 }, { "epoch": 17.55009696186167, "grad_norm": 2.292349100112915, "learning_rate": 0.001, "loss": 2.4723, "step": 54300 }, { "epoch": 17.582417582417584, "grad_norm": 1.5095382928848267, "learning_rate": 0.001, "loss": 2.506, "step": 54400 }, { "epoch": 17.6147382029735, "grad_norm": 1.7089265584945679, "learning_rate": 0.001, "loss": 2.4716, "step": 54500 }, { "epoch": 17.647058823529413, "grad_norm": 2.317176103591919, "learning_rate": 0.001, "loss": 2.4866, "step": 54600 }, { "epoch": 17.679379444085328, "grad_norm": 1.623173475265503, "learning_rate": 0.001, "loss": 2.4643, "step": 54700 }, { "epoch": 17.711700064641242, "grad_norm": 3.325761079788208, "learning_rate": 0.001, "loss": 2.4856, "step": 54800 }, { "epoch": 17.744020685197157, "grad_norm": 1.4427716732025146, "learning_rate": 0.001, "loss": 2.5098, "step": 54900 }, { "epoch": 17.77634130575307, "grad_norm": 1.4329845905303955, "learning_rate": 0.001, "loss": 2.4915, "step": 55000 }, { "epoch": 17.808661926308986, "grad_norm": 1.14120352268219, "learning_rate": 0.001, "loss": 2.4935, "step": 55100 }, { "epoch": 17.8409825468649, "grad_norm": 2.3714635372161865, "learning_rate": 0.001, "loss": 2.4936, "step": 55200 }, { "epoch": 17.873303167420815, "grad_norm": 1.9895669221878052, "learning_rate": 0.001, "loss": 2.4947, "step": 55300 }, { "epoch": 17.90562378797673, "grad_norm": 1.5808202028274536, "learning_rate": 0.001, "loss": 2.4771, "step": 55400 }, { "epoch": 17.937944408532644, "grad_norm": 1.684810996055603, "learning_rate": 0.001, "loss": 2.4792, "step": 55500 }, { "epoch": 17.97026502908856, "grad_norm": 1.3118782043457031, "learning_rate": 0.001, "loss": 2.5075, "step": 55600 }, { "epoch": 18.002585649644473, "grad_norm": 1.1173139810562134, "learning_rate": 0.001, "loss": 2.5273, "step": 55700 }, { "epoch": 18.034906270200388, "grad_norm": 1.118417739868164, "learning_rate": 0.001, "loss": 2.3807, "step": 55800 }, { "epoch": 18.067226890756302, "grad_norm": 1.1829055547714233, "learning_rate": 0.001, "loss": 2.3833, "step": 55900 }, { "epoch": 18.099547511312217, "grad_norm": 1.0254830121994019, "learning_rate": 0.001, "loss": 2.3902, "step": 56000 }, { "epoch": 18.13186813186813, "grad_norm": 1.535539150238037, "learning_rate": 0.001, "loss": 2.4022, "step": 56100 }, { "epoch": 18.164188752424046, "grad_norm": 1.5581403970718384, "learning_rate": 0.001, "loss": 2.4331, "step": 56200 }, { "epoch": 18.19650937297996, "grad_norm": 1.2088077068328857, "learning_rate": 0.001, "loss": 2.4185, "step": 56300 }, { "epoch": 18.228829993535875, "grad_norm": 0.9398021697998047, "learning_rate": 0.001, "loss": 2.4297, "step": 56400 }, { "epoch": 18.26115061409179, "grad_norm": 1.510496735572815, "learning_rate": 0.001, "loss": 2.4221, "step": 56500 }, { "epoch": 18.293471234647704, "grad_norm": 1.3331588506698608, "learning_rate": 0.001, "loss": 2.4167, "step": 56600 }, { "epoch": 18.32579185520362, "grad_norm": 0.774921178817749, "learning_rate": 0.001, "loss": 2.439, "step": 56700 }, { "epoch": 18.358112475759533, "grad_norm": 1.1739826202392578, "learning_rate": 0.001, "loss": 2.3976, "step": 56800 }, { "epoch": 18.390433096315448, "grad_norm": 2.1356887817382812, "learning_rate": 0.001, "loss": 2.4275, "step": 56900 }, { "epoch": 18.422753716871362, "grad_norm": 1.2472251653671265, "learning_rate": 0.001, "loss": 2.429, "step": 57000 }, { "epoch": 18.455074337427277, "grad_norm": 1.3715382814407349, "learning_rate": 0.001, "loss": 2.446, "step": 57100 }, { "epoch": 18.48739495798319, "grad_norm": 2.452895402908325, "learning_rate": 0.001, "loss": 2.4084, "step": 57200 }, { "epoch": 18.51971557853911, "grad_norm": 1.2471091747283936, "learning_rate": 0.001, "loss": 2.4494, "step": 57300 }, { "epoch": 18.552036199095024, "grad_norm": 1.332595705986023, "learning_rate": 0.001, "loss": 2.4633, "step": 57400 }, { "epoch": 18.58435681965094, "grad_norm": 1.9065806865692139, "learning_rate": 0.001, "loss": 2.4549, "step": 57500 }, { "epoch": 18.616677440206853, "grad_norm": 1.8377671241760254, "learning_rate": 0.001, "loss": 2.4392, "step": 57600 }, { "epoch": 18.648998060762768, "grad_norm": 4.071170806884766, "learning_rate": 0.001, "loss": 2.4563, "step": 57700 }, { "epoch": 18.681318681318682, "grad_norm": 1.4802820682525635, "learning_rate": 0.001, "loss": 2.4475, "step": 57800 }, { "epoch": 18.713639301874597, "grad_norm": 1.4836938381195068, "learning_rate": 0.001, "loss": 2.436, "step": 57900 }, { "epoch": 18.74595992243051, "grad_norm": 2.211913824081421, "learning_rate": 0.001, "loss": 2.4629, "step": 58000 }, { "epoch": 18.778280542986426, "grad_norm": 1.2345269918441772, "learning_rate": 0.001, "loss": 2.4782, "step": 58100 }, { "epoch": 18.81060116354234, "grad_norm": 1.3505396842956543, "learning_rate": 0.001, "loss": 2.4717, "step": 58200 }, { "epoch": 18.842921784098255, "grad_norm": 1.277950644493103, "learning_rate": 0.001, "loss": 2.4651, "step": 58300 }, { "epoch": 18.87524240465417, "grad_norm": 1.647342324256897, "learning_rate": 0.001, "loss": 2.4763, "step": 58400 }, { "epoch": 18.907563025210084, "grad_norm": 1.0530786514282227, "learning_rate": 0.001, "loss": 2.4538, "step": 58500 }, { "epoch": 18.939883645766, "grad_norm": 1.1907941102981567, "learning_rate": 0.001, "loss": 2.4825, "step": 58600 }, { "epoch": 18.972204266321913, "grad_norm": 1.3167283535003662, "learning_rate": 0.001, "loss": 2.484, "step": 58700 }, { "epoch": 19.004524886877828, "grad_norm": 1.194963812828064, "learning_rate": 0.001, "loss": 2.4721, "step": 58800 }, { "epoch": 19.036845507433743, "grad_norm": 1.211214542388916, "learning_rate": 0.001, "loss": 2.3636, "step": 58900 }, { "epoch": 19.069166127989657, "grad_norm": 1.4426138401031494, "learning_rate": 0.001, "loss": 2.3529, "step": 59000 }, { "epoch": 19.10148674854557, "grad_norm": 1.2670257091522217, "learning_rate": 0.001, "loss": 2.3665, "step": 59100 }, { "epoch": 19.133807369101486, "grad_norm": 1.0103507041931152, "learning_rate": 0.001, "loss": 2.391, "step": 59200 }, { "epoch": 19.1661279896574, "grad_norm": 1.0708131790161133, "learning_rate": 0.001, "loss": 2.3645, "step": 59300 }, { "epoch": 19.198448610213315, "grad_norm": 0.9243292808532715, "learning_rate": 0.001, "loss": 2.3752, "step": 59400 }, { "epoch": 19.23076923076923, "grad_norm": 1.0486375093460083, "learning_rate": 0.001, "loss": 2.3803, "step": 59500 }, { "epoch": 19.263089851325145, "grad_norm": 1.3514083623886108, "learning_rate": 0.001, "loss": 2.399, "step": 59600 }, { "epoch": 19.29541047188106, "grad_norm": 1.2398810386657715, "learning_rate": 0.001, "loss": 2.3822, "step": 59700 }, { "epoch": 19.327731092436974, "grad_norm": 1.156231164932251, "learning_rate": 0.001, "loss": 2.3911, "step": 59800 }, { "epoch": 19.360051712992888, "grad_norm": 1.154314637184143, "learning_rate": 0.001, "loss": 2.4117, "step": 59900 }, { "epoch": 19.392372333548803, "grad_norm": 1.0833338499069214, "learning_rate": 0.001, "loss": 2.3934, "step": 60000 }, { "epoch": 19.424692954104717, "grad_norm": 1.3735226392745972, "learning_rate": 0.001, "loss": 2.4067, "step": 60100 }, { "epoch": 19.457013574660632, "grad_norm": 1.299041509628296, "learning_rate": 0.001, "loss": 2.3968, "step": 60200 }, { "epoch": 19.489334195216546, "grad_norm": 1.3541337251663208, "learning_rate": 0.001, "loss": 2.4257, "step": 60300 }, { "epoch": 19.521654815772465, "grad_norm": 4.125209331512451, "learning_rate": 0.001, "loss": 2.4163, "step": 60400 }, { "epoch": 19.55397543632838, "grad_norm": 1.1537212133407593, "learning_rate": 0.001, "loss": 2.4088, "step": 60500 }, { "epoch": 19.586296056884294, "grad_norm": 1.5417110919952393, "learning_rate": 0.001, "loss": 2.4044, "step": 60600 }, { "epoch": 19.618616677440208, "grad_norm": 1.193629264831543, "learning_rate": 0.001, "loss": 2.4258, "step": 60700 }, { "epoch": 19.650937297996123, "grad_norm": 1.486586332321167, "learning_rate": 0.001, "loss": 2.4163, "step": 60800 }, { "epoch": 19.683257918552037, "grad_norm": 1.1932374238967896, "learning_rate": 0.001, "loss": 2.4115, "step": 60900 }, { "epoch": 19.715578539107952, "grad_norm": 1.2805578708648682, "learning_rate": 0.001, "loss": 2.4261, "step": 61000 }, { "epoch": 19.747899159663866, "grad_norm": 0.9185644388198853, "learning_rate": 0.001, "loss": 2.4094, "step": 61100 }, { "epoch": 19.78021978021978, "grad_norm": 0.8623614311218262, "learning_rate": 0.001, "loss": 2.4174, "step": 61200 }, { "epoch": 19.812540400775696, "grad_norm": 1.3587428331375122, "learning_rate": 0.001, "loss": 2.4428, "step": 61300 }, { "epoch": 19.84486102133161, "grad_norm": 1.630331039428711, "learning_rate": 0.001, "loss": 2.4071, "step": 61400 }, { "epoch": 19.877181641887525, "grad_norm": 1.189499020576477, "learning_rate": 0.001, "loss": 2.4466, "step": 61500 }, { "epoch": 19.90950226244344, "grad_norm": 1.0527502298355103, "learning_rate": 0.001, "loss": 2.4562, "step": 61600 }, { "epoch": 19.941822882999354, "grad_norm": 0.8929858207702637, "learning_rate": 0.001, "loss": 2.4639, "step": 61700 }, { "epoch": 19.97414350355527, "grad_norm": 1.1614911556243896, "learning_rate": 0.001, "loss": 2.4598, "step": 61800 }, { "epoch": 20.006464124111183, "grad_norm": 0.9483317136764526, "learning_rate": 0.001, "loss": 2.4296, "step": 61900 }, { "epoch": 20.038784744667097, "grad_norm": 1.1722543239593506, "learning_rate": 0.001, "loss": 2.3389, "step": 62000 }, { "epoch": 20.071105365223012, "grad_norm": 1.0930012464523315, "learning_rate": 0.001, "loss": 2.3199, "step": 62100 }, { "epoch": 20.103425985778927, "grad_norm": 0.8938237428665161, "learning_rate": 0.001, "loss": 2.3487, "step": 62200 }, { "epoch": 20.13574660633484, "grad_norm": 0.9773704409599304, "learning_rate": 0.001, "loss": 2.3299, "step": 62300 }, { "epoch": 20.168067226890756, "grad_norm": 1.046343445777893, "learning_rate": 0.001, "loss": 2.3465, "step": 62400 }, { "epoch": 20.20038784744667, "grad_norm": 1.1216031312942505, "learning_rate": 0.001, "loss": 2.3528, "step": 62500 }, { "epoch": 20.232708468002585, "grad_norm": 1.1554218530654907, "learning_rate": 0.001, "loss": 2.3575, "step": 62600 }, { "epoch": 20.2650290885585, "grad_norm": 1.0994579792022705, "learning_rate": 0.001, "loss": 2.3478, "step": 62700 }, { "epoch": 20.297349709114414, "grad_norm": 0.9941263198852539, "learning_rate": 0.001, "loss": 2.355, "step": 62800 }, { "epoch": 20.32967032967033, "grad_norm": 0.9981868863105774, "learning_rate": 0.001, "loss": 2.3589, "step": 62900 }, { "epoch": 20.361990950226243, "grad_norm": 1.2517554759979248, "learning_rate": 0.001, "loss": 2.3608, "step": 63000 }, { "epoch": 20.394311570782158, "grad_norm": 2.89823842048645, "learning_rate": 0.001, "loss": 2.3934, "step": 63100 }, { "epoch": 20.426632191338072, "grad_norm": 1.340299129486084, "learning_rate": 0.001, "loss": 2.3879, "step": 63200 }, { "epoch": 20.458952811893987, "grad_norm": 1.2757624387741089, "learning_rate": 0.001, "loss": 2.3904, "step": 63300 }, { "epoch": 20.4912734324499, "grad_norm": 1.1209827661514282, "learning_rate": 0.001, "loss": 2.3921, "step": 63400 }, { "epoch": 20.52359405300582, "grad_norm": 1.093017339706421, "learning_rate": 0.001, "loss": 2.3934, "step": 63500 }, { "epoch": 20.555914673561734, "grad_norm": 0.9462026357650757, "learning_rate": 0.001, "loss": 2.4078, "step": 63600 }, { "epoch": 20.58823529411765, "grad_norm": 2.434621810913086, "learning_rate": 0.001, "loss": 2.3957, "step": 63700 }, { "epoch": 20.620555914673563, "grad_norm": 2.4534294605255127, "learning_rate": 0.001, "loss": 2.4115, "step": 63800 }, { "epoch": 20.652876535229478, "grad_norm": 3.5562667846679688, "learning_rate": 0.001, "loss": 2.4128, "step": 63900 }, { "epoch": 20.685197155785392, "grad_norm": 1.1036243438720703, "learning_rate": 0.001, "loss": 2.4102, "step": 64000 }, { "epoch": 20.717517776341307, "grad_norm": 0.9519385099411011, "learning_rate": 0.001, "loss": 2.427, "step": 64100 }, { "epoch": 20.74983839689722, "grad_norm": 0.9384015798568726, "learning_rate": 0.001, "loss": 2.4156, "step": 64200 }, { "epoch": 20.782159017453136, "grad_norm": 1.4379868507385254, "learning_rate": 0.001, "loss": 2.4512, "step": 64300 }, { "epoch": 20.81447963800905, "grad_norm": 0.9812710881233215, "learning_rate": 0.001, "loss": 2.4319, "step": 64400 }, { "epoch": 20.846800258564965, "grad_norm": 1.0452686548233032, "learning_rate": 0.001, "loss": 2.409, "step": 64500 }, { "epoch": 20.87912087912088, "grad_norm": 0.8607979416847229, "learning_rate": 0.001, "loss": 2.4109, "step": 64600 }, { "epoch": 20.911441499676794, "grad_norm": 1.2818719148635864, "learning_rate": 0.001, "loss": 2.4038, "step": 64700 }, { "epoch": 20.94376212023271, "grad_norm": 0.885199785232544, "learning_rate": 0.001, "loss": 2.4233, "step": 64800 }, { "epoch": 20.976082740788623, "grad_norm": 0.9898984432220459, "learning_rate": 0.001, "loss": 2.41, "step": 64900 }, { "epoch": 21.008403361344538, "grad_norm": 0.8644759058952332, "learning_rate": 0.001, "loss": 2.3982, "step": 65000 }, { "epoch": 21.040723981900452, "grad_norm": 0.9438836574554443, "learning_rate": 0.001, "loss": 2.3011, "step": 65100 }, { "epoch": 21.073044602456367, "grad_norm": 0.9586792588233948, "learning_rate": 0.001, "loss": 2.297, "step": 65200 }, { "epoch": 21.10536522301228, "grad_norm": 1.5962207317352295, "learning_rate": 0.001, "loss": 2.3051, "step": 65300 }, { "epoch": 21.137685843568196, "grad_norm": 0.8631432056427002, "learning_rate": 0.001, "loss": 2.3267, "step": 65400 }, { "epoch": 21.17000646412411, "grad_norm": 1.1386375427246094, "learning_rate": 0.001, "loss": 2.338, "step": 65500 }, { "epoch": 21.202327084680025, "grad_norm": 1.4897397756576538, "learning_rate": 0.001, "loss": 2.3367, "step": 65600 }, { "epoch": 21.23464770523594, "grad_norm": 1.0409691333770752, "learning_rate": 0.001, "loss": 2.3314, "step": 65700 }, { "epoch": 21.266968325791854, "grad_norm": 1.1130894422531128, "learning_rate": 0.001, "loss": 2.3314, "step": 65800 }, { "epoch": 21.29928894634777, "grad_norm": 1.1224957704544067, "learning_rate": 0.001, "loss": 2.3371, "step": 65900 }, { "epoch": 21.331609566903683, "grad_norm": 1.1175363063812256, "learning_rate": 0.001, "loss": 2.346, "step": 66000 }, { "epoch": 21.363930187459598, "grad_norm": 1.0475705862045288, "learning_rate": 0.001, "loss": 2.3292, "step": 66100 }, { "epoch": 21.396250808015512, "grad_norm": 0.9737367033958435, "learning_rate": 0.001, "loss": 2.3323, "step": 66200 }, { "epoch": 21.428571428571427, "grad_norm": 1.0883287191390991, "learning_rate": 0.001, "loss": 2.3302, "step": 66300 }, { "epoch": 21.46089204912734, "grad_norm": 1.2491554021835327, "learning_rate": 0.001, "loss": 2.3637, "step": 66400 }, { "epoch": 21.49321266968326, "grad_norm": 1.4668452739715576, "learning_rate": 0.001, "loss": 2.3435, "step": 66500 }, { "epoch": 21.525533290239174, "grad_norm": 0.9797831773757935, "learning_rate": 0.001, "loss": 2.3387, "step": 66600 }, { "epoch": 21.55785391079509, "grad_norm": 0.9604955315589905, "learning_rate": 0.001, "loss": 2.3681, "step": 66700 }, { "epoch": 21.590174531351003, "grad_norm": 1.1717238426208496, "learning_rate": 0.001, "loss": 2.3543, "step": 66800 }, { "epoch": 21.622495151906918, "grad_norm": 0.8783063888549805, "learning_rate": 0.001, "loss": 2.3617, "step": 66900 }, { "epoch": 21.654815772462833, "grad_norm": 1.0490163564682007, "learning_rate": 0.001, "loss": 2.3799, "step": 67000 }, { "epoch": 21.687136393018747, "grad_norm": 1.2691450119018555, "learning_rate": 0.001, "loss": 2.3761, "step": 67100 }, { "epoch": 21.71945701357466, "grad_norm": 1.249742865562439, "learning_rate": 0.001, "loss": 2.3873, "step": 67200 }, { "epoch": 21.751777634130576, "grad_norm": 1.089523196220398, "learning_rate": 0.001, "loss": 2.3821, "step": 67300 }, { "epoch": 21.78409825468649, "grad_norm": 1.1108379364013672, "learning_rate": 0.001, "loss": 2.3644, "step": 67400 }, { "epoch": 21.816418875242405, "grad_norm": 1.2876147031784058, "learning_rate": 0.001, "loss": 2.377, "step": 67500 }, { "epoch": 21.84873949579832, "grad_norm": 1.2201858758926392, "learning_rate": 0.001, "loss": 2.3722, "step": 67600 }, { "epoch": 21.881060116354234, "grad_norm": 1.0787039995193481, "learning_rate": 0.001, "loss": 2.3805, "step": 67700 }, { "epoch": 21.91338073691015, "grad_norm": 1.1165393590927124, "learning_rate": 0.001, "loss": 2.3913, "step": 67800 }, { "epoch": 21.945701357466064, "grad_norm": 0.8625513315200806, "learning_rate": 0.001, "loss": 2.3901, "step": 67900 }, { "epoch": 21.978021978021978, "grad_norm": 0.8368663191795349, "learning_rate": 0.001, "loss": 2.4074, "step": 68000 }, { "epoch": 22.010342598577893, "grad_norm": 3.856996536254883, "learning_rate": 0.001, "loss": 2.3562, "step": 68100 }, { "epoch": 22.042663219133807, "grad_norm": 1.1679662466049194, "learning_rate": 0.001, "loss": 2.2785, "step": 68200 }, { "epoch": 22.07498383968972, "grad_norm": 0.8213762044906616, "learning_rate": 0.001, "loss": 2.2896, "step": 68300 }, { "epoch": 22.107304460245636, "grad_norm": 1.5368238687515259, "learning_rate": 0.001, "loss": 2.2667, "step": 68400 }, { "epoch": 22.13962508080155, "grad_norm": 1.0807921886444092, "learning_rate": 0.001, "loss": 2.2694, "step": 68500 }, { "epoch": 22.171945701357465, "grad_norm": 1.1393674612045288, "learning_rate": 0.001, "loss": 2.2975, "step": 68600 }, { "epoch": 22.20426632191338, "grad_norm": 0.9241352677345276, "learning_rate": 0.001, "loss": 2.3101, "step": 68700 }, { "epoch": 22.236586942469295, "grad_norm": 2.782007932662964, "learning_rate": 0.001, "loss": 2.314, "step": 68800 }, { "epoch": 22.26890756302521, "grad_norm": 1.119933009147644, "learning_rate": 0.001, "loss": 2.3097, "step": 68900 }, { "epoch": 22.301228183581124, "grad_norm": 0.9576383829116821, "learning_rate": 0.001, "loss": 2.3261, "step": 69000 }, { "epoch": 22.33354880413704, "grad_norm": 1.4238148927688599, "learning_rate": 0.001, "loss": 2.3033, "step": 69100 }, { "epoch": 22.365869424692953, "grad_norm": 0.9853901863098145, "learning_rate": 0.001, "loss": 2.326, "step": 69200 }, { "epoch": 22.398190045248867, "grad_norm": 4.695359230041504, "learning_rate": 0.001, "loss": 2.3155, "step": 69300 }, { "epoch": 22.430510665804782, "grad_norm": 2.7994208335876465, "learning_rate": 0.001, "loss": 2.3066, "step": 69400 }, { "epoch": 22.462831286360696, "grad_norm": 1.8417226076126099, "learning_rate": 0.001, "loss": 2.312, "step": 69500 }, { "epoch": 22.49515190691661, "grad_norm": 1.1564440727233887, "learning_rate": 0.001, "loss": 2.3097, "step": 69600 }, { "epoch": 22.52747252747253, "grad_norm": 1.0256704092025757, "learning_rate": 0.001, "loss": 2.337, "step": 69700 }, { "epoch": 22.559793148028444, "grad_norm": 1.3998005390167236, "learning_rate": 0.001, "loss": 2.3303, "step": 69800 }, { "epoch": 22.59211376858436, "grad_norm": 1.0680184364318848, "learning_rate": 0.001, "loss": 2.3296, "step": 69900 }, { "epoch": 22.624434389140273, "grad_norm": 0.9365416765213013, "learning_rate": 0.001, "loss": 2.3542, "step": 70000 }, { "epoch": 22.656755009696187, "grad_norm": 0.8922919034957886, "learning_rate": 0.001, "loss": 2.3507, "step": 70100 }, { "epoch": 22.689075630252102, "grad_norm": 1.119478464126587, "learning_rate": 0.001, "loss": 2.3563, "step": 70200 }, { "epoch": 22.721396250808017, "grad_norm": 0.9544029831886292, "learning_rate": 0.001, "loss": 2.3385, "step": 70300 }, { "epoch": 22.75371687136393, "grad_norm": 1.0548988580703735, "learning_rate": 0.001, "loss": 2.3463, "step": 70400 }, { "epoch": 22.786037491919846, "grad_norm": 4.401344299316406, "learning_rate": 0.001, "loss": 2.35, "step": 70500 }, { "epoch": 22.81835811247576, "grad_norm": 0.9633042812347412, "learning_rate": 0.001, "loss": 2.3525, "step": 70600 }, { "epoch": 22.850678733031675, "grad_norm": 1.1808711290359497, "learning_rate": 0.001, "loss": 2.3619, "step": 70700 }, { "epoch": 22.88299935358759, "grad_norm": 1.4953620433807373, "learning_rate": 0.001, "loss": 2.3585, "step": 70800 }, { "epoch": 22.915319974143504, "grad_norm": 0.8753530383110046, "learning_rate": 0.001, "loss": 2.3568, "step": 70900 }, { "epoch": 22.94764059469942, "grad_norm": 2.5827279090881348, "learning_rate": 0.001, "loss": 2.3598, "step": 71000 }, { "epoch": 22.979961215255333, "grad_norm": 1.084316611289978, "learning_rate": 0.001, "loss": 2.3881, "step": 71100 }, { "epoch": 23.012281835811248, "grad_norm": 0.9306713342666626, "learning_rate": 0.001, "loss": 2.3112, "step": 71200 }, { "epoch": 23.044602456367162, "grad_norm": 3.2590067386627197, "learning_rate": 0.001, "loss": 2.2374, "step": 71300 }, { "epoch": 23.076923076923077, "grad_norm": 2.478151559829712, "learning_rate": 0.001, "loss": 2.2595, "step": 71400 }, { "epoch": 23.10924369747899, "grad_norm": 1.5168392658233643, "learning_rate": 0.001, "loss": 2.27, "step": 71500 }, { "epoch": 23.141564318034906, "grad_norm": 1.6399967670440674, "learning_rate": 0.001, "loss": 2.2667, "step": 71600 }, { "epoch": 23.17388493859082, "grad_norm": 0.9589540958404541, "learning_rate": 0.001, "loss": 2.2641, "step": 71700 }, { "epoch": 23.206205559146735, "grad_norm": 1.291298747062683, "learning_rate": 0.001, "loss": 2.2849, "step": 71800 }, { "epoch": 23.23852617970265, "grad_norm": 1.4648727178573608, "learning_rate": 0.001, "loss": 2.2824, "step": 71900 }, { "epoch": 23.270846800258564, "grad_norm": 1.0592926740646362, "learning_rate": 0.001, "loss": 2.2675, "step": 72000 }, { "epoch": 23.30316742081448, "grad_norm": 1.1572728157043457, "learning_rate": 0.001, "loss": 2.2846, "step": 72100 }, { "epoch": 23.335488041370393, "grad_norm": 1.5121734142303467, "learning_rate": 0.001, "loss": 2.2889, "step": 72200 }, { "epoch": 23.367808661926308, "grad_norm": 0.9962171316146851, "learning_rate": 0.001, "loss": 2.285, "step": 72300 }, { "epoch": 23.400129282482222, "grad_norm": 1.1676732301712036, "learning_rate": 0.001, "loss": 2.306, "step": 72400 }, { "epoch": 23.432449903038137, "grad_norm": 1.1063849925994873, "learning_rate": 0.001, "loss": 2.2873, "step": 72500 }, { "epoch": 23.46477052359405, "grad_norm": 1.4836238622665405, "learning_rate": 0.001, "loss": 2.305, "step": 72600 }, { "epoch": 23.49709114414997, "grad_norm": 1.351938247680664, "learning_rate": 0.001, "loss": 2.3072, "step": 72700 }, { "epoch": 23.529411764705884, "grad_norm": 1.3444651365280151, "learning_rate": 0.001, "loss": 2.2978, "step": 72800 }, { "epoch": 23.5617323852618, "grad_norm": 1.615316390991211, "learning_rate": 0.001, "loss": 2.3184, "step": 72900 }, { "epoch": 23.594053005817713, "grad_norm": 0.9772735238075256, "learning_rate": 0.001, "loss": 2.3023, "step": 73000 }, { "epoch": 23.626373626373628, "grad_norm": 1.0487920045852661, "learning_rate": 0.001, "loss": 2.3082, "step": 73100 }, { "epoch": 23.658694246929542, "grad_norm": 1.4884216785430908, "learning_rate": 0.001, "loss": 2.3206, "step": 73200 }, { "epoch": 23.691014867485457, "grad_norm": 1.3372656106948853, "learning_rate": 0.001, "loss": 2.3219, "step": 73300 }, { "epoch": 23.72333548804137, "grad_norm": 1.463338017463684, "learning_rate": 0.001, "loss": 2.321, "step": 73400 }, { "epoch": 23.755656108597286, "grad_norm": 1.1713473796844482, "learning_rate": 0.001, "loss": 2.3162, "step": 73500 }, { "epoch": 23.7879767291532, "grad_norm": 1.4164543151855469, "learning_rate": 0.001, "loss": 2.3472, "step": 73600 }, { "epoch": 23.820297349709115, "grad_norm": 1.3859729766845703, "learning_rate": 0.001, "loss": 2.333, "step": 73700 }, { "epoch": 23.85261797026503, "grad_norm": 1.201858639717102, "learning_rate": 0.001, "loss": 2.3198, "step": 73800 }, { "epoch": 23.884938590820944, "grad_norm": 1.031951665878296, "learning_rate": 0.001, "loss": 2.3278, "step": 73900 }, { "epoch": 23.91725921137686, "grad_norm": 1.1423040628433228, "learning_rate": 0.001, "loss": 2.3259, "step": 74000 }, { "epoch": 23.949579831932773, "grad_norm": 1.2927318811416626, "learning_rate": 0.001, "loss": 2.3504, "step": 74100 }, { "epoch": 23.981900452488688, "grad_norm": 1.3169260025024414, "learning_rate": 0.001, "loss": 2.3494, "step": 74200 }, { "epoch": 24.014221073044602, "grad_norm": 1.7082717418670654, "learning_rate": 0.001, "loss": 2.2751, "step": 74300 }, { "epoch": 24.046541693600517, "grad_norm": 1.192042350769043, "learning_rate": 0.001, "loss": 2.2094, "step": 74400 }, { "epoch": 24.07886231415643, "grad_norm": 1.5198389291763306, "learning_rate": 0.001, "loss": 2.234, "step": 74500 }, { "epoch": 24.111182934712346, "grad_norm": 1.3938764333724976, "learning_rate": 0.001, "loss": 2.2351, "step": 74600 }, { "epoch": 24.14350355526826, "grad_norm": 1.2057175636291504, "learning_rate": 0.001, "loss": 2.2495, "step": 74700 }, { "epoch": 24.175824175824175, "grad_norm": 1.147182822227478, "learning_rate": 0.001, "loss": 2.2234, "step": 74800 }, { "epoch": 24.20814479638009, "grad_norm": 1.3473224639892578, "learning_rate": 0.001, "loss": 2.2254, "step": 74900 }, { "epoch": 24.240465416936004, "grad_norm": 1.2540322542190552, "learning_rate": 0.001, "loss": 2.2459, "step": 75000 }, { "epoch": 24.27278603749192, "grad_norm": 1.1672024726867676, "learning_rate": 0.001, "loss": 2.2351, "step": 75100 }, { "epoch": 24.305106658047833, "grad_norm": 1.6299784183502197, "learning_rate": 0.001, "loss": 2.2677, "step": 75200 }, { "epoch": 24.337427278603748, "grad_norm": 1.9479172229766846, "learning_rate": 0.001, "loss": 2.2551, "step": 75300 }, { "epoch": 24.369747899159663, "grad_norm": 1.5675148963928223, "learning_rate": 0.001, "loss": 2.2672, "step": 75400 }, { "epoch": 24.402068519715577, "grad_norm": 1.5553711652755737, "learning_rate": 0.001, "loss": 2.2825, "step": 75500 }, { "epoch": 24.43438914027149, "grad_norm": 10.259523391723633, "learning_rate": 0.001, "loss": 2.2485, "step": 75600 }, { "epoch": 24.466709760827406, "grad_norm": 1.1620965003967285, "learning_rate": 0.001, "loss": 2.2766, "step": 75700 }, { "epoch": 24.49903038138332, "grad_norm": 1.401378870010376, "learning_rate": 0.001, "loss": 2.2686, "step": 75800 }, { "epoch": 24.53135100193924, "grad_norm": 4.824052333831787, "learning_rate": 0.001, "loss": 2.2715, "step": 75900 }, { "epoch": 24.563671622495153, "grad_norm": 1.1687517166137695, "learning_rate": 0.001, "loss": 2.2927, "step": 76000 }, { "epoch": 24.595992243051068, "grad_norm": 1.6928391456604004, "learning_rate": 0.001, "loss": 2.2871, "step": 76100 }, { "epoch": 24.628312863606983, "grad_norm": 1.3831573724746704, "learning_rate": 0.001, "loss": 2.2921, "step": 76200 }, { "epoch": 24.660633484162897, "grad_norm": 1.2568433284759521, "learning_rate": 0.001, "loss": 2.3051, "step": 76300 }, { "epoch": 24.69295410471881, "grad_norm": 1.2737929821014404, "learning_rate": 0.001, "loss": 2.2884, "step": 76400 }, { "epoch": 24.725274725274726, "grad_norm": 1.5951337814331055, "learning_rate": 0.001, "loss": 2.29, "step": 76500 }, { "epoch": 24.75759534583064, "grad_norm": 1.684795618057251, "learning_rate": 0.001, "loss": 2.3079, "step": 76600 }, { "epoch": 24.789915966386555, "grad_norm": 3.8057632446289062, "learning_rate": 0.001, "loss": 2.2988, "step": 76700 }, { "epoch": 24.82223658694247, "grad_norm": 1.263152003288269, "learning_rate": 0.001, "loss": 2.3103, "step": 76800 }, { "epoch": 24.854557207498384, "grad_norm": 1.2463979721069336, "learning_rate": 0.001, "loss": 2.2916, "step": 76900 }, { "epoch": 24.8868778280543, "grad_norm": 1.428267002105713, "learning_rate": 0.001, "loss": 2.3097, "step": 77000 }, { "epoch": 24.919198448610214, "grad_norm": 1.2961901426315308, "learning_rate": 0.001, "loss": 2.3208, "step": 77100 }, { "epoch": 24.951519069166128, "grad_norm": 1.5123456716537476, "learning_rate": 0.001, "loss": 2.326, "step": 77200 }, { "epoch": 24.983839689722043, "grad_norm": 1.0409616231918335, "learning_rate": 0.001, "loss": 2.3152, "step": 77300 }, { "epoch": 25.016160310277957, "grad_norm": 1.6853736639022827, "learning_rate": 0.001, "loss": 2.239, "step": 77400 }, { "epoch": 25.048480930833872, "grad_norm": 1.911928653717041, "learning_rate": 0.001, "loss": 2.2062, "step": 77500 }, { "epoch": 25.080801551389786, "grad_norm": 2.460665702819824, "learning_rate": 0.001, "loss": 2.2078, "step": 77600 }, { "epoch": 25.1131221719457, "grad_norm": 1.558672308921814, "learning_rate": 0.001, "loss": 2.2228, "step": 77700 }, { "epoch": 25.145442792501616, "grad_norm": 2.149746894836426, "learning_rate": 0.001, "loss": 2.2287, "step": 77800 }, { "epoch": 25.17776341305753, "grad_norm": 1.8480651378631592, "learning_rate": 0.001, "loss": 2.2123, "step": 77900 }, { "epoch": 25.210084033613445, "grad_norm": 1.9073114395141602, "learning_rate": 0.001, "loss": 2.2098, "step": 78000 }, { "epoch": 25.24240465416936, "grad_norm": 2.053379535675049, "learning_rate": 0.001, "loss": 2.2102, "step": 78100 }, { "epoch": 25.274725274725274, "grad_norm": 3.227494955062866, "learning_rate": 0.001, "loss": 2.2488, "step": 78200 }, { "epoch": 25.30704589528119, "grad_norm": 1.980688214302063, "learning_rate": 0.001, "loss": 2.225, "step": 78300 }, { "epoch": 25.339366515837103, "grad_norm": 2.000521659851074, "learning_rate": 0.001, "loss": 2.2266, "step": 78400 }, { "epoch": 25.371687136393017, "grad_norm": 2.2884345054626465, "learning_rate": 0.001, "loss": 2.2111, "step": 78500 }, { "epoch": 25.404007756948932, "grad_norm": 2.1715219020843506, "learning_rate": 0.001, "loss": 2.2356, "step": 78600 }, { "epoch": 25.436328377504847, "grad_norm": 2.1534175872802734, "learning_rate": 0.001, "loss": 2.2424, "step": 78700 }, { "epoch": 25.46864899806076, "grad_norm": 1.9701682329177856, "learning_rate": 0.001, "loss": 2.249, "step": 78800 }, { "epoch": 25.50096961861668, "grad_norm": 1.5657604932785034, "learning_rate": 0.001, "loss": 2.2518, "step": 78900 }, { "epoch": 25.533290239172594, "grad_norm": 1.7488117218017578, "learning_rate": 0.001, "loss": 2.2555, "step": 79000 }, { "epoch": 25.56561085972851, "grad_norm": 2.088778495788574, "learning_rate": 0.001, "loss": 2.2546, "step": 79100 }, { "epoch": 25.597931480284423, "grad_norm": 1.9367692470550537, "learning_rate": 0.001, "loss": 2.2721, "step": 79200 }, { "epoch": 25.630252100840337, "grad_norm": 1.5775336027145386, "learning_rate": 0.001, "loss": 2.2623, "step": 79300 }, { "epoch": 25.662572721396252, "grad_norm": 1.8892109394073486, "learning_rate": 0.001, "loss": 2.2813, "step": 79400 }, { "epoch": 25.694893341952167, "grad_norm": 1.7530161142349243, "learning_rate": 0.001, "loss": 2.2818, "step": 79500 }, { "epoch": 25.72721396250808, "grad_norm": 1.7765891551971436, "learning_rate": 0.001, "loss": 2.2833, "step": 79600 }, { "epoch": 25.759534583063996, "grad_norm": 1.7423548698425293, "learning_rate": 0.001, "loss": 2.2767, "step": 79700 }, { "epoch": 25.79185520361991, "grad_norm": 2.3986012935638428, "learning_rate": 0.001, "loss": 2.2904, "step": 79800 }, { "epoch": 25.824175824175825, "grad_norm": 1.88714599609375, "learning_rate": 0.001, "loss": 2.2667, "step": 79900 }, { "epoch": 25.85649644473174, "grad_norm": 1.7131158113479614, "learning_rate": 0.001, "loss": 2.2945, "step": 80000 }, { "epoch": 25.888817065287654, "grad_norm": 1.7139971256256104, "learning_rate": 0.001, "loss": 2.2759, "step": 80100 }, { "epoch": 25.92113768584357, "grad_norm": 2.6489851474761963, "learning_rate": 0.001, "loss": 2.3036, "step": 80200 }, { "epoch": 25.953458306399483, "grad_norm": 1.595976710319519, "learning_rate": 0.001, "loss": 2.2829, "step": 80300 }, { "epoch": 25.985778926955398, "grad_norm": 1.4456608295440674, "learning_rate": 0.001, "loss": 2.3096, "step": 80400 }, { "epoch": 26.018099547511312, "grad_norm": 1.4020755290985107, "learning_rate": 0.001, "loss": 2.2125, "step": 80500 }, { "epoch": 26.050420168067227, "grad_norm": 1.2599384784698486, "learning_rate": 0.001, "loss": 2.1567, "step": 80600 }, { "epoch": 26.08274078862314, "grad_norm": 2.7306346893310547, "learning_rate": 0.001, "loss": 2.1702, "step": 80700 }, { "epoch": 26.115061409179056, "grad_norm": 1.177088975906372, "learning_rate": 0.001, "loss": 2.1623, "step": 80800 }, { "epoch": 26.14738202973497, "grad_norm": 1.378434181213379, "learning_rate": 0.001, "loss": 2.203, "step": 80900 }, { "epoch": 26.179702650290885, "grad_norm": 1.258918046951294, "learning_rate": 0.001, "loss": 2.206, "step": 81000 }, { "epoch": 26.2120232708468, "grad_norm": 2.0789012908935547, "learning_rate": 0.001, "loss": 2.2114, "step": 81100 }, { "epoch": 26.244343891402714, "grad_norm": 1.6580952405929565, "learning_rate": 0.001, "loss": 2.201, "step": 81200 }, { "epoch": 26.27666451195863, "grad_norm": 1.432867407798767, "learning_rate": 0.001, "loss": 2.2149, "step": 81300 }, { "epoch": 26.308985132514543, "grad_norm": 1.0452007055282593, "learning_rate": 0.001, "loss": 2.2246, "step": 81400 }, { "epoch": 26.341305753070458, "grad_norm": 1.3239336013793945, "learning_rate": 0.001, "loss": 2.2263, "step": 81500 }, { "epoch": 26.373626373626372, "grad_norm": 1.519089698791504, "learning_rate": 0.001, "loss": 2.2012, "step": 81600 }, { "epoch": 26.405946994182287, "grad_norm": 0.9804332256317139, "learning_rate": 0.001, "loss": 2.2323, "step": 81700 }, { "epoch": 26.4382676147382, "grad_norm": 2.368727684020996, "learning_rate": 0.001, "loss": 2.236, "step": 81800 }, { "epoch": 26.470588235294116, "grad_norm": 6.719095706939697, "learning_rate": 0.001, "loss": 2.2294, "step": 81900 }, { "epoch": 26.50290885585003, "grad_norm": 1.2683913707733154, "learning_rate": 0.001, "loss": 2.2318, "step": 82000 }, { "epoch": 26.53522947640595, "grad_norm": 1.159319281578064, "learning_rate": 0.001, "loss": 2.249, "step": 82100 }, { "epoch": 26.567550096961863, "grad_norm": 1.3529342412948608, "learning_rate": 0.001, "loss": 2.2378, "step": 82200 }, { "epoch": 26.599870717517778, "grad_norm": 1.221890926361084, "learning_rate": 0.001, "loss": 2.25, "step": 82300 }, { "epoch": 26.632191338073692, "grad_norm": 1.4131715297698975, "learning_rate": 0.001, "loss": 2.2573, "step": 82400 }, { "epoch": 26.664511958629607, "grad_norm": 1.7766857147216797, "learning_rate": 0.001, "loss": 2.2598, "step": 82500 }, { "epoch": 26.69683257918552, "grad_norm": 1.225677251815796, "learning_rate": 0.001, "loss": 2.2602, "step": 82600 }, { "epoch": 26.729153199741436, "grad_norm": 1.6334868669509888, "learning_rate": 0.001, "loss": 2.2519, "step": 82700 }, { "epoch": 26.76147382029735, "grad_norm": 1.1198945045471191, "learning_rate": 0.001, "loss": 2.2749, "step": 82800 }, { "epoch": 26.793794440853265, "grad_norm": 1.1847219467163086, "learning_rate": 0.001, "loss": 2.2628, "step": 82900 }, { "epoch": 26.82611506140918, "grad_norm": 1.7721208333969116, "learning_rate": 0.001, "loss": 2.2734, "step": 83000 }, { "epoch": 26.858435681965094, "grad_norm": 1.3676013946533203, "learning_rate": 0.001, "loss": 2.2815, "step": 83100 }, { "epoch": 26.89075630252101, "grad_norm": 1.3370088338851929, "learning_rate": 0.001, "loss": 2.2801, "step": 83200 }, { "epoch": 26.923076923076923, "grad_norm": 1.9713144302368164, "learning_rate": 0.001, "loss": 2.2802, "step": 83300 }, { "epoch": 26.955397543632838, "grad_norm": 1.1067978143692017, "learning_rate": 0.001, "loss": 2.2669, "step": 83400 }, { "epoch": 26.987718164188752, "grad_norm": 1.7385209798812866, "learning_rate": 0.001, "loss": 2.2699, "step": 83500 }, { "epoch": 27.020038784744667, "grad_norm": 1.1542168855667114, "learning_rate": 0.001, "loss": 2.221, "step": 83600 }, { "epoch": 27.05235940530058, "grad_norm": 1.0730926990509033, "learning_rate": 0.001, "loss": 2.1628, "step": 83700 }, { "epoch": 27.084680025856496, "grad_norm": 2.0200247764587402, "learning_rate": 0.001, "loss": 2.1666, "step": 83800 }, { "epoch": 27.11700064641241, "grad_norm": 1.0857988595962524, "learning_rate": 0.001, "loss": 2.1806, "step": 83900 }, { "epoch": 27.149321266968325, "grad_norm": 1.274859070777893, "learning_rate": 0.001, "loss": 2.1722, "step": 84000 }, { "epoch": 27.18164188752424, "grad_norm": 0.989295244216919, "learning_rate": 0.001, "loss": 2.1986, "step": 84100 }, { "epoch": 27.213962508080154, "grad_norm": 1.2829957008361816, "learning_rate": 0.001, "loss": 2.1494, "step": 84200 }, { "epoch": 27.24628312863607, "grad_norm": 1.097731590270996, "learning_rate": 0.001, "loss": 2.1764, "step": 84300 }, { "epoch": 27.278603749191983, "grad_norm": 1.2137269973754883, "learning_rate": 0.001, "loss": 2.1769, "step": 84400 }, { "epoch": 27.310924369747898, "grad_norm": 1.0092753171920776, "learning_rate": 0.001, "loss": 2.1885, "step": 84500 }, { "epoch": 27.343244990303813, "grad_norm": 1.2095162868499756, "learning_rate": 0.001, "loss": 2.2131, "step": 84600 }, { "epoch": 27.375565610859727, "grad_norm": 1.31984543800354, "learning_rate": 0.001, "loss": 2.2029, "step": 84700 }, { "epoch": 27.40788623141564, "grad_norm": 1.1394065618515015, "learning_rate": 0.001, "loss": 2.1935, "step": 84800 }, { "epoch": 27.440206851971556, "grad_norm": 1.1504780054092407, "learning_rate": 0.001, "loss": 2.1948, "step": 84900 }, { "epoch": 27.47252747252747, "grad_norm": 1.305922508239746, "learning_rate": 0.001, "loss": 2.1989, "step": 85000 }, { "epoch": 27.50484809308339, "grad_norm": 1.1728363037109375, "learning_rate": 0.001, "loss": 2.2263, "step": 85100 }, { "epoch": 27.537168713639304, "grad_norm": 1.5369746685028076, "learning_rate": 0.001, "loss": 2.2237, "step": 85200 }, { "epoch": 27.569489334195218, "grad_norm": 0.9398338794708252, "learning_rate": 0.001, "loss": 2.2278, "step": 85300 }, { "epoch": 27.601809954751133, "grad_norm": 1.1463308334350586, "learning_rate": 0.001, "loss": 2.2215, "step": 85400 }, { "epoch": 27.634130575307047, "grad_norm": 1.8843040466308594, "learning_rate": 0.001, "loss": 2.2234, "step": 85500 }, { "epoch": 27.66645119586296, "grad_norm": 2.032595157623291, "learning_rate": 0.001, "loss": 2.2183, "step": 85600 }, { "epoch": 27.698771816418876, "grad_norm": 1.206725835800171, "learning_rate": 0.001, "loss": 2.2369, "step": 85700 }, { "epoch": 27.73109243697479, "grad_norm": 1.5010161399841309, "learning_rate": 0.001, "loss": 2.2279, "step": 85800 }, { "epoch": 27.763413057530705, "grad_norm": 1.5452054738998413, "learning_rate": 0.001, "loss": 2.2237, "step": 85900 }, { "epoch": 27.79573367808662, "grad_norm": 1.5035840272903442, "learning_rate": 0.001, "loss": 2.2342, "step": 86000 }, { "epoch": 27.828054298642535, "grad_norm": 1.000514268875122, "learning_rate": 0.001, "loss": 2.2292, "step": 86100 }, { "epoch": 27.86037491919845, "grad_norm": 0.9521571397781372, "learning_rate": 0.001, "loss": 2.2498, "step": 86200 }, { "epoch": 27.892695539754364, "grad_norm": 1.0766384601593018, "learning_rate": 0.001, "loss": 2.2316, "step": 86300 }, { "epoch": 27.92501616031028, "grad_norm": 1.8439937829971313, "learning_rate": 0.001, "loss": 2.2448, "step": 86400 }, { "epoch": 27.957336780866193, "grad_norm": 1.1737998723983765, "learning_rate": 0.001, "loss": 2.2582, "step": 86500 }, { "epoch": 27.989657401422107, "grad_norm": 1.1375584602355957, "learning_rate": 0.001, "loss": 2.2505, "step": 86600 }, { "epoch": 28.021978021978022, "grad_norm": 1.3607385158538818, "learning_rate": 0.001, "loss": 2.1904, "step": 86700 }, { "epoch": 28.054298642533936, "grad_norm": 1.0860687494277954, "learning_rate": 0.001, "loss": 2.1316, "step": 86800 }, { "epoch": 28.08661926308985, "grad_norm": 1.070517897605896, "learning_rate": 0.001, "loss": 2.1231, "step": 86900 }, { "epoch": 28.118939883645766, "grad_norm": 1.1106133460998535, "learning_rate": 0.001, "loss": 2.1401, "step": 87000 }, { "epoch": 28.15126050420168, "grad_norm": 1.6354790925979614, "learning_rate": 0.001, "loss": 2.1314, "step": 87100 }, { "epoch": 28.183581124757595, "grad_norm": 28.03631591796875, "learning_rate": 0.001, "loss": 2.1455, "step": 87200 }, { "epoch": 28.21590174531351, "grad_norm": 1.3974888324737549, "learning_rate": 0.001, "loss": 2.1636, "step": 87300 }, { "epoch": 28.248222365869424, "grad_norm": 1.1488933563232422, "learning_rate": 0.001, "loss": 2.1683, "step": 87400 }, { "epoch": 28.28054298642534, "grad_norm": 1.0957202911376953, "learning_rate": 0.001, "loss": 2.1714, "step": 87500 }, { "epoch": 28.312863606981253, "grad_norm": 1.2515236139297485, "learning_rate": 0.001, "loss": 2.161, "step": 87600 }, { "epoch": 28.345184227537167, "grad_norm": 1.1955662965774536, "learning_rate": 0.001, "loss": 2.1599, "step": 87700 }, { "epoch": 28.377504848093082, "grad_norm": 1.0725890398025513, "learning_rate": 0.001, "loss": 2.2005, "step": 87800 }, { "epoch": 28.409825468648997, "grad_norm": 1.2410542964935303, "learning_rate": 0.001, "loss": 2.1743, "step": 87900 }, { "epoch": 28.44214608920491, "grad_norm": 1.2690292596817017, "learning_rate": 0.001, "loss": 2.1696, "step": 88000 }, { "epoch": 28.474466709760826, "grad_norm": 1.1697804927825928, "learning_rate": 0.001, "loss": 2.1874, "step": 88100 }, { "epoch": 28.50678733031674, "grad_norm": 1.0169364213943481, "learning_rate": 0.001, "loss": 2.188, "step": 88200 }, { "epoch": 28.53910795087266, "grad_norm": 0.8271397948265076, "learning_rate": 0.001, "loss": 2.1988, "step": 88300 }, { "epoch": 28.571428571428573, "grad_norm": 1.2231976985931396, "learning_rate": 0.001, "loss": 2.2002, "step": 88400 }, { "epoch": 28.603749191984488, "grad_norm": 1.2481681108474731, "learning_rate": 0.001, "loss": 2.2185, "step": 88500 }, { "epoch": 28.636069812540402, "grad_norm": 1.3616920709609985, "learning_rate": 0.001, "loss": 2.21, "step": 88600 }, { "epoch": 28.668390433096317, "grad_norm": 1.0401321649551392, "learning_rate": 0.001, "loss": 2.2071, "step": 88700 }, { "epoch": 28.70071105365223, "grad_norm": 0.976053774356842, "learning_rate": 0.001, "loss": 2.2129, "step": 88800 }, { "epoch": 28.733031674208146, "grad_norm": 1.392301082611084, "learning_rate": 0.001, "loss": 2.2093, "step": 88900 }, { "epoch": 28.76535229476406, "grad_norm": 1.0414776802062988, "learning_rate": 0.001, "loss": 2.2196, "step": 89000 }, { "epoch": 28.797672915319975, "grad_norm": 1.9845554828643799, "learning_rate": 0.001, "loss": 2.2055, "step": 89100 }, { "epoch": 28.82999353587589, "grad_norm": 10.93977165222168, "learning_rate": 0.001, "loss": 2.2156, "step": 89200 }, { "epoch": 28.862314156431804, "grad_norm": 0.9448211789131165, "learning_rate": 0.001, "loss": 2.2177, "step": 89300 }, { "epoch": 28.89463477698772, "grad_norm": 0.8784899115562439, "learning_rate": 0.001, "loss": 2.2053, "step": 89400 }, { "epoch": 28.926955397543633, "grad_norm": 1.4218798875808716, "learning_rate": 0.001, "loss": 2.2227, "step": 89500 }, { "epoch": 28.959276018099548, "grad_norm": 1.2364318370819092, "learning_rate": 0.001, "loss": 2.2177, "step": 89600 }, { "epoch": 28.991596638655462, "grad_norm": 1.1323843002319336, "learning_rate": 0.001, "loss": 2.2434, "step": 89700 }, { "epoch": 29.023917259211377, "grad_norm": 0.8343545198440552, "learning_rate": 0.001, "loss": 2.1462, "step": 89800 }, { "epoch": 29.05623787976729, "grad_norm": 1.4199050664901733, "learning_rate": 0.001, "loss": 2.1012, "step": 89900 }, { "epoch": 29.088558500323206, "grad_norm": 1.02996027469635, "learning_rate": 0.001, "loss": 2.1051, "step": 90000 }, { "epoch": 29.12087912087912, "grad_norm": 1.4084829092025757, "learning_rate": 0.001, "loss": 2.1258, "step": 90100 }, { "epoch": 29.153199741435035, "grad_norm": 0.8884556293487549, "learning_rate": 0.001, "loss": 2.1146, "step": 90200 }, { "epoch": 29.18552036199095, "grad_norm": 1.1272987127304077, "learning_rate": 0.001, "loss": 2.1347, "step": 90300 }, { "epoch": 29.217840982546864, "grad_norm": 1.0297330617904663, "learning_rate": 0.001, "loss": 2.1284, "step": 90400 }, { "epoch": 29.25016160310278, "grad_norm": 1.1189401149749756, "learning_rate": 0.001, "loss": 2.1351, "step": 90500 }, { "epoch": 29.282482223658693, "grad_norm": 1.408944845199585, "learning_rate": 0.001, "loss": 2.1353, "step": 90600 }, { "epoch": 29.314802844214608, "grad_norm": 1.3574317693710327, "learning_rate": 0.001, "loss": 2.1521, "step": 90700 }, { "epoch": 29.347123464770522, "grad_norm": 1.1054710149765015, "learning_rate": 0.001, "loss": 2.1603, "step": 90800 }, { "epoch": 29.379444085326437, "grad_norm": 1.1560982465744019, "learning_rate": 0.001, "loss": 2.1429, "step": 90900 }, { "epoch": 29.41176470588235, "grad_norm": 1.1717305183410645, "learning_rate": 0.001, "loss": 2.162, "step": 91000 }, { "epoch": 29.444085326438266, "grad_norm": 1.106182336807251, "learning_rate": 0.001, "loss": 2.1773, "step": 91100 }, { "epoch": 29.47640594699418, "grad_norm": 0.9934647083282471, "learning_rate": 0.001, "loss": 2.1656, "step": 91200 }, { "epoch": 29.5087265675501, "grad_norm": 0.9938405156135559, "learning_rate": 0.001, "loss": 2.1751, "step": 91300 }, { "epoch": 29.541047188106013, "grad_norm": 0.9692370891571045, "learning_rate": 0.001, "loss": 2.1948, "step": 91400 }, { "epoch": 29.573367808661928, "grad_norm": 1.238140344619751, "learning_rate": 0.001, "loss": 2.1738, "step": 91500 }, { "epoch": 29.605688429217842, "grad_norm": 1.7226778268814087, "learning_rate": 0.001, "loss": 2.1615, "step": 91600 }, { "epoch": 29.638009049773757, "grad_norm": 1.4086564779281616, "learning_rate": 0.001, "loss": 2.1834, "step": 91700 }, { "epoch": 29.67032967032967, "grad_norm": 1.4223945140838623, "learning_rate": 0.001, "loss": 2.1997, "step": 91800 }, { "epoch": 29.702650290885586, "grad_norm": 1.1218960285186768, "learning_rate": 0.001, "loss": 2.2017, "step": 91900 }, { "epoch": 29.7349709114415, "grad_norm": 2.2295119762420654, "learning_rate": 0.001, "loss": 2.1965, "step": 92000 }, { "epoch": 29.767291531997415, "grad_norm": 1.2413989305496216, "learning_rate": 0.001, "loss": 2.1923, "step": 92100 }, { "epoch": 29.79961215255333, "grad_norm": 1.0953292846679688, "learning_rate": 0.001, "loss": 2.2173, "step": 92200 }, { "epoch": 29.831932773109244, "grad_norm": 1.1456645727157593, "learning_rate": 0.001, "loss": 2.1983, "step": 92300 }, { "epoch": 29.86425339366516, "grad_norm": 1.0011004209518433, "learning_rate": 0.001, "loss": 2.2122, "step": 92400 }, { "epoch": 29.896574014221073, "grad_norm": 1.0928899049758911, "learning_rate": 0.001, "loss": 2.2137, "step": 92500 }, { "epoch": 29.928894634776988, "grad_norm": 1.0410959720611572, "learning_rate": 0.001, "loss": 2.2171, "step": 92600 }, { "epoch": 29.961215255332903, "grad_norm": 1.3174846172332764, "learning_rate": 0.001, "loss": 2.2112, "step": 92700 }, { "epoch": 29.993535875888817, "grad_norm": 1.1399115324020386, "learning_rate": 0.001, "loss": 2.232, "step": 92800 }, { "epoch": 30.02585649644473, "grad_norm": 27.08103370666504, "learning_rate": 0.001, "loss": 2.115, "step": 92900 }, { "epoch": 30.058177117000646, "grad_norm": 1.2668479681015015, "learning_rate": 0.001, "loss": 2.0992, "step": 93000 }, { "epoch": 30.09049773755656, "grad_norm": 1.2871204614639282, "learning_rate": 0.001, "loss": 2.0968, "step": 93100 }, { "epoch": 30.122818358112475, "grad_norm": 1.2867836952209473, "learning_rate": 0.001, "loss": 2.0963, "step": 93200 }, { "epoch": 30.15513897866839, "grad_norm": 0.9447452425956726, "learning_rate": 0.001, "loss": 2.1017, "step": 93300 }, { "epoch": 30.187459599224304, "grad_norm": 2.0423269271850586, "learning_rate": 0.001, "loss": 2.13, "step": 93400 }, { "epoch": 30.21978021978022, "grad_norm": 1.066361665725708, "learning_rate": 0.001, "loss": 2.116, "step": 93500 }, { "epoch": 30.252100840336134, "grad_norm": 1.039259672164917, "learning_rate": 0.001, "loss": 2.1351, "step": 93600 }, { "epoch": 30.284421460892048, "grad_norm": 1.1280465126037598, "learning_rate": 0.001, "loss": 2.1319, "step": 93700 }, { "epoch": 30.316742081447963, "grad_norm": 1.211856484413147, "learning_rate": 0.001, "loss": 2.1109, "step": 93800 }, { "epoch": 30.349062702003877, "grad_norm": 2.000886917114258, "learning_rate": 0.001, "loss": 2.143, "step": 93900 }, { "epoch": 30.381383322559792, "grad_norm": 5.124105930328369, "learning_rate": 0.001, "loss": 2.1531, "step": 94000 }, { "epoch": 30.413703943115706, "grad_norm": 1.494377851486206, "learning_rate": 0.001, "loss": 2.1589, "step": 94100 }, { "epoch": 30.44602456367162, "grad_norm": 1.9504001140594482, "learning_rate": 0.001, "loss": 2.1439, "step": 94200 }, { "epoch": 30.478345184227535, "grad_norm": 0.9151893854141235, "learning_rate": 0.001, "loss": 2.1725, "step": 94300 }, { "epoch": 30.51066580478345, "grad_norm": 1.1490334272384644, "learning_rate": 0.001, "loss": 2.1743, "step": 94400 }, { "epoch": 30.542986425339368, "grad_norm": 1.0751733779907227, "learning_rate": 0.001, "loss": 2.1583, "step": 94500 }, { "epoch": 30.575307045895283, "grad_norm": 1.0541670322418213, "learning_rate": 0.001, "loss": 2.1722, "step": 94600 }, { "epoch": 30.607627666451197, "grad_norm": 1.2270714044570923, "learning_rate": 0.001, "loss": 2.1721, "step": 94700 }, { "epoch": 30.639948287007112, "grad_norm": 1.0250167846679688, "learning_rate": 0.001, "loss": 2.133, "step": 94800 }, { "epoch": 30.672268907563026, "grad_norm": 4.159157752990723, "learning_rate": 0.001, "loss": 2.1505, "step": 94900 }, { "epoch": 30.70458952811894, "grad_norm": 1.440094232559204, "learning_rate": 0.001, "loss": 2.1628, "step": 95000 }, { "epoch": 30.736910148674855, "grad_norm": 1.0411964654922485, "learning_rate": 0.001, "loss": 2.1862, "step": 95100 }, { "epoch": 30.76923076923077, "grad_norm": 0.9589716792106628, "learning_rate": 0.001, "loss": 2.1623, "step": 95200 }, { "epoch": 30.801551389786685, "grad_norm": 1.8971158266067505, "learning_rate": 0.001, "loss": 2.1751, "step": 95300 }, { "epoch": 30.8338720103426, "grad_norm": 1.151806354522705, "learning_rate": 0.001, "loss": 2.1919, "step": 95400 }, { "epoch": 30.866192630898514, "grad_norm": 1.1728664636611938, "learning_rate": 0.001, "loss": 2.1815, "step": 95500 }, { "epoch": 30.89851325145443, "grad_norm": 4.444606781005859, "learning_rate": 0.001, "loss": 2.1715, "step": 95600 }, { "epoch": 30.930833872010343, "grad_norm": 1.0547741651535034, "learning_rate": 0.001, "loss": 2.2124, "step": 95700 }, { "epoch": 30.963154492566257, "grad_norm": 0.8716176152229309, "learning_rate": 0.001, "loss": 2.1917, "step": 95800 }, { "epoch": 30.995475113122172, "grad_norm": 2.0710461139678955, "learning_rate": 0.001, "loss": 2.1686, "step": 95900 }, { "epoch": 31.027795733678087, "grad_norm": 1.399344801902771, "learning_rate": 0.001, "loss": 2.0675, "step": 96000 }, { "epoch": 31.060116354234, "grad_norm": 1.5554308891296387, "learning_rate": 0.001, "loss": 2.0681, "step": 96100 }, { "epoch": 31.092436974789916, "grad_norm": 1.0496906042099, "learning_rate": 0.001, "loss": 2.0923, "step": 96200 }, { "epoch": 31.12475759534583, "grad_norm": 1.3592922687530518, "learning_rate": 0.001, "loss": 2.0989, "step": 96300 }, { "epoch": 31.157078215901745, "grad_norm": 1.0739270448684692, "learning_rate": 0.001, "loss": 2.0944, "step": 96400 }, { "epoch": 31.18939883645766, "grad_norm": 1.4673694372177124, "learning_rate": 0.001, "loss": 2.0788, "step": 96500 }, { "epoch": 31.221719457013574, "grad_norm": 2.271284818649292, "learning_rate": 0.001, "loss": 2.1038, "step": 96600 }, { "epoch": 31.25404007756949, "grad_norm": 0.9998298287391663, "learning_rate": 0.001, "loss": 2.0957, "step": 96700 }, { "epoch": 31.286360698125403, "grad_norm": 1.350243330001831, "learning_rate": 0.001, "loss": 2.1187, "step": 96800 }, { "epoch": 31.318681318681318, "grad_norm": 1.8980063199996948, "learning_rate": 0.001, "loss": 2.1194, "step": 96900 }, { "epoch": 31.351001939237232, "grad_norm": 1.521952509880066, "learning_rate": 0.001, "loss": 2.1174, "step": 97000 }, { "epoch": 31.383322559793147, "grad_norm": 1.5607898235321045, "learning_rate": 0.001, "loss": 2.1144, "step": 97100 }, { "epoch": 31.41564318034906, "grad_norm": 1.6727725267410278, "learning_rate": 0.001, "loss": 2.1329, "step": 97200 }, { "epoch": 31.447963800904976, "grad_norm": 1.125823736190796, "learning_rate": 0.001, "loss": 2.1309, "step": 97300 }, { "epoch": 31.48028442146089, "grad_norm": 1.1037801504135132, "learning_rate": 0.001, "loss": 2.1348, "step": 97400 }, { "epoch": 31.51260504201681, "grad_norm": 1.4201444387435913, "learning_rate": 0.001, "loss": 2.1425, "step": 97500 }, { "epoch": 31.544925662572723, "grad_norm": 1.1230400800704956, "learning_rate": 0.001, "loss": 2.1404, "step": 97600 }, { "epoch": 31.577246283128638, "grad_norm": 1.4163681268692017, "learning_rate": 0.001, "loss": 2.1546, "step": 97700 }, { "epoch": 31.609566903684552, "grad_norm": 1.3696402311325073, "learning_rate": 0.001, "loss": 2.1404, "step": 97800 }, { "epoch": 31.641887524240467, "grad_norm": 1.1569052934646606, "learning_rate": 0.001, "loss": 2.1491, "step": 97900 }, { "epoch": 31.67420814479638, "grad_norm": 5.932373523712158, "learning_rate": 0.001, "loss": 2.1549, "step": 98000 }, { "epoch": 31.706528765352296, "grad_norm": 1.450197458267212, "learning_rate": 0.001, "loss": 2.1615, "step": 98100 }, { "epoch": 31.73884938590821, "grad_norm": 1.257980465888977, "learning_rate": 0.001, "loss": 2.146, "step": 98200 }, { "epoch": 31.771170006464125, "grad_norm": 1.2402591705322266, "learning_rate": 0.001, "loss": 2.1579, "step": 98300 }, { "epoch": 31.80349062702004, "grad_norm": 1.1138743162155151, "learning_rate": 0.001, "loss": 2.1591, "step": 98400 }, { "epoch": 31.835811247575954, "grad_norm": 1.0513564348220825, "learning_rate": 0.001, "loss": 2.1625, "step": 98500 }, { "epoch": 31.86813186813187, "grad_norm": 1.1577000617980957, "learning_rate": 0.001, "loss": 2.1631, "step": 98600 }, { "epoch": 31.900452488687783, "grad_norm": 1.0872490406036377, "learning_rate": 0.001, "loss": 2.1774, "step": 98700 }, { "epoch": 31.932773109243698, "grad_norm": 1.5173872709274292, "learning_rate": 0.001, "loss": 2.182, "step": 98800 }, { "epoch": 31.965093729799612, "grad_norm": 1.5394326448440552, "learning_rate": 0.001, "loss": 2.1675, "step": 98900 }, { "epoch": 31.997414350355527, "grad_norm": 376.0273132324219, "learning_rate": 0.001, "loss": 2.1669, "step": 99000 }, { "epoch": 32.02973497091144, "grad_norm": 3.688533067703247, "learning_rate": 0.001, "loss": 2.0842, "step": 99100 }, { "epoch": 32.062055591467356, "grad_norm": 1.4524699449539185, "learning_rate": 0.001, "loss": 2.0509, "step": 99200 }, { "epoch": 32.09437621202327, "grad_norm": 1.5523630380630493, "learning_rate": 0.001, "loss": 2.0463, "step": 99300 }, { "epoch": 32.126696832579185, "grad_norm": 1.9534733295440674, "learning_rate": 0.001, "loss": 2.0631, "step": 99400 }, { "epoch": 32.1590174531351, "grad_norm": 1.1293518543243408, "learning_rate": 0.001, "loss": 2.0782, "step": 99500 }, { "epoch": 32.191338073691014, "grad_norm": 0.9940181374549866, "learning_rate": 0.001, "loss": 2.0838, "step": 99600 }, { "epoch": 32.22365869424693, "grad_norm": 1.2948293685913086, "learning_rate": 0.001, "loss": 2.0815, "step": 99700 }, { "epoch": 32.25597931480284, "grad_norm": 1.3236920833587646, "learning_rate": 0.001, "loss": 2.0868, "step": 99800 }, { "epoch": 32.28829993535876, "grad_norm": 1.6906945705413818, "learning_rate": 0.001, "loss": 2.093, "step": 99900 }, { "epoch": 32.32062055591467, "grad_norm": 1.3024359941482544, "learning_rate": 0.001, "loss": 2.1018, "step": 100000 }, { "epoch": 32.35294117647059, "grad_norm": 1.8449146747589111, "learning_rate": 0.001, "loss": 2.106, "step": 100100 }, { "epoch": 32.3852617970265, "grad_norm": 1.5294415950775146, "learning_rate": 0.001, "loss": 2.102, "step": 100200 }, { "epoch": 32.417582417582416, "grad_norm": 1.2855937480926514, "learning_rate": 0.001, "loss": 2.0999, "step": 100300 }, { "epoch": 32.44990303813833, "grad_norm": 1.5564039945602417, "learning_rate": 0.001, "loss": 2.1067, "step": 100400 }, { "epoch": 32.482223658694245, "grad_norm": 1.1746330261230469, "learning_rate": 0.001, "loss": 2.1012, "step": 100500 }, { "epoch": 32.51454427925016, "grad_norm": 1.775856852531433, "learning_rate": 0.001, "loss": 2.1081, "step": 100600 }, { "epoch": 32.546864899806074, "grad_norm": 1.446081280708313, "learning_rate": 0.001, "loss": 2.1211, "step": 100700 }, { "epoch": 32.57918552036199, "grad_norm": 1.2077734470367432, "learning_rate": 0.001, "loss": 2.1123, "step": 100800 }, { "epoch": 32.6115061409179, "grad_norm": 1.1097933053970337, "learning_rate": 0.001, "loss": 2.1344, "step": 100900 }, { "epoch": 32.64382676147382, "grad_norm": 1.043678879737854, "learning_rate": 0.001, "loss": 2.128, "step": 101000 }, { "epoch": 32.67614738202973, "grad_norm": 1.042712688446045, "learning_rate": 0.001, "loss": 2.1244, "step": 101100 }, { "epoch": 32.70846800258565, "grad_norm": 1.5376156568527222, "learning_rate": 0.001, "loss": 2.1394, "step": 101200 }, { "epoch": 32.74078862314156, "grad_norm": 1.4139293432235718, "learning_rate": 0.001, "loss": 2.1362, "step": 101300 }, { "epoch": 32.773109243697476, "grad_norm": 1.2184932231903076, "learning_rate": 0.001, "loss": 2.1421, "step": 101400 }, { "epoch": 32.80542986425339, "grad_norm": 2.9759700298309326, "learning_rate": 0.001, "loss": 2.1499, "step": 101500 }, { "epoch": 32.837750484809305, "grad_norm": 1.2288818359375, "learning_rate": 0.001, "loss": 2.1463, "step": 101600 }, { "epoch": 32.87007110536522, "grad_norm": 1.4117445945739746, "learning_rate": 0.001, "loss": 2.1385, "step": 101700 }, { "epoch": 32.902391725921134, "grad_norm": 1.0710313320159912, "learning_rate": 0.001, "loss": 2.1485, "step": 101800 }, { "epoch": 32.93471234647705, "grad_norm": 1.8286625146865845, "learning_rate": 0.001, "loss": 2.1558, "step": 101900 }, { "epoch": 32.967032967032964, "grad_norm": 1.0844237804412842, "learning_rate": 0.001, "loss": 2.1518, "step": 102000 }, { "epoch": 32.999353587588885, "grad_norm": 2.408316135406494, "learning_rate": 0.001, "loss": 2.1479, "step": 102100 }, { "epoch": 33.0316742081448, "grad_norm": 1.376294493675232, "learning_rate": 0.001, "loss": 2.0317, "step": 102200 }, { "epoch": 33.063994828700714, "grad_norm": 1.6813457012176514, "learning_rate": 0.001, "loss": 2.0276, "step": 102300 }, { "epoch": 33.09631544925663, "grad_norm": 3.063542366027832, "learning_rate": 0.001, "loss": 2.0335, "step": 102400 }, { "epoch": 33.12863606981254, "grad_norm": 1.5739392042160034, "learning_rate": 0.001, "loss": 2.0367, "step": 102500 }, { "epoch": 33.16095669036846, "grad_norm": 1.4421710968017578, "learning_rate": 0.001, "loss": 2.0579, "step": 102600 }, { "epoch": 33.19327731092437, "grad_norm": 1.6407829523086548, "learning_rate": 0.001, "loss": 2.0476, "step": 102700 }, { "epoch": 33.22559793148029, "grad_norm": 2.7323858737945557, "learning_rate": 0.001, "loss": 2.0638, "step": 102800 }, { "epoch": 33.2579185520362, "grad_norm": 1.314718246459961, "learning_rate": 0.001, "loss": 2.0645, "step": 102900 }, { "epoch": 33.290239172592116, "grad_norm": 1.7618801593780518, "learning_rate": 0.001, "loss": 2.08, "step": 103000 }, { "epoch": 33.32255979314803, "grad_norm": 1.3760364055633545, "learning_rate": 0.001, "loss": 2.073, "step": 103100 }, { "epoch": 33.354880413703945, "grad_norm": 1.6700538396835327, "learning_rate": 0.001, "loss": 2.0877, "step": 103200 }, { "epoch": 33.38720103425986, "grad_norm": 2.31313419342041, "learning_rate": 0.001, "loss": 2.0883, "step": 103300 }, { "epoch": 33.419521654815775, "grad_norm": 1.8538918495178223, "learning_rate": 0.001, "loss": 2.1058, "step": 103400 }, { "epoch": 33.45184227537169, "grad_norm": 1.3206936120986938, "learning_rate": 0.001, "loss": 2.0977, "step": 103500 }, { "epoch": 33.484162895927604, "grad_norm": 1.8867946863174438, "learning_rate": 0.001, "loss": 2.0852, "step": 103600 }, { "epoch": 33.51648351648352, "grad_norm": 1.2346138954162598, "learning_rate": 0.001, "loss": 2.0929, "step": 103700 }, { "epoch": 33.54880413703943, "grad_norm": 1.279885172843933, "learning_rate": 0.001, "loss": 2.1013, "step": 103800 }, { "epoch": 33.58112475759535, "grad_norm": 1.9628183841705322, "learning_rate": 0.001, "loss": 2.0935, "step": 103900 }, { "epoch": 33.61344537815126, "grad_norm": 1.3602826595306396, "learning_rate": 0.001, "loss": 2.1109, "step": 104000 }, { "epoch": 33.645765998707176, "grad_norm": 1.8199571371078491, "learning_rate": 0.001, "loss": 2.1016, "step": 104100 }, { "epoch": 33.67808661926309, "grad_norm": 1.701765775680542, "learning_rate": 0.001, "loss": 2.1102, "step": 104200 }, { "epoch": 33.710407239819006, "grad_norm": 9.048108100891113, "learning_rate": 0.001, "loss": 2.1064, "step": 104300 }, { "epoch": 33.74272786037492, "grad_norm": 1.7844713926315308, "learning_rate": 0.001, "loss": 2.1311, "step": 104400 }, { "epoch": 33.775048480930835, "grad_norm": 1.5251983404159546, "learning_rate": 0.001, "loss": 2.1274, "step": 104500 }, { "epoch": 33.80736910148675, "grad_norm": 1.3404167890548706, "learning_rate": 0.001, "loss": 2.1157, "step": 104600 }, { "epoch": 33.839689722042664, "grad_norm": 1.5998722314834595, "learning_rate": 0.001, "loss": 2.1337, "step": 104700 }, { "epoch": 33.87201034259858, "grad_norm": 1.3175721168518066, "learning_rate": 0.001, "loss": 2.1369, "step": 104800 }, { "epoch": 33.90433096315449, "grad_norm": 1.913131594657898, "learning_rate": 0.001, "loss": 2.1311, "step": 104900 }, { "epoch": 33.93665158371041, "grad_norm": 1.7143213748931885, "learning_rate": 0.001, "loss": 2.1423, "step": 105000 }, { "epoch": 33.96897220426632, "grad_norm": 1.378402590751648, "learning_rate": 0.001, "loss": 2.1696, "step": 105100 }, { "epoch": 34.00129282482224, "grad_norm": 1.370124101638794, "learning_rate": 0.001, "loss": 2.1355, "step": 105200 }, { "epoch": 34.03361344537815, "grad_norm": 2.696305513381958, "learning_rate": 0.001, "loss": 2.0251, "step": 105300 }, { "epoch": 34.065934065934066, "grad_norm": 1.7715506553649902, "learning_rate": 0.001, "loss": 2.0175, "step": 105400 }, { "epoch": 34.09825468648998, "grad_norm": 3.330325126647949, "learning_rate": 0.001, "loss": 2.0179, "step": 105500 }, { "epoch": 34.130575307045895, "grad_norm": 1.5982743501663208, "learning_rate": 0.001, "loss": 2.0444, "step": 105600 }, { "epoch": 34.16289592760181, "grad_norm": 2.021436929702759, "learning_rate": 0.001, "loss": 2.0454, "step": 105700 }, { "epoch": 34.195216548157724, "grad_norm": 1.4831916093826294, "learning_rate": 0.001, "loss": 2.0458, "step": 105800 }, { "epoch": 34.22753716871364, "grad_norm": 1.5908377170562744, "learning_rate": 0.001, "loss": 2.0392, "step": 105900 }, { "epoch": 34.25985778926955, "grad_norm": 1.23472261428833, "learning_rate": 0.001, "loss": 2.046, "step": 106000 }, { "epoch": 34.29217840982547, "grad_norm": 1.3308864831924438, "learning_rate": 0.001, "loss": 2.0494, "step": 106100 }, { "epoch": 34.32449903038138, "grad_norm": 1.4027752876281738, "learning_rate": 0.001, "loss": 2.0661, "step": 106200 }, { "epoch": 34.3568196509373, "grad_norm": 1.4350675344467163, "learning_rate": 0.001, "loss": 2.0828, "step": 106300 }, { "epoch": 34.38914027149321, "grad_norm": 1.5049538612365723, "learning_rate": 0.001, "loss": 2.0607, "step": 106400 }, { "epoch": 34.421460892049126, "grad_norm": 1.0770230293273926, "learning_rate": 0.001, "loss": 2.0813, "step": 106500 }, { "epoch": 34.45378151260504, "grad_norm": 1.4571819305419922, "learning_rate": 0.001, "loss": 2.0671, "step": 106600 }, { "epoch": 34.486102133160955, "grad_norm": 1.3443101644515991, "learning_rate": 0.001, "loss": 2.0655, "step": 106700 }, { "epoch": 34.51842275371687, "grad_norm": 1.5039441585540771, "learning_rate": 0.001, "loss": 2.0902, "step": 106800 }, { "epoch": 34.550743374272784, "grad_norm": 6.2228617668151855, "learning_rate": 0.001, "loss": 2.0741, "step": 106900 }, { "epoch": 34.5830639948287, "grad_norm": 1.433332920074463, "learning_rate": 0.001, "loss": 2.0797, "step": 107000 }, { "epoch": 34.61538461538461, "grad_norm": 1.5355336666107178, "learning_rate": 0.001, "loss": 2.0983, "step": 107100 }, { "epoch": 34.64770523594053, "grad_norm": 1.7707475423812866, "learning_rate": 0.001, "loss": 2.1035, "step": 107200 }, { "epoch": 34.68002585649644, "grad_norm": 1.5812116861343384, "learning_rate": 0.001, "loss": 2.0976, "step": 107300 }, { "epoch": 34.71234647705236, "grad_norm": 1.267074465751648, "learning_rate": 0.001, "loss": 2.0932, "step": 107400 }, { "epoch": 34.74466709760827, "grad_norm": 1.3154644966125488, "learning_rate": 0.001, "loss": 2.0954, "step": 107500 }, { "epoch": 34.776987718164186, "grad_norm": 1.5400298833847046, "learning_rate": 0.001, "loss": 2.1065, "step": 107600 }, { "epoch": 34.8093083387201, "grad_norm": 1.1817779541015625, "learning_rate": 0.001, "loss": 2.1142, "step": 107700 }, { "epoch": 34.841628959276015, "grad_norm": 1.5125499963760376, "learning_rate": 0.001, "loss": 2.1112, "step": 107800 }, { "epoch": 34.87394957983193, "grad_norm": 2.23714280128479, "learning_rate": 0.001, "loss": 2.1132, "step": 107900 }, { "epoch": 34.906270200387844, "grad_norm": 1.5894731283187866, "learning_rate": 0.001, "loss": 2.1083, "step": 108000 }, { "epoch": 34.93859082094376, "grad_norm": 3.5683343410491943, "learning_rate": 0.001, "loss": 2.1306, "step": 108100 }, { "epoch": 34.97091144149967, "grad_norm": 1.686200499534607, "learning_rate": 0.001, "loss": 2.1196, "step": 108200 }, { "epoch": 35.003232062055595, "grad_norm": 1.8118089437484741, "learning_rate": 0.001, "loss": 2.1167, "step": 108300 }, { "epoch": 35.03555268261151, "grad_norm": 2.675708055496216, "learning_rate": 0.001, "loss": 1.9932, "step": 108400 }, { "epoch": 35.067873303167424, "grad_norm": 1.690611481666565, "learning_rate": 0.001, "loss": 1.9931, "step": 108500 }, { "epoch": 35.10019392372334, "grad_norm": 1.1233583688735962, "learning_rate": 0.001, "loss": 1.9895, "step": 108600 }, { "epoch": 35.13251454427925, "grad_norm": 2.5434956550598145, "learning_rate": 0.001, "loss": 2.03, "step": 108700 }, { "epoch": 35.16483516483517, "grad_norm": 1.265218734741211, "learning_rate": 0.001, "loss": 2.0224, "step": 108800 }, { "epoch": 35.19715578539108, "grad_norm": 0.8994515538215637, "learning_rate": 0.001, "loss": 2.0408, "step": 108900 }, { "epoch": 35.229476405947, "grad_norm": 1.5718497037887573, "learning_rate": 0.001, "loss": 2.0281, "step": 109000 }, { "epoch": 35.26179702650291, "grad_norm": 1.3742915391921997, "learning_rate": 0.001, "loss": 2.0468, "step": 109100 }, { "epoch": 35.294117647058826, "grad_norm": 1.6106927394866943, "learning_rate": 0.001, "loss": 2.0344, "step": 109200 }, { "epoch": 35.32643826761474, "grad_norm": 1.7273683547973633, "learning_rate": 0.001, "loss": 2.0378, "step": 109300 }, { "epoch": 35.358758888170655, "grad_norm": 1.373431921005249, "learning_rate": 0.001, "loss": 2.0436, "step": 109400 }, { "epoch": 35.39107950872657, "grad_norm": 1.3295968770980835, "learning_rate": 0.001, "loss": 2.0387, "step": 109500 }, { "epoch": 35.423400129282484, "grad_norm": 1.3996498584747314, "learning_rate": 0.001, "loss": 2.036, "step": 109600 }, { "epoch": 35.4557207498384, "grad_norm": 1.0477360486984253, "learning_rate": 0.001, "loss": 2.062, "step": 109700 }, { "epoch": 35.48804137039431, "grad_norm": 1.1163381338119507, "learning_rate": 0.001, "loss": 2.0591, "step": 109800 }, { "epoch": 35.52036199095023, "grad_norm": 1.2472792863845825, "learning_rate": 0.001, "loss": 2.0779, "step": 109900 }, { "epoch": 35.55268261150614, "grad_norm": 1.0851945877075195, "learning_rate": 0.001, "loss": 2.0811, "step": 110000 }, { "epoch": 35.58500323206206, "grad_norm": 1.661994457244873, "learning_rate": 0.001, "loss": 2.056, "step": 110100 }, { "epoch": 35.61732385261797, "grad_norm": 1.7032674551010132, "learning_rate": 0.001, "loss": 2.0644, "step": 110200 }, { "epoch": 35.649644473173886, "grad_norm": 1.634842038154602, "learning_rate": 0.001, "loss": 2.1003, "step": 110300 }, { "epoch": 35.6819650937298, "grad_norm": 1.619712471961975, "learning_rate": 0.001, "loss": 2.081, "step": 110400 }, { "epoch": 35.714285714285715, "grad_norm": 1.1081892251968384, "learning_rate": 0.001, "loss": 2.0662, "step": 110500 }, { "epoch": 35.74660633484163, "grad_norm": 1.5331933498382568, "learning_rate": 0.001, "loss": 2.0772, "step": 110600 }, { "epoch": 35.778926955397544, "grad_norm": 1.161687970161438, "learning_rate": 0.001, "loss": 2.0848, "step": 110700 }, { "epoch": 35.81124757595346, "grad_norm": 1.4617877006530762, "learning_rate": 0.001, "loss": 2.0675, "step": 110800 }, { "epoch": 35.84356819650937, "grad_norm": 1.370492696762085, "learning_rate": 0.001, "loss": 2.1177, "step": 110900 }, { "epoch": 35.87588881706529, "grad_norm": 1.2550355195999146, "learning_rate": 0.001, "loss": 2.0967, "step": 111000 }, { "epoch": 35.9082094376212, "grad_norm": 1.174294114112854, "learning_rate": 0.001, "loss": 2.1084, "step": 111100 }, { "epoch": 35.94053005817712, "grad_norm": 0.9929604530334473, "learning_rate": 0.001, "loss": 2.1032, "step": 111200 }, { "epoch": 35.97285067873303, "grad_norm": 1.8174422979354858, "learning_rate": 0.001, "loss": 2.1086, "step": 111300 }, { "epoch": 36.005171299288946, "grad_norm": 1.2662022113800049, "learning_rate": 0.001, "loss": 2.112, "step": 111400 }, { "epoch": 36.03749191984486, "grad_norm": 1.661314845085144, "learning_rate": 0.001, "loss": 1.9841, "step": 111500 }, { "epoch": 36.069812540400775, "grad_norm": 1.2541825771331787, "learning_rate": 0.001, "loss": 1.9851, "step": 111600 }, { "epoch": 36.10213316095669, "grad_norm": 1.1146622896194458, "learning_rate": 0.001, "loss": 2.0023, "step": 111700 }, { "epoch": 36.134453781512605, "grad_norm": 1.280148983001709, "learning_rate": 0.001, "loss": 1.9862, "step": 111800 }, { "epoch": 36.16677440206852, "grad_norm": 1.3915441036224365, "learning_rate": 0.001, "loss": 2.014, "step": 111900 }, { "epoch": 36.199095022624434, "grad_norm": 1.139222264289856, "learning_rate": 0.001, "loss": 2.011, "step": 112000 }, { "epoch": 36.23141564318035, "grad_norm": 1.46260404586792, "learning_rate": 0.001, "loss": 2.0271, "step": 112100 }, { "epoch": 36.26373626373626, "grad_norm": 0.9220998287200928, "learning_rate": 0.001, "loss": 2.0109, "step": 112200 }, { "epoch": 36.29605688429218, "grad_norm": 1.228346586227417, "learning_rate": 0.001, "loss": 2.0278, "step": 112300 }, { "epoch": 36.32837750484809, "grad_norm": 1.8117899894714355, "learning_rate": 0.001, "loss": 2.0367, "step": 112400 }, { "epoch": 36.36069812540401, "grad_norm": 1.4313738346099854, "learning_rate": 0.001, "loss": 2.0301, "step": 112500 }, { "epoch": 36.39301874595992, "grad_norm": 1.5974383354187012, "learning_rate": 0.001, "loss": 2.0242, "step": 112600 }, { "epoch": 36.425339366515836, "grad_norm": 1.6225992441177368, "learning_rate": 0.001, "loss": 2.0203, "step": 112700 }, { "epoch": 36.45765998707175, "grad_norm": 1.4460171461105347, "learning_rate": 0.001, "loss": 2.0537, "step": 112800 }, { "epoch": 36.489980607627665, "grad_norm": 3.29064679145813, "learning_rate": 0.001, "loss": 2.0542, "step": 112900 }, { "epoch": 36.52230122818358, "grad_norm": 6.470909595489502, "learning_rate": 0.001, "loss": 2.0264, "step": 113000 }, { "epoch": 36.554621848739494, "grad_norm": 0.9478192329406738, "learning_rate": 0.001, "loss": 2.0404, "step": 113100 }, { "epoch": 36.58694246929541, "grad_norm": 1.653351902961731, "learning_rate": 0.001, "loss": 2.0478, "step": 113200 }, { "epoch": 36.61926308985132, "grad_norm": 1.1961288452148438, "learning_rate": 0.001, "loss": 2.0699, "step": 113300 }, { "epoch": 36.65158371040724, "grad_norm": 1.6648602485656738, "learning_rate": 0.001, "loss": 2.0526, "step": 113400 }, { "epoch": 36.68390433096315, "grad_norm": 1.280319333076477, "learning_rate": 0.001, "loss": 2.0517, "step": 113500 }, { "epoch": 36.71622495151907, "grad_norm": 1.351252794265747, "learning_rate": 0.001, "loss": 2.0579, "step": 113600 }, { "epoch": 36.74854557207498, "grad_norm": 1.2157869338989258, "learning_rate": 0.001, "loss": 2.0688, "step": 113700 }, { "epoch": 36.780866192630896, "grad_norm": 1.405073881149292, "learning_rate": 0.001, "loss": 2.0627, "step": 113800 }, { "epoch": 36.81318681318681, "grad_norm": 1.49695885181427, "learning_rate": 0.001, "loss": 2.0752, "step": 113900 }, { "epoch": 36.845507433742725, "grad_norm": 1.8724697828292847, "learning_rate": 0.001, "loss": 2.069, "step": 114000 }, { "epoch": 36.87782805429864, "grad_norm": 1.7394821643829346, "learning_rate": 0.001, "loss": 2.0748, "step": 114100 }, { "epoch": 36.910148674854554, "grad_norm": 1.195928931236267, "learning_rate": 0.001, "loss": 2.0849, "step": 114200 }, { "epoch": 36.94246929541047, "grad_norm": 1.5557273626327515, "learning_rate": 0.001, "loss": 2.0798, "step": 114300 }, { "epoch": 36.97478991596638, "grad_norm": 2.6745998859405518, "learning_rate": 0.001, "loss": 2.0866, "step": 114400 }, { "epoch": 37.007110536522305, "grad_norm": 1.2646483182907104, "learning_rate": 0.001, "loss": 2.0566, "step": 114500 }, { "epoch": 37.03943115707822, "grad_norm": 1.8610411882400513, "learning_rate": 0.001, "loss": 1.963, "step": 114600 }, { "epoch": 37.071751777634134, "grad_norm": 1.2506316900253296, "learning_rate": 0.001, "loss": 1.973, "step": 114700 }, { "epoch": 37.10407239819005, "grad_norm": 1.2107717990875244, "learning_rate": 0.001, "loss": 1.9793, "step": 114800 }, { "epoch": 37.13639301874596, "grad_norm": 1.2314270734786987, "learning_rate": 0.001, "loss": 1.9767, "step": 114900 }, { "epoch": 37.16871363930188, "grad_norm": 1.4105151891708374, "learning_rate": 0.001, "loss": 1.9831, "step": 115000 }, { "epoch": 37.20103425985779, "grad_norm": 1.0658131837844849, "learning_rate": 0.001, "loss": 1.985, "step": 115100 }, { "epoch": 37.23335488041371, "grad_norm": 1.3296290636062622, "learning_rate": 0.001, "loss": 2.0204, "step": 115200 }, { "epoch": 37.26567550096962, "grad_norm": 1.6618130207061768, "learning_rate": 0.001, "loss": 2.0003, "step": 115300 }, { "epoch": 37.297996121525536, "grad_norm": 1.1363368034362793, "learning_rate": 0.001, "loss": 2.0099, "step": 115400 }, { "epoch": 37.33031674208145, "grad_norm": 6.337588310241699, "learning_rate": 0.001, "loss": 2.0145, "step": 115500 }, { "epoch": 37.362637362637365, "grad_norm": 1.8100254535675049, "learning_rate": 0.001, "loss": 2.0121, "step": 115600 }, { "epoch": 37.39495798319328, "grad_norm": 2.3716182708740234, "learning_rate": 0.001, "loss": 2.0346, "step": 115700 }, { "epoch": 37.427278603749194, "grad_norm": 0.9943607449531555, "learning_rate": 0.001, "loss": 2.0353, "step": 115800 }, { "epoch": 37.45959922430511, "grad_norm": 1.1550334692001343, "learning_rate": 0.001, "loss": 2.0233, "step": 115900 }, { "epoch": 37.49191984486102, "grad_norm": 1.8431400060653687, "learning_rate": 0.001, "loss": 2.0297, "step": 116000 }, { "epoch": 37.52424046541694, "grad_norm": 5.019477367401123, "learning_rate": 0.001, "loss": 2.027, "step": 116100 }, { "epoch": 37.55656108597285, "grad_norm": 2.139863967895508, "learning_rate": 0.001, "loss": 2.0235, "step": 116200 }, { "epoch": 37.58888170652877, "grad_norm": 2.1754961013793945, "learning_rate": 0.001, "loss": 2.0376, "step": 116300 }, { "epoch": 37.62120232708468, "grad_norm": 1.1024724245071411, "learning_rate": 0.001, "loss": 2.0434, "step": 116400 }, { "epoch": 37.653522947640596, "grad_norm": 1.0138988494873047, "learning_rate": 0.001, "loss": 2.0404, "step": 116500 }, { "epoch": 37.68584356819651, "grad_norm": 1.3273720741271973, "learning_rate": 0.001, "loss": 2.0443, "step": 116600 }, { "epoch": 37.718164188752425, "grad_norm": 0.9947942495346069, "learning_rate": 0.001, "loss": 2.0583, "step": 116700 }, { "epoch": 37.75048480930834, "grad_norm": 1.0169286727905273, "learning_rate": 0.001, "loss": 2.0388, "step": 116800 }, { "epoch": 37.782805429864254, "grad_norm": 0.9837519526481628, "learning_rate": 0.001, "loss": 2.0715, "step": 116900 }, { "epoch": 37.81512605042017, "grad_norm": 3.083686590194702, "learning_rate": 0.001, "loss": 2.0498, "step": 117000 }, { "epoch": 37.84744667097608, "grad_norm": 1.1390721797943115, "learning_rate": 0.001, "loss": 2.0712, "step": 117100 }, { "epoch": 37.879767291532, "grad_norm": 1.1165333986282349, "learning_rate": 0.001, "loss": 2.0428, "step": 117200 }, { "epoch": 37.91208791208791, "grad_norm": 1.401591181755066, "learning_rate": 0.001, "loss": 2.0848, "step": 117300 }, { "epoch": 37.94440853264383, "grad_norm": 1.6450353860855103, "learning_rate": 0.001, "loss": 2.0842, "step": 117400 }, { "epoch": 37.97672915319974, "grad_norm": 1.2841455936431885, "learning_rate": 0.001, "loss": 2.0809, "step": 117500 }, { "epoch": 38.009049773755656, "grad_norm": 0.9486263394355774, "learning_rate": 0.001, "loss": 2.0455, "step": 117600 }, { "epoch": 38.04137039431157, "grad_norm": 1.038133978843689, "learning_rate": 0.001, "loss": 1.9583, "step": 117700 }, { "epoch": 38.073691014867485, "grad_norm": 2.0118277072906494, "learning_rate": 0.001, "loss": 1.9497, "step": 117800 }, { "epoch": 38.1060116354234, "grad_norm": 1.326648473739624, "learning_rate": 0.001, "loss": 1.9715, "step": 117900 }, { "epoch": 38.138332255979314, "grad_norm": 1.1895781755447388, "learning_rate": 0.001, "loss": 1.9775, "step": 118000 }, { "epoch": 38.17065287653523, "grad_norm": 1.1015886068344116, "learning_rate": 0.001, "loss": 1.9723, "step": 118100 }, { "epoch": 38.20297349709114, "grad_norm": 1.0223196744918823, "learning_rate": 0.001, "loss": 1.9879, "step": 118200 }, { "epoch": 38.23529411764706, "grad_norm": 1.1206566095352173, "learning_rate": 0.001, "loss": 1.9644, "step": 118300 }, { "epoch": 38.26761473820297, "grad_norm": 1.3843104839324951, "learning_rate": 0.001, "loss": 1.9905, "step": 118400 }, { "epoch": 38.29993535875889, "grad_norm": 1.201468586921692, "learning_rate": 0.001, "loss": 1.9906, "step": 118500 }, { "epoch": 38.3322559793148, "grad_norm": 3.2559967041015625, "learning_rate": 0.001, "loss": 1.9867, "step": 118600 }, { "epoch": 38.364576599870716, "grad_norm": 1.235195279121399, "learning_rate": 0.001, "loss": 1.9993, "step": 118700 }, { "epoch": 38.39689722042663, "grad_norm": 1.267943263053894, "learning_rate": 0.001, "loss": 2.0121, "step": 118800 }, { "epoch": 38.429217840982545, "grad_norm": 1.3650599718093872, "learning_rate": 0.001, "loss": 1.9998, "step": 118900 }, { "epoch": 38.46153846153846, "grad_norm": 1.071438193321228, "learning_rate": 0.001, "loss": 1.9969, "step": 119000 }, { "epoch": 38.493859082094374, "grad_norm": 1.3332175016403198, "learning_rate": 0.001, "loss": 2.0023, "step": 119100 }, { "epoch": 38.52617970265029, "grad_norm": 2.631466865539551, "learning_rate": 0.001, "loss": 2.0308, "step": 119200 }, { "epoch": 38.558500323206204, "grad_norm": 1.419049620628357, "learning_rate": 0.001, "loss": 2.0323, "step": 119300 }, { "epoch": 38.59082094376212, "grad_norm": 1.2397457361221313, "learning_rate": 0.001, "loss": 2.0229, "step": 119400 }, { "epoch": 38.62314156431803, "grad_norm": 1.0661416053771973, "learning_rate": 0.001, "loss": 2.0316, "step": 119500 }, { "epoch": 38.65546218487395, "grad_norm": 2.6966307163238525, "learning_rate": 0.001, "loss": 2.0364, "step": 119600 }, { "epoch": 38.68778280542986, "grad_norm": 1.983799934387207, "learning_rate": 0.001, "loss": 2.0498, "step": 119700 }, { "epoch": 38.720103425985776, "grad_norm": 1.1368643045425415, "learning_rate": 0.001, "loss": 2.0538, "step": 119800 }, { "epoch": 38.75242404654169, "grad_norm": 2.3371005058288574, "learning_rate": 0.001, "loss": 2.0483, "step": 119900 }, { "epoch": 38.784744667097605, "grad_norm": 1.345335841178894, "learning_rate": 0.001, "loss": 2.0572, "step": 120000 }, { "epoch": 38.81706528765352, "grad_norm": 1.293865442276001, "learning_rate": 0.001, "loss": 2.043, "step": 120100 }, { "epoch": 38.849385908209435, "grad_norm": 1.6003596782684326, "learning_rate": 0.001, "loss": 2.0769, "step": 120200 }, { "epoch": 38.88170652876535, "grad_norm": 2.650212526321411, "learning_rate": 0.001, "loss": 2.0799, "step": 120300 }, { "epoch": 38.914027149321264, "grad_norm": 1.7294082641601562, "learning_rate": 0.001, "loss": 2.0692, "step": 120400 }, { "epoch": 38.94634776987718, "grad_norm": 1.6566860675811768, "learning_rate": 0.001, "loss": 2.0579, "step": 120500 }, { "epoch": 38.97866839043309, "grad_norm": 1.0070867538452148, "learning_rate": 0.001, "loss": 2.0783, "step": 120600 }, { "epoch": 39.010989010989015, "grad_norm": 1.140474557876587, "learning_rate": 0.001, "loss": 2.0226, "step": 120700 }, { "epoch": 39.04330963154493, "grad_norm": 1.1137397289276123, "learning_rate": 0.001, "loss": 1.9503, "step": 120800 }, { "epoch": 39.075630252100844, "grad_norm": 1.451230764389038, "learning_rate": 0.001, "loss": 1.9558, "step": 120900 }, { "epoch": 39.10795087265676, "grad_norm": 1.4322075843811035, "learning_rate": 0.001, "loss": 1.9762, "step": 121000 }, { "epoch": 39.14027149321267, "grad_norm": 1.1680893898010254, "learning_rate": 0.001, "loss": 1.9571, "step": 121100 }, { "epoch": 39.17259211376859, "grad_norm": 1.4579657316207886, "learning_rate": 0.001, "loss": 1.9533, "step": 121200 }, { "epoch": 39.2049127343245, "grad_norm": 1.1083571910858154, "learning_rate": 0.001, "loss": 1.9532, "step": 121300 }, { "epoch": 39.237233354880416, "grad_norm": 1.5122196674346924, "learning_rate": 0.001, "loss": 1.9917, "step": 121400 }, { "epoch": 39.26955397543633, "grad_norm": 1.2212244272232056, "learning_rate": 0.001, "loss": 1.9888, "step": 121500 }, { "epoch": 39.301874595992246, "grad_norm": 1.1726443767547607, "learning_rate": 0.001, "loss": 1.979, "step": 121600 }, { "epoch": 39.33419521654816, "grad_norm": 1.315265417098999, "learning_rate": 0.001, "loss": 1.9943, "step": 121700 }, { "epoch": 39.366515837104075, "grad_norm": 1.7507109642028809, "learning_rate": 0.001, "loss": 1.9878, "step": 121800 }, { "epoch": 39.39883645765999, "grad_norm": 2.6302542686462402, "learning_rate": 0.001, "loss": 1.9813, "step": 121900 }, { "epoch": 39.431157078215904, "grad_norm": 1.1360636949539185, "learning_rate": 0.001, "loss": 1.9971, "step": 122000 }, { "epoch": 39.46347769877182, "grad_norm": 1.164288878440857, "learning_rate": 0.001, "loss": 1.9995, "step": 122100 }, { "epoch": 39.49579831932773, "grad_norm": 1.3028528690338135, "learning_rate": 0.001, "loss": 2.0007, "step": 122200 }, { "epoch": 39.52811893988365, "grad_norm": 1.3410977125167847, "learning_rate": 0.001, "loss": 2.0127, "step": 122300 }, { "epoch": 39.56043956043956, "grad_norm": 1.1426804065704346, "learning_rate": 0.001, "loss": 2.0022, "step": 122400 }, { "epoch": 39.59276018099548, "grad_norm": 1.248864769935608, "learning_rate": 0.001, "loss": 2.0111, "step": 122500 }, { "epoch": 39.62508080155139, "grad_norm": 2.0510551929473877, "learning_rate": 0.001, "loss": 2.0105, "step": 122600 }, { "epoch": 39.657401422107306, "grad_norm": 1.4321231842041016, "learning_rate": 0.001, "loss": 2.0259, "step": 122700 }, { "epoch": 39.68972204266322, "grad_norm": 1.6090155839920044, "learning_rate": 0.001, "loss": 2.0272, "step": 122800 }, { "epoch": 39.722042663219135, "grad_norm": 1.514089584350586, "learning_rate": 0.001, "loss": 2.0201, "step": 122900 }, { "epoch": 39.75436328377505, "grad_norm": 1.3112493753433228, "learning_rate": 0.001, "loss": 2.028, "step": 123000 }, { "epoch": 39.786683904330964, "grad_norm": 1.0231093168258667, "learning_rate": 0.001, "loss": 2.0445, "step": 123100 }, { "epoch": 39.81900452488688, "grad_norm": 1.2192585468292236, "learning_rate": 0.001, "loss": 2.0459, "step": 123200 }, { "epoch": 39.85132514544279, "grad_norm": 1.383726716041565, "learning_rate": 0.001, "loss": 2.023, "step": 123300 }, { "epoch": 39.88364576599871, "grad_norm": 3.7562310695648193, "learning_rate": 0.001, "loss": 2.0495, "step": 123400 }, { "epoch": 39.91596638655462, "grad_norm": 1.3290687799453735, "learning_rate": 0.001, "loss": 2.045, "step": 123500 }, { "epoch": 39.94828700711054, "grad_norm": 1.3152869939804077, "learning_rate": 0.001, "loss": 2.0333, "step": 123600 }, { "epoch": 39.98060762766645, "grad_norm": 1.1923072338104248, "learning_rate": 0.001, "loss": 2.0413, "step": 123700 }, { "epoch": 40.012928248222366, "grad_norm": 3.9820635318756104, "learning_rate": 0.001, "loss": 2.0107, "step": 123800 }, { "epoch": 40.04524886877828, "grad_norm": 1.163718819618225, "learning_rate": 0.001, "loss": 1.9479, "step": 123900 }, { "epoch": 40.077569489334195, "grad_norm": 1.3528404235839844, "learning_rate": 0.001, "loss": 1.9253, "step": 124000 }, { "epoch": 40.10989010989011, "grad_norm": 3.396636962890625, "learning_rate": 0.001, "loss": 1.9296, "step": 124100 }, { "epoch": 40.142210730446024, "grad_norm": 1.5089370012283325, "learning_rate": 0.001, "loss": 1.9258, "step": 124200 }, { "epoch": 40.17453135100194, "grad_norm": 1.4317644834518433, "learning_rate": 0.001, "loss": 1.9403, "step": 124300 }, { "epoch": 40.20685197155785, "grad_norm": 1.1057049036026, "learning_rate": 0.001, "loss": 1.967, "step": 124400 }, { "epoch": 40.23917259211377, "grad_norm": 1.3713023662567139, "learning_rate": 0.001, "loss": 1.9472, "step": 124500 }, { "epoch": 40.27149321266968, "grad_norm": 1.4861721992492676, "learning_rate": 0.001, "loss": 1.9586, "step": 124600 }, { "epoch": 40.3038138332256, "grad_norm": 1.3846735954284668, "learning_rate": 0.001, "loss": 1.9671, "step": 124700 }, { "epoch": 40.33613445378151, "grad_norm": 1.277343988418579, "learning_rate": 0.001, "loss": 1.9666, "step": 124800 }, { "epoch": 40.368455074337426, "grad_norm": 1.4501807689666748, "learning_rate": 0.001, "loss": 1.9903, "step": 124900 }, { "epoch": 40.40077569489334, "grad_norm": 1.301296591758728, "learning_rate": 0.001, "loss": 1.9862, "step": 125000 }, { "epoch": 40.433096315449255, "grad_norm": 2.770455837249756, "learning_rate": 0.001, "loss": 1.9676, "step": 125100 }, { "epoch": 40.46541693600517, "grad_norm": 4.428115367889404, "learning_rate": 0.001, "loss": 1.992, "step": 125200 }, { "epoch": 40.497737556561084, "grad_norm": 1.4950238466262817, "learning_rate": 0.001, "loss": 1.9972, "step": 125300 }, { "epoch": 40.530058177117, "grad_norm": 1.3742254972457886, "learning_rate": 0.001, "loss": 1.9926, "step": 125400 }, { "epoch": 40.56237879767291, "grad_norm": 1.7078733444213867, "learning_rate": 0.001, "loss": 1.9968, "step": 125500 }, { "epoch": 40.59469941822883, "grad_norm": 1.5410032272338867, "learning_rate": 0.001, "loss": 2.0064, "step": 125600 }, { "epoch": 40.62702003878474, "grad_norm": 1.1786679029464722, "learning_rate": 0.001, "loss": 1.994, "step": 125700 }, { "epoch": 40.65934065934066, "grad_norm": 1.4871423244476318, "learning_rate": 0.001, "loss": 1.989, "step": 125800 }, { "epoch": 40.69166127989657, "grad_norm": 3.2317421436309814, "learning_rate": 0.001, "loss": 1.9985, "step": 125900 }, { "epoch": 40.723981900452486, "grad_norm": 1.53239905834198, "learning_rate": 0.001, "loss": 2.0185, "step": 126000 }, { "epoch": 40.7563025210084, "grad_norm": 1.7642790079116821, "learning_rate": 0.001, "loss": 2.0075, "step": 126100 }, { "epoch": 40.788623141564315, "grad_norm": 1.4949848651885986, "learning_rate": 0.001, "loss": 2.0209, "step": 126200 }, { "epoch": 40.82094376212023, "grad_norm": 1.3884633779525757, "learning_rate": 0.001, "loss": 2.0266, "step": 126300 }, { "epoch": 40.853264382676144, "grad_norm": 1.3454203605651855, "learning_rate": 0.001, "loss": 2.0246, "step": 126400 }, { "epoch": 40.88558500323206, "grad_norm": 1.463576078414917, "learning_rate": 0.001, "loss": 2.0122, "step": 126500 }, { "epoch": 40.91790562378797, "grad_norm": 1.5990676879882812, "learning_rate": 0.001, "loss": 2.02, "step": 126600 }, { "epoch": 40.95022624434389, "grad_norm": 6.330573081970215, "learning_rate": 0.001, "loss": 2.0382, "step": 126700 }, { "epoch": 40.9825468648998, "grad_norm": 2.934973955154419, "learning_rate": 0.001, "loss": 2.0448, "step": 126800 }, { "epoch": 41.014867485455724, "grad_norm": 1.8106731176376343, "learning_rate": 0.001, "loss": 1.9591, "step": 126900 }, { "epoch": 41.04718810601164, "grad_norm": 5.568515777587891, "learning_rate": 0.001, "loss": 1.9224, "step": 127000 }, { "epoch": 41.07950872656755, "grad_norm": 1.341778039932251, "learning_rate": 0.001, "loss": 1.9368, "step": 127100 }, { "epoch": 41.11182934712347, "grad_norm": 2.048631191253662, "learning_rate": 0.001, "loss": 1.924, "step": 127200 }, { "epoch": 41.14414996767938, "grad_norm": 2.069324493408203, "learning_rate": 0.001, "loss": 1.9359, "step": 127300 }, { "epoch": 41.1764705882353, "grad_norm": 2.6134934425354004, "learning_rate": 0.001, "loss": 1.919, "step": 127400 }, { "epoch": 41.20879120879121, "grad_norm": 4.533571243286133, "learning_rate": 0.001, "loss": 1.9483, "step": 127500 }, { "epoch": 41.241111829347126, "grad_norm": 1.5425249338150024, "learning_rate": 0.001, "loss": 1.9643, "step": 127600 }, { "epoch": 41.27343244990304, "grad_norm": 1.9601558446884155, "learning_rate": 0.001, "loss": 1.9699, "step": 127700 }, { "epoch": 41.305753070458955, "grad_norm": 1.598547339439392, "learning_rate": 0.001, "loss": 1.9565, "step": 127800 }, { "epoch": 41.33807369101487, "grad_norm": 1.304530143737793, "learning_rate": 0.001, "loss": 1.9464, "step": 127900 }, { "epoch": 41.370394311570784, "grad_norm": 1.7590181827545166, "learning_rate": 0.001, "loss": 1.9936, "step": 128000 }, { "epoch": 41.4027149321267, "grad_norm": 1.6568078994750977, "learning_rate": 0.001, "loss": 1.9689, "step": 128100 }, { "epoch": 41.43503555268261, "grad_norm": 1.5514376163482666, "learning_rate": 0.001, "loss": 1.9553, "step": 128200 }, { "epoch": 41.46735617323853, "grad_norm": 1.4648118019104004, "learning_rate": 0.001, "loss": 1.9739, "step": 128300 }, { "epoch": 41.49967679379444, "grad_norm": 1.6158056259155273, "learning_rate": 0.001, "loss": 1.9757, "step": 128400 }, { "epoch": 41.53199741435036, "grad_norm": 3.42694354057312, "learning_rate": 0.001, "loss": 1.9828, "step": 128500 }, { "epoch": 41.56431803490627, "grad_norm": 1.2903165817260742, "learning_rate": 0.001, "loss": 1.9767, "step": 128600 }, { "epoch": 41.596638655462186, "grad_norm": 1.472036361694336, "learning_rate": 0.001, "loss": 1.9631, "step": 128700 }, { "epoch": 41.6289592760181, "grad_norm": 1.8981190919876099, "learning_rate": 0.001, "loss": 1.978, "step": 128800 }, { "epoch": 41.661279896574015, "grad_norm": 1.3812053203582764, "learning_rate": 0.001, "loss": 1.983, "step": 128900 }, { "epoch": 41.69360051712993, "grad_norm": 1.340676188468933, "learning_rate": 0.001, "loss": 1.9986, "step": 129000 }, { "epoch": 41.725921137685845, "grad_norm": 2.1788086891174316, "learning_rate": 0.001, "loss": 2.0005, "step": 129100 }, { "epoch": 41.75824175824176, "grad_norm": 1.3890354633331299, "learning_rate": 0.001, "loss": 2.0146, "step": 129200 }, { "epoch": 41.790562378797674, "grad_norm": 1.6798447370529175, "learning_rate": 0.001, "loss": 1.9909, "step": 129300 }, { "epoch": 41.82288299935359, "grad_norm": 1.2619132995605469, "learning_rate": 0.001, "loss": 2.0093, "step": 129400 }, { "epoch": 41.8552036199095, "grad_norm": 2.880704402923584, "learning_rate": 0.001, "loss": 2.0169, "step": 129500 }, { "epoch": 41.88752424046542, "grad_norm": 1.818469762802124, "learning_rate": 0.001, "loss": 2.0115, "step": 129600 }, { "epoch": 41.91984486102133, "grad_norm": 3.7079341411590576, "learning_rate": 0.001, "loss": 2.0349, "step": 129700 }, { "epoch": 41.95216548157725, "grad_norm": 1.4983065128326416, "learning_rate": 0.001, "loss": 2.0059, "step": 129800 }, { "epoch": 41.98448610213316, "grad_norm": 1.6789497137069702, "learning_rate": 0.001, "loss": 2.0313, "step": 129900 }, { "epoch": 42.016806722689076, "grad_norm": 1.796940803527832, "learning_rate": 0.001, "loss": 1.9498, "step": 130000 }, { "epoch": 42.04912734324499, "grad_norm": 2.9226157665252686, "learning_rate": 0.001, "loss": 1.9143, "step": 130100 }, { "epoch": 42.081447963800905, "grad_norm": 1.4195504188537598, "learning_rate": 0.001, "loss": 1.9238, "step": 130200 }, { "epoch": 42.11376858435682, "grad_norm": 1.7703349590301514, "learning_rate": 0.001, "loss": 1.9198, "step": 130300 }, { "epoch": 42.146089204912734, "grad_norm": 1.8108232021331787, "learning_rate": 0.001, "loss": 1.9155, "step": 130400 }, { "epoch": 42.17840982546865, "grad_norm": 10.875456809997559, "learning_rate": 0.001, "loss": 1.9368, "step": 130500 }, { "epoch": 42.21073044602456, "grad_norm": 1.6553337574005127, "learning_rate": 0.001, "loss": 1.9364, "step": 130600 }, { "epoch": 42.24305106658048, "grad_norm": 1.548933744430542, "learning_rate": 0.001, "loss": 1.93, "step": 130700 }, { "epoch": 42.27537168713639, "grad_norm": 1.4559000730514526, "learning_rate": 0.001, "loss": 1.9473, "step": 130800 }, { "epoch": 42.30769230769231, "grad_norm": 1.2075352668762207, "learning_rate": 0.001, "loss": 1.9498, "step": 130900 }, { "epoch": 42.34001292824822, "grad_norm": 2.4885611534118652, "learning_rate": 0.001, "loss": 1.9451, "step": 131000 }, { "epoch": 42.372333548804136, "grad_norm": 1.6603654623031616, "learning_rate": 0.001, "loss": 1.9699, "step": 131100 }, { "epoch": 42.40465416936005, "grad_norm": 1.6623696088790894, "learning_rate": 0.001, "loss": 1.9713, "step": 131200 }, { "epoch": 42.436974789915965, "grad_norm": 2.6627957820892334, "learning_rate": 0.001, "loss": 1.9523, "step": 131300 }, { "epoch": 42.46929541047188, "grad_norm": 1.9103305339813232, "learning_rate": 0.001, "loss": 1.9667, "step": 131400 }, { "epoch": 42.501616031027794, "grad_norm": 2.3403966426849365, "learning_rate": 0.001, "loss": 1.972, "step": 131500 }, { "epoch": 42.53393665158371, "grad_norm": 1.352020502090454, "learning_rate": 0.001, "loss": 1.966, "step": 131600 }, { "epoch": 42.56625727213962, "grad_norm": 1.6333130598068237, "learning_rate": 0.001, "loss": 1.9728, "step": 131700 }, { "epoch": 42.59857789269554, "grad_norm": 1.9187365770339966, "learning_rate": 0.001, "loss": 2.0034, "step": 131800 }, { "epoch": 42.63089851325145, "grad_norm": 1.9130007028579712, "learning_rate": 0.001, "loss": 1.9637, "step": 131900 }, { "epoch": 42.66321913380737, "grad_norm": 1.3270819187164307, "learning_rate": 0.001, "loss": 1.9798, "step": 132000 }, { "epoch": 42.69553975436328, "grad_norm": 5.9544172286987305, "learning_rate": 0.001, "loss": 1.9786, "step": 132100 }, { "epoch": 42.727860374919196, "grad_norm": 2.1070775985717773, "learning_rate": 0.001, "loss": 1.9839, "step": 132200 }, { "epoch": 42.76018099547511, "grad_norm": 1.8662304878234863, "learning_rate": 0.001, "loss": 1.9843, "step": 132300 }, { "epoch": 42.792501616031025, "grad_norm": 1.603851079940796, "learning_rate": 0.001, "loss": 1.9949, "step": 132400 }, { "epoch": 42.82482223658694, "grad_norm": 1.6711206436157227, "learning_rate": 0.001, "loss": 2.0016, "step": 132500 }, { "epoch": 42.857142857142854, "grad_norm": 1.9021879434585571, "learning_rate": 0.001, "loss": 1.993, "step": 132600 }, { "epoch": 42.88946347769877, "grad_norm": 1.7930629253387451, "learning_rate": 0.001, "loss": 1.984, "step": 132700 }, { "epoch": 42.92178409825468, "grad_norm": 2.1397993564605713, "learning_rate": 0.001, "loss": 2.0297, "step": 132800 }, { "epoch": 42.9541047188106, "grad_norm": 1.5600769519805908, "learning_rate": 0.001, "loss": 2.011, "step": 132900 }, { "epoch": 42.98642533936652, "grad_norm": 2.858860969543457, "learning_rate": 0.001, "loss": 2.0082, "step": 133000 }, { "epoch": 43.018745959922434, "grad_norm": 1.5517375469207764, "learning_rate": 0.001, "loss": 1.9511, "step": 133100 }, { "epoch": 43.05106658047835, "grad_norm": 1.3803879022598267, "learning_rate": 0.001, "loss": 1.8843, "step": 133200 }, { "epoch": 43.08338720103426, "grad_norm": 1.240495204925537, "learning_rate": 0.001, "loss": 1.8907, "step": 133300 }, { "epoch": 43.11570782159018, "grad_norm": 1.2222603559494019, "learning_rate": 0.001, "loss": 1.9032, "step": 133400 }, { "epoch": 43.14802844214609, "grad_norm": 1.3209229707717896, "learning_rate": 0.001, "loss": 1.9169, "step": 133500 }, { "epoch": 43.18034906270201, "grad_norm": 1.2317159175872803, "learning_rate": 0.001, "loss": 1.9341, "step": 133600 }, { "epoch": 43.21266968325792, "grad_norm": 1.619140386581421, "learning_rate": 0.001, "loss": 1.9389, "step": 133700 }, { "epoch": 43.244990303813836, "grad_norm": 1.5229370594024658, "learning_rate": 0.001, "loss": 1.9092, "step": 133800 }, { "epoch": 43.27731092436975, "grad_norm": 1.4855724573135376, "learning_rate": 0.001, "loss": 1.9265, "step": 133900 }, { "epoch": 43.309631544925665, "grad_norm": 1.226261019706726, "learning_rate": 0.001, "loss": 1.9409, "step": 134000 }, { "epoch": 43.34195216548158, "grad_norm": 1.2717571258544922, "learning_rate": 0.001, "loss": 1.9503, "step": 134100 }, { "epoch": 43.374272786037494, "grad_norm": 1.1911205053329468, "learning_rate": 0.001, "loss": 1.951, "step": 134200 }, { "epoch": 43.40659340659341, "grad_norm": 1.395449161529541, "learning_rate": 0.001, "loss": 1.9542, "step": 134300 }, { "epoch": 43.43891402714932, "grad_norm": 1.2694145441055298, "learning_rate": 0.001, "loss": 1.9414, "step": 134400 }, { "epoch": 43.47123464770524, "grad_norm": 1.4304951429367065, "learning_rate": 0.001, "loss": 1.9784, "step": 134500 }, { "epoch": 43.50355526826115, "grad_norm": 2.916964530944824, "learning_rate": 0.001, "loss": 1.9571, "step": 134600 }, { "epoch": 43.53587588881707, "grad_norm": 1.3888237476348877, "learning_rate": 0.001, "loss": 1.9692, "step": 134700 }, { "epoch": 43.56819650937298, "grad_norm": 1.2487362623214722, "learning_rate": 0.001, "loss": 1.9771, "step": 134800 }, { "epoch": 43.600517129928896, "grad_norm": 1.2881702184677124, "learning_rate": 0.001, "loss": 1.9603, "step": 134900 }, { "epoch": 43.63283775048481, "grad_norm": 2.0000839233398438, "learning_rate": 0.001, "loss": 1.9844, "step": 135000 }, { "epoch": 43.665158371040725, "grad_norm": 1.4102736711502075, "learning_rate": 0.001, "loss": 1.9676, "step": 135100 }, { "epoch": 43.69747899159664, "grad_norm": 1.0733171701431274, "learning_rate": 0.001, "loss": 1.9871, "step": 135200 }, { "epoch": 43.729799612152554, "grad_norm": 1.3629958629608154, "learning_rate": 0.001, "loss": 1.9801, "step": 135300 }, { "epoch": 43.76212023270847, "grad_norm": 1.342240810394287, "learning_rate": 0.001, "loss": 1.9921, "step": 135400 }, { "epoch": 43.79444085326438, "grad_norm": 1.1792398691177368, "learning_rate": 0.001, "loss": 2.005, "step": 135500 }, { "epoch": 43.8267614738203, "grad_norm": 1.5314946174621582, "learning_rate": 0.001, "loss": 1.9809, "step": 135600 }, { "epoch": 43.85908209437621, "grad_norm": 1.299494743347168, "learning_rate": 0.001, "loss": 1.992, "step": 135700 }, { "epoch": 43.89140271493213, "grad_norm": 0.9295315742492676, "learning_rate": 0.001, "loss": 2.0016, "step": 135800 }, { "epoch": 43.92372333548804, "grad_norm": 1.4043889045715332, "learning_rate": 0.001, "loss": 2.0004, "step": 135900 }, { "epoch": 43.956043956043956, "grad_norm": 1.3731017112731934, "learning_rate": 0.001, "loss": 1.9946, "step": 136000 }, { "epoch": 43.98836457659987, "grad_norm": 1.2270859479904175, "learning_rate": 0.001, "loss": 2.0162, "step": 136100 }, { "epoch": 44.020685197155785, "grad_norm": 5.5850629806518555, "learning_rate": 0.001, "loss": 1.9415, "step": 136200 }, { "epoch": 44.0530058177117, "grad_norm": 2.552239179611206, "learning_rate": 0.001, "loss": 1.8978, "step": 136300 }, { "epoch": 44.085326438267614, "grad_norm": 1.3903478384017944, "learning_rate": 0.001, "loss": 1.8799, "step": 136400 }, { "epoch": 44.11764705882353, "grad_norm": 1.5142490863800049, "learning_rate": 0.001, "loss": 1.9134, "step": 136500 }, { "epoch": 44.14996767937944, "grad_norm": 1.4766541719436646, "learning_rate": 0.001, "loss": 1.8951, "step": 136600 }, { "epoch": 44.18228829993536, "grad_norm": 1.4894737005233765, "learning_rate": 0.001, "loss": 1.9172, "step": 136700 }, { "epoch": 44.21460892049127, "grad_norm": 1.4120560884475708, "learning_rate": 0.001, "loss": 1.9054, "step": 136800 }, { "epoch": 44.24692954104719, "grad_norm": 2.534087896347046, "learning_rate": 0.001, "loss": 1.9078, "step": 136900 }, { "epoch": 44.2792501616031, "grad_norm": 1.4340354204177856, "learning_rate": 0.001, "loss": 1.9302, "step": 137000 }, { "epoch": 44.311570782159016, "grad_norm": 1.3901655673980713, "learning_rate": 0.001, "loss": 1.9168, "step": 137100 }, { "epoch": 44.34389140271493, "grad_norm": 1.4648103713989258, "learning_rate": 0.001, "loss": 1.9312, "step": 137200 }, { "epoch": 44.376212023270845, "grad_norm": 0.9933161735534668, "learning_rate": 0.001, "loss": 1.9194, "step": 137300 }, { "epoch": 44.40853264382676, "grad_norm": 1.3491710424423218, "learning_rate": 0.001, "loss": 1.93, "step": 137400 }, { "epoch": 44.440853264382675, "grad_norm": 1.3563514947891235, "learning_rate": 0.001, "loss": 1.9259, "step": 137500 }, { "epoch": 44.47317388493859, "grad_norm": 1.2267310619354248, "learning_rate": 0.001, "loss": 1.9547, "step": 137600 }, { "epoch": 44.505494505494504, "grad_norm": 1.0981390476226807, "learning_rate": 0.001, "loss": 1.95, "step": 137700 }, { "epoch": 44.53781512605042, "grad_norm": 1.9077948331832886, "learning_rate": 0.001, "loss": 1.9516, "step": 137800 }, { "epoch": 44.57013574660633, "grad_norm": 1.422743797302246, "learning_rate": 0.001, "loss": 1.9592, "step": 137900 }, { "epoch": 44.60245636716225, "grad_norm": 1.3714661598205566, "learning_rate": 0.001, "loss": 1.9413, "step": 138000 }, { "epoch": 44.63477698771816, "grad_norm": 1.3728971481323242, "learning_rate": 0.001, "loss": 1.9552, "step": 138100 }, { "epoch": 44.66709760827408, "grad_norm": 1.4088006019592285, "learning_rate": 0.001, "loss": 1.9646, "step": 138200 }, { "epoch": 44.69941822882999, "grad_norm": 2.8559157848358154, "learning_rate": 0.001, "loss": 1.9835, "step": 138300 }, { "epoch": 44.731738849385906, "grad_norm": 1.6238328218460083, "learning_rate": 0.001, "loss": 1.975, "step": 138400 }, { "epoch": 44.76405946994182, "grad_norm": 1.3402780294418335, "learning_rate": 0.001, "loss": 1.9763, "step": 138500 }, { "epoch": 44.796380090497735, "grad_norm": 1.2792099714279175, "learning_rate": 0.001, "loss": 1.9617, "step": 138600 }, { "epoch": 44.82870071105365, "grad_norm": 1.649281620979309, "learning_rate": 0.001, "loss": 1.9851, "step": 138700 }, { "epoch": 44.861021331609564, "grad_norm": 1.0985745191574097, "learning_rate": 0.001, "loss": 1.9802, "step": 138800 }, { "epoch": 44.89334195216548, "grad_norm": 1.4287372827529907, "learning_rate": 0.001, "loss": 1.9788, "step": 138900 }, { "epoch": 44.92566257272139, "grad_norm": 1.2976144552230835, "learning_rate": 0.001, "loss": 1.9865, "step": 139000 }, { "epoch": 44.95798319327731, "grad_norm": 1.2866300344467163, "learning_rate": 0.001, "loss": 1.9953, "step": 139100 }, { "epoch": 44.99030381383322, "grad_norm": 2.5281121730804443, "learning_rate": 0.001, "loss": 1.9846, "step": 139200 }, { "epoch": 45.022624434389144, "grad_norm": 0.9322966933250427, "learning_rate": 0.001, "loss": 1.9139, "step": 139300 }, { "epoch": 45.05494505494506, "grad_norm": 1.6272239685058594, "learning_rate": 0.001, "loss": 1.8763, "step": 139400 }, { "epoch": 45.08726567550097, "grad_norm": 1.756226658821106, "learning_rate": 0.001, "loss": 1.8812, "step": 139500 }, { "epoch": 45.11958629605689, "grad_norm": 1.243112325668335, "learning_rate": 0.001, "loss": 1.8944, "step": 139600 }, { "epoch": 45.1519069166128, "grad_norm": 1.3239643573760986, "learning_rate": 0.001, "loss": 1.8612, "step": 139700 }, { "epoch": 45.18422753716872, "grad_norm": 1.1390424966812134, "learning_rate": 0.001, "loss": 1.8866, "step": 139800 }, { "epoch": 45.21654815772463, "grad_norm": 1.4782127141952515, "learning_rate": 0.001, "loss": 1.8993, "step": 139900 }, { "epoch": 45.248868778280546, "grad_norm": 1.325358271598816, "learning_rate": 0.001, "loss": 1.904, "step": 140000 }, { "epoch": 45.28118939883646, "grad_norm": 1.5114521980285645, "learning_rate": 0.001, "loss": 1.8893, "step": 140100 }, { "epoch": 45.313510019392375, "grad_norm": 13.562479019165039, "learning_rate": 0.001, "loss": 1.9001, "step": 140200 }, { "epoch": 45.34583063994829, "grad_norm": 1.1720770597457886, "learning_rate": 0.001, "loss": 1.9003, "step": 140300 }, { "epoch": 45.378151260504204, "grad_norm": 1.3906831741333008, "learning_rate": 0.001, "loss": 1.9164, "step": 140400 }, { "epoch": 45.41047188106012, "grad_norm": 1.8706270456314087, "learning_rate": 0.001, "loss": 1.9261, "step": 140500 }, { "epoch": 45.44279250161603, "grad_norm": 2.7911887168884277, "learning_rate": 0.001, "loss": 1.9263, "step": 140600 }, { "epoch": 45.47511312217195, "grad_norm": 1.446894884109497, "learning_rate": 0.001, "loss": 1.9228, "step": 140700 }, { "epoch": 45.50743374272786, "grad_norm": 1.3137013912200928, "learning_rate": 0.001, "loss": 1.9453, "step": 140800 }, { "epoch": 45.53975436328378, "grad_norm": 1.1383583545684814, "learning_rate": 0.001, "loss": 1.9306, "step": 140900 }, { "epoch": 45.57207498383969, "grad_norm": 1.590207576751709, "learning_rate": 0.001, "loss": 1.9331, "step": 141000 }, { "epoch": 45.604395604395606, "grad_norm": 1.3438366651535034, "learning_rate": 0.001, "loss": 1.9429, "step": 141100 }, { "epoch": 45.63671622495152, "grad_norm": 1.356629490852356, "learning_rate": 0.001, "loss": 1.9484, "step": 141200 }, { "epoch": 45.669036845507435, "grad_norm": 1.1725960969924927, "learning_rate": 0.001, "loss": 1.9576, "step": 141300 }, { "epoch": 45.70135746606335, "grad_norm": 0.9936198592185974, "learning_rate": 0.001, "loss": 1.9269, "step": 141400 }, { "epoch": 45.733678086619264, "grad_norm": 1.2449716329574585, "learning_rate": 0.001, "loss": 1.9587, "step": 141500 }, { "epoch": 45.76599870717518, "grad_norm": 1.2557263374328613, "learning_rate": 0.001, "loss": 1.9628, "step": 141600 }, { "epoch": 45.79831932773109, "grad_norm": 1.1909370422363281, "learning_rate": 0.001, "loss": 1.96, "step": 141700 }, { "epoch": 45.83063994828701, "grad_norm": 1.4673229455947876, "learning_rate": 0.001, "loss": 1.9543, "step": 141800 }, { "epoch": 45.86296056884292, "grad_norm": 1.2456512451171875, "learning_rate": 0.001, "loss": 1.9533, "step": 141900 }, { "epoch": 45.89528118939884, "grad_norm": 1.1183640956878662, "learning_rate": 0.001, "loss": 1.9684, "step": 142000 }, { "epoch": 45.92760180995475, "grad_norm": 1.2889524698257446, "learning_rate": 0.001, "loss": 1.9856, "step": 142100 }, { "epoch": 45.959922430510666, "grad_norm": 1.4960366487503052, "learning_rate": 0.001, "loss": 1.9604, "step": 142200 }, { "epoch": 45.99224305106658, "grad_norm": 1.3624542951583862, "learning_rate": 0.001, "loss": 1.9865, "step": 142300 }, { "epoch": 46.024563671622495, "grad_norm": 1.276051640510559, "learning_rate": 0.001, "loss": 1.8826, "step": 142400 }, { "epoch": 46.05688429217841, "grad_norm": 2.7992405891418457, "learning_rate": 0.001, "loss": 1.8651, "step": 142500 }, { "epoch": 46.089204912734324, "grad_norm": 1.4230403900146484, "learning_rate": 0.001, "loss": 1.8488, "step": 142600 }, { "epoch": 46.12152553329024, "grad_norm": 1.814460277557373, "learning_rate": 0.001, "loss": 1.8574, "step": 142700 }, { "epoch": 46.15384615384615, "grad_norm": 1.3071939945220947, "learning_rate": 0.001, "loss": 1.8706, "step": 142800 }, { "epoch": 46.18616677440207, "grad_norm": 1.2666304111480713, "learning_rate": 0.001, "loss": 1.8662, "step": 142900 }, { "epoch": 46.21848739495798, "grad_norm": 1.3148808479309082, "learning_rate": 0.001, "loss": 1.8895, "step": 143000 }, { "epoch": 46.2508080155139, "grad_norm": 1.0444884300231934, "learning_rate": 0.001, "loss": 1.8881, "step": 143100 }, { "epoch": 46.28312863606981, "grad_norm": 1.3372923135757446, "learning_rate": 0.001, "loss": 1.8925, "step": 143200 }, { "epoch": 46.315449256625726, "grad_norm": 1.164212942123413, "learning_rate": 0.001, "loss": 1.9011, "step": 143300 }, { "epoch": 46.34776987718164, "grad_norm": 1.3530352115631104, "learning_rate": 0.001, "loss": 1.9135, "step": 143400 }, { "epoch": 46.380090497737555, "grad_norm": 1.1822092533111572, "learning_rate": 0.001, "loss": 1.8868, "step": 143500 }, { "epoch": 46.41241111829347, "grad_norm": 1.9492522478103638, "learning_rate": 0.001, "loss": 1.8945, "step": 143600 }, { "epoch": 46.444731738849384, "grad_norm": 1.3363617658615112, "learning_rate": 0.001, "loss": 1.914, "step": 143700 }, { "epoch": 46.4770523594053, "grad_norm": 0.9405075311660767, "learning_rate": 0.001, "loss": 1.9175, "step": 143800 }, { "epoch": 46.50937297996121, "grad_norm": 1.1859244108200073, "learning_rate": 0.001, "loss": 1.91, "step": 143900 }, { "epoch": 46.54169360051713, "grad_norm": 1.1513789892196655, "learning_rate": 0.001, "loss": 1.9138, "step": 144000 }, { "epoch": 46.57401422107304, "grad_norm": 4.23075532913208, "learning_rate": 0.001, "loss": 1.9097, "step": 144100 }, { "epoch": 46.60633484162896, "grad_norm": 1.4291493892669678, "learning_rate": 0.001, "loss": 1.9234, "step": 144200 }, { "epoch": 46.63865546218487, "grad_norm": 1.0709179639816284, "learning_rate": 0.001, "loss": 1.9178, "step": 144300 }, { "epoch": 46.670976082740786, "grad_norm": 1.0300965309143066, "learning_rate": 0.001, "loss": 1.9303, "step": 144400 }, { "epoch": 46.7032967032967, "grad_norm": 1.119706630706787, "learning_rate": 0.001, "loss": 1.9514, "step": 144500 }, { "epoch": 46.735617323852615, "grad_norm": 1.1141287088394165, "learning_rate": 0.001, "loss": 1.9533, "step": 144600 }, { "epoch": 46.76793794440853, "grad_norm": 1.206587553024292, "learning_rate": 0.001, "loss": 1.9544, "step": 144700 }, { "epoch": 46.800258564964444, "grad_norm": 1.1145209074020386, "learning_rate": 0.001, "loss": 1.9523, "step": 144800 }, { "epoch": 46.83257918552036, "grad_norm": 3.683237075805664, "learning_rate": 0.001, "loss": 1.9541, "step": 144900 }, { "epoch": 46.864899806076274, "grad_norm": 1.4116101264953613, "learning_rate": 0.001, "loss": 1.9525, "step": 145000 }, { "epoch": 46.89722042663219, "grad_norm": 1.3738763332366943, "learning_rate": 0.001, "loss": 1.9468, "step": 145100 }, { "epoch": 46.9295410471881, "grad_norm": 1.483337163925171, "learning_rate": 0.001, "loss": 1.956, "step": 145200 }, { "epoch": 46.96186166774402, "grad_norm": 1.2264422178268433, "learning_rate": 0.001, "loss": 1.9642, "step": 145300 }, { "epoch": 46.99418228829994, "grad_norm": 1.0686662197113037, "learning_rate": 0.001, "loss": 1.9604, "step": 145400 }, { "epoch": 47.02650290885585, "grad_norm": 1.5856003761291504, "learning_rate": 0.001, "loss": 1.8664, "step": 145500 }, { "epoch": 47.05882352941177, "grad_norm": 1.7289409637451172, "learning_rate": 0.001, "loss": 1.8601, "step": 145600 }, { "epoch": 47.09114414996768, "grad_norm": 1.2159520387649536, "learning_rate": 0.001, "loss": 1.8536, "step": 145700 }, { "epoch": 47.1234647705236, "grad_norm": 1.191497802734375, "learning_rate": 0.001, "loss": 1.8522, "step": 145800 }, { "epoch": 47.15578539107951, "grad_norm": 1.5088447332382202, "learning_rate": 0.001, "loss": 1.8605, "step": 145900 }, { "epoch": 47.188106011635426, "grad_norm": 1.3735973834991455, "learning_rate": 0.001, "loss": 1.8927, "step": 146000 }, { "epoch": 47.22042663219134, "grad_norm": 79.85970306396484, "learning_rate": 0.001, "loss": 1.8745, "step": 146100 }, { "epoch": 47.252747252747255, "grad_norm": 1.3051249980926514, "learning_rate": 0.001, "loss": 1.8796, "step": 146200 }, { "epoch": 47.28506787330317, "grad_norm": 3.9679150581359863, "learning_rate": 0.001, "loss": 1.876, "step": 146300 }, { "epoch": 47.317388493859085, "grad_norm": 2.2026209831237793, "learning_rate": 0.001, "loss": 1.8862, "step": 146400 }, { "epoch": 47.349709114415, "grad_norm": 1.2821727991104126, "learning_rate": 0.001, "loss": 1.8853, "step": 146500 }, { "epoch": 47.382029734970914, "grad_norm": 2.680227518081665, "learning_rate": 0.001, "loss": 1.8953, "step": 146600 }, { "epoch": 47.41435035552683, "grad_norm": 3.0225305557250977, "learning_rate": 0.001, "loss": 1.9057, "step": 146700 }, { "epoch": 47.44667097608274, "grad_norm": 2.054591655731201, "learning_rate": 0.001, "loss": 1.9074, "step": 146800 }, { "epoch": 47.47899159663866, "grad_norm": 1.3884879350662231, "learning_rate": 0.001, "loss": 1.9215, "step": 146900 }, { "epoch": 47.51131221719457, "grad_norm": 1.450736403465271, "learning_rate": 0.001, "loss": 1.9119, "step": 147000 }, { "epoch": 47.543632837750486, "grad_norm": 1.366363763809204, "learning_rate": 0.001, "loss": 1.8915, "step": 147100 }, { "epoch": 47.5759534583064, "grad_norm": 2.539961338043213, "learning_rate": 0.001, "loss": 1.9213, "step": 147200 }, { "epoch": 47.608274078862316, "grad_norm": 1.1065471172332764, "learning_rate": 0.001, "loss": 1.9162, "step": 147300 }, { "epoch": 47.64059469941823, "grad_norm": 1.280946969985962, "learning_rate": 0.001, "loss": 1.9338, "step": 147400 }, { "epoch": 47.672915319974145, "grad_norm": 1.4230639934539795, "learning_rate": 0.001, "loss": 1.9446, "step": 147500 }, { "epoch": 47.70523594053006, "grad_norm": 1.95630943775177, "learning_rate": 0.001, "loss": 1.942, "step": 147600 }, { "epoch": 47.737556561085974, "grad_norm": 1.053483009338379, "learning_rate": 0.001, "loss": 1.9266, "step": 147700 }, { "epoch": 47.76987718164189, "grad_norm": 1.2863104343414307, "learning_rate": 0.001, "loss": 1.9203, "step": 147800 }, { "epoch": 47.8021978021978, "grad_norm": 1.852759838104248, "learning_rate": 0.001, "loss": 1.9638, "step": 147900 }, { "epoch": 47.83451842275372, "grad_norm": 1.0904704332351685, "learning_rate": 0.001, "loss": 1.9381, "step": 148000 }, { "epoch": 47.86683904330963, "grad_norm": 1.4535976648330688, "learning_rate": 0.001, "loss": 1.939, "step": 148100 }, { "epoch": 47.89915966386555, "grad_norm": 1.251557469367981, "learning_rate": 0.001, "loss": 1.9526, "step": 148200 }, { "epoch": 47.93148028442146, "grad_norm": 1.5214812755584717, "learning_rate": 0.001, "loss": 1.942, "step": 148300 }, { "epoch": 47.963800904977376, "grad_norm": 2.5378262996673584, "learning_rate": 0.001, "loss": 1.942, "step": 148400 }, { "epoch": 47.99612152553329, "grad_norm": 2.171137809753418, "learning_rate": 0.001, "loss": 1.9489, "step": 148500 }, { "epoch": 48.028442146089205, "grad_norm": 1.306046962738037, "learning_rate": 0.001, "loss": 1.8516, "step": 148600 }, { "epoch": 48.06076276664512, "grad_norm": 1.2905352115631104, "learning_rate": 0.001, "loss": 1.8544, "step": 148700 }, { "epoch": 48.093083387201034, "grad_norm": 1.099907636642456, "learning_rate": 0.001, "loss": 1.8399, "step": 148800 }, { "epoch": 48.12540400775695, "grad_norm": 1.688352346420288, "learning_rate": 0.001, "loss": 1.849, "step": 148900 }, { "epoch": 48.15772462831286, "grad_norm": 1.5573194026947021, "learning_rate": 0.001, "loss": 1.8642, "step": 149000 }, { "epoch": 48.19004524886878, "grad_norm": 1.2285957336425781, "learning_rate": 0.001, "loss": 1.8439, "step": 149100 }, { "epoch": 48.22236586942469, "grad_norm": 1.537477731704712, "learning_rate": 0.001, "loss": 1.8679, "step": 149200 }, { "epoch": 48.25468648998061, "grad_norm": 1.1804989576339722, "learning_rate": 0.001, "loss": 1.8735, "step": 149300 }, { "epoch": 48.28700711053652, "grad_norm": 1.7131823301315308, "learning_rate": 0.001, "loss": 1.8618, "step": 149400 }, { "epoch": 48.319327731092436, "grad_norm": 1.2613481283187866, "learning_rate": 0.001, "loss": 1.8688, "step": 149500 }, { "epoch": 48.35164835164835, "grad_norm": 1.8532971143722534, "learning_rate": 0.001, "loss": 1.8756, "step": 149600 }, { "epoch": 48.383968972204265, "grad_norm": 1.222306489944458, "learning_rate": 0.001, "loss": 1.8718, "step": 149700 }, { "epoch": 48.41628959276018, "grad_norm": 4.243698596954346, "learning_rate": 0.001, "loss": 1.8787, "step": 149800 }, { "epoch": 48.448610213316094, "grad_norm": 1.9790083169937134, "learning_rate": 0.001, "loss": 1.8926, "step": 149900 }, { "epoch": 48.48093083387201, "grad_norm": 7.227484226226807, "learning_rate": 0.001, "loss": 1.884, "step": 150000 }, { "epoch": 48.51325145442792, "grad_norm": 1.4504117965698242, "learning_rate": 0.001, "loss": 1.8832, "step": 150100 }, { "epoch": 48.54557207498384, "grad_norm": 1.701651692390442, "learning_rate": 0.001, "loss": 1.892, "step": 150200 }, { "epoch": 48.57789269553975, "grad_norm": 1.0524812936782837, "learning_rate": 0.001, "loss": 1.8876, "step": 150300 }, { "epoch": 48.61021331609567, "grad_norm": 1.8778553009033203, "learning_rate": 0.001, "loss": 1.9093, "step": 150400 }, { "epoch": 48.64253393665158, "grad_norm": 1.707296371459961, "learning_rate": 0.001, "loss": 1.907, "step": 150500 }, { "epoch": 48.674854557207496, "grad_norm": 1.1146116256713867, "learning_rate": 0.001, "loss": 1.9153, "step": 150600 }, { "epoch": 48.70717517776341, "grad_norm": 4.351861476898193, "learning_rate": 0.001, "loss": 1.9125, "step": 150700 }, { "epoch": 48.739495798319325, "grad_norm": 1.2428768873214722, "learning_rate": 0.001, "loss": 1.9244, "step": 150800 }, { "epoch": 48.77181641887524, "grad_norm": 1.6403248310089111, "learning_rate": 0.001, "loss": 1.9411, "step": 150900 }, { "epoch": 48.804137039431154, "grad_norm": 0.9744797945022583, "learning_rate": 0.001, "loss": 1.9076, "step": 151000 }, { "epoch": 48.83645765998707, "grad_norm": 1.5079376697540283, "learning_rate": 0.001, "loss": 1.9205, "step": 151100 }, { "epoch": 48.86877828054298, "grad_norm": 1.4556964635849, "learning_rate": 0.001, "loss": 1.9229, "step": 151200 }, { "epoch": 48.9010989010989, "grad_norm": 1.255356788635254, "learning_rate": 0.001, "loss": 1.9222, "step": 151300 }, { "epoch": 48.93341952165481, "grad_norm": 1.1656986474990845, "learning_rate": 0.001, "loss": 1.9393, "step": 151400 }, { "epoch": 48.96574014221073, "grad_norm": 1.203693151473999, "learning_rate": 0.001, "loss": 1.9491, "step": 151500 }, { "epoch": 48.99806076276664, "grad_norm": 1.3782533407211304, "learning_rate": 0.001, "loss": 1.9188, "step": 151600 }, { "epoch": 49.03038138332256, "grad_norm": 1.4690312147140503, "learning_rate": 0.001, "loss": 1.8281, "step": 151700 }, { "epoch": 49.06270200387848, "grad_norm": 1.6981852054595947, "learning_rate": 0.001, "loss": 1.8205, "step": 151800 }, { "epoch": 49.09502262443439, "grad_norm": 4.4031267166137695, "learning_rate": 0.001, "loss": 1.8291, "step": 151900 }, { "epoch": 49.12734324499031, "grad_norm": 1.5451769828796387, "learning_rate": 0.001, "loss": 1.8405, "step": 152000 }, { "epoch": 49.15966386554622, "grad_norm": 1.5537259578704834, "learning_rate": 0.001, "loss": 1.8557, "step": 152100 }, { "epoch": 49.191984486102136, "grad_norm": 1.3488463163375854, "learning_rate": 0.001, "loss": 1.8462, "step": 152200 }, { "epoch": 49.22430510665805, "grad_norm": 1.4971939325332642, "learning_rate": 0.001, "loss": 1.8556, "step": 152300 }, { "epoch": 49.256625727213965, "grad_norm": 1.7381188869476318, "learning_rate": 0.001, "loss": 1.8547, "step": 152400 }, { "epoch": 49.28894634776988, "grad_norm": 1.7048050165176392, "learning_rate": 0.001, "loss": 1.8608, "step": 152500 }, { "epoch": 49.321266968325794, "grad_norm": 1.5738157033920288, "learning_rate": 0.001, "loss": 1.863, "step": 152600 }, { "epoch": 49.35358758888171, "grad_norm": 1.2011820077896118, "learning_rate": 0.001, "loss": 1.8442, "step": 152700 }, { "epoch": 49.38590820943762, "grad_norm": 1.1261461973190308, "learning_rate": 0.001, "loss": 1.8688, "step": 152800 }, { "epoch": 49.41822882999354, "grad_norm": 1.293884038925171, "learning_rate": 0.001, "loss": 1.8828, "step": 152900 }, { "epoch": 49.45054945054945, "grad_norm": 1.8792201280593872, "learning_rate": 0.001, "loss": 1.8767, "step": 153000 }, { "epoch": 49.48287007110537, "grad_norm": 1.3025922775268555, "learning_rate": 0.001, "loss": 1.8634, "step": 153100 }, { "epoch": 49.51519069166128, "grad_norm": 1.7977771759033203, "learning_rate": 0.001, "loss": 1.8875, "step": 153200 }, { "epoch": 49.547511312217196, "grad_norm": 1.6496143341064453, "learning_rate": 0.001, "loss": 1.8842, "step": 153300 }, { "epoch": 49.57983193277311, "grad_norm": 1.1966819763183594, "learning_rate": 0.001, "loss": 1.8737, "step": 153400 }, { "epoch": 49.612152553329025, "grad_norm": 1.9843745231628418, "learning_rate": 0.001, "loss": 1.8904, "step": 153500 }, { "epoch": 49.64447317388494, "grad_norm": 1.659328579902649, "learning_rate": 0.001, "loss": 1.8992, "step": 153600 }, { "epoch": 49.676793794440854, "grad_norm": 1.6897447109222412, "learning_rate": 0.001, "loss": 1.891, "step": 153700 }, { "epoch": 49.70911441499677, "grad_norm": 1.2976375818252563, "learning_rate": 0.001, "loss": 1.9102, "step": 153800 }, { "epoch": 49.74143503555268, "grad_norm": 1.6344318389892578, "learning_rate": 0.001, "loss": 1.8832, "step": 153900 }, { "epoch": 49.7737556561086, "grad_norm": 1.857918381690979, "learning_rate": 0.001, "loss": 1.906, "step": 154000 }, { "epoch": 49.80607627666451, "grad_norm": 1.373241662979126, "learning_rate": 0.001, "loss": 1.9209, "step": 154100 }, { "epoch": 49.83839689722043, "grad_norm": 1.4718129634857178, "learning_rate": 0.001, "loss": 1.8927, "step": 154200 }, { "epoch": 49.87071751777634, "grad_norm": 2.915614366531372, "learning_rate": 0.001, "loss": 1.9038, "step": 154300 }, { "epoch": 49.903038138332256, "grad_norm": 1.3346368074417114, "learning_rate": 0.001, "loss": 1.9237, "step": 154400 }, { "epoch": 49.93535875888817, "grad_norm": 1.1678050756454468, "learning_rate": 0.001, "loss": 1.9251, "step": 154500 }, { "epoch": 49.967679379444085, "grad_norm": 1.6205052137374878, "learning_rate": 0.001, "loss": 1.9164, "step": 154600 }, { "epoch": 50.0, "grad_norm": 1.8468464612960815, "learning_rate": 0.001, "loss": 1.8929, "step": 154700 }, { "epoch": 50.032320620555915, "grad_norm": 2.0370256900787354, "learning_rate": 0.001, "loss": 1.8048, "step": 154800 }, { "epoch": 50.06464124111183, "grad_norm": 1.744167685508728, "learning_rate": 0.001, "loss": 1.8144, "step": 154900 }, { "epoch": 50.096961861667744, "grad_norm": 1.9358224868774414, "learning_rate": 0.001, "loss": 1.8151, "step": 155000 }, { "epoch": 50.12928248222366, "grad_norm": 1.9258112907409668, "learning_rate": 0.001, "loss": 1.8196, "step": 155100 }, { "epoch": 50.16160310277957, "grad_norm": 1.7790493965148926, "learning_rate": 0.001, "loss": 1.8147, "step": 155200 }, { "epoch": 50.19392372333549, "grad_norm": 1.8352715969085693, "learning_rate": 0.001, "loss": 1.8322, "step": 155300 }, { "epoch": 50.2262443438914, "grad_norm": 4.869323253631592, "learning_rate": 0.001, "loss": 1.8486, "step": 155400 }, { "epoch": 50.25856496444732, "grad_norm": 2.8676984310150146, "learning_rate": 0.001, "loss": 1.8537, "step": 155500 }, { "epoch": 50.29088558500323, "grad_norm": 2.0659713745117188, "learning_rate": 0.001, "loss": 1.8356, "step": 155600 }, { "epoch": 50.323206205559146, "grad_norm": 1.5774316787719727, "learning_rate": 0.001, "loss": 1.8462, "step": 155700 }, { "epoch": 50.35552682611506, "grad_norm": 1.8155359029769897, "learning_rate": 0.001, "loss": 1.8294, "step": 155800 }, { "epoch": 50.387847446670975, "grad_norm": 2.749790906906128, "learning_rate": 0.001, "loss": 1.8592, "step": 155900 }, { "epoch": 50.42016806722689, "grad_norm": 2.075669288635254, "learning_rate": 0.001, "loss": 1.8603, "step": 156000 }, { "epoch": 50.452488687782804, "grad_norm": 3.1616578102111816, "learning_rate": 0.001, "loss": 1.8597, "step": 156100 }, { "epoch": 50.48480930833872, "grad_norm": 5.347122669219971, "learning_rate": 0.001, "loss": 1.8664, "step": 156200 }, { "epoch": 50.51712992889463, "grad_norm": 15.473066329956055, "learning_rate": 0.001, "loss": 1.8707, "step": 156300 }, { "epoch": 50.54945054945055, "grad_norm": 2.3766722679138184, "learning_rate": 0.001, "loss": 1.8818, "step": 156400 }, { "epoch": 50.58177117000646, "grad_norm": 5.88328218460083, "learning_rate": 0.001, "loss": 1.8762, "step": 156500 }, { "epoch": 50.61409179056238, "grad_norm": 5.698803901672363, "learning_rate": 0.001, "loss": 1.8914, "step": 156600 }, { "epoch": 50.64641241111829, "grad_norm": 1.8124210834503174, "learning_rate": 0.001, "loss": 1.8659, "step": 156700 }, { "epoch": 50.678733031674206, "grad_norm": 4.411184787750244, "learning_rate": 0.001, "loss": 1.8849, "step": 156800 }, { "epoch": 50.71105365223012, "grad_norm": 2.6569597721099854, "learning_rate": 0.001, "loss": 1.9094, "step": 156900 }, { "epoch": 50.743374272786035, "grad_norm": 2.0783462524414062, "learning_rate": 0.001, "loss": 1.9044, "step": 157000 }, { "epoch": 50.77569489334195, "grad_norm": 1.8523979187011719, "learning_rate": 0.001, "loss": 1.9198, "step": 157100 }, { "epoch": 50.808015513897864, "grad_norm": 1.8430672883987427, "learning_rate": 0.001, "loss": 1.9108, "step": 157200 }, { "epoch": 50.84033613445378, "grad_norm": 2.2843167781829834, "learning_rate": 0.001, "loss": 1.9173, "step": 157300 }, { "epoch": 50.87265675500969, "grad_norm": 2.356163740158081, "learning_rate": 0.001, "loss": 1.9184, "step": 157400 }, { "epoch": 50.90497737556561, "grad_norm": 2.3737120628356934, "learning_rate": 0.001, "loss": 1.9044, "step": 157500 }, { "epoch": 50.93729799612152, "grad_norm": 1.9790130853652954, "learning_rate": 0.001, "loss": 1.9111, "step": 157600 }, { "epoch": 50.96961861667744, "grad_norm": 1.970112919807434, "learning_rate": 0.001, "loss": 1.9168, "step": 157700 }, { "epoch": 51.00193923723336, "grad_norm": 1.6840780973434448, "learning_rate": 0.001, "loss": 1.923, "step": 157800 }, { "epoch": 51.03425985778927, "grad_norm": 3.731515884399414, "learning_rate": 0.001, "loss": 1.7983, "step": 157900 }, { "epoch": 51.06658047834519, "grad_norm": 1.3742941617965698, "learning_rate": 0.001, "loss": 1.8105, "step": 158000 }, { "epoch": 51.0989010989011, "grad_norm": 1.9958996772766113, "learning_rate": 0.001, "loss": 1.8138, "step": 158100 }, { "epoch": 51.13122171945702, "grad_norm": 2.1182122230529785, "learning_rate": 0.001, "loss": 1.8007, "step": 158200 }, { "epoch": 51.16354234001293, "grad_norm": 1.5044934749603271, "learning_rate": 0.001, "loss": 1.8188, "step": 158300 }, { "epoch": 51.195862960568846, "grad_norm": 1.6445131301879883, "learning_rate": 0.001, "loss": 1.8454, "step": 158400 }, { "epoch": 51.22818358112476, "grad_norm": 1.787208914756775, "learning_rate": 0.001, "loss": 1.8321, "step": 158500 }, { "epoch": 51.260504201680675, "grad_norm": 1.4375091791152954, "learning_rate": 0.001, "loss": 1.8319, "step": 158600 }, { "epoch": 51.29282482223659, "grad_norm": 1.283644199371338, "learning_rate": 0.001, "loss": 1.8302, "step": 158700 }, { "epoch": 51.325145442792504, "grad_norm": 1.1551289558410645, "learning_rate": 0.001, "loss": 1.8314, "step": 158800 }, { "epoch": 51.35746606334842, "grad_norm": 1.4309033155441284, "learning_rate": 0.001, "loss": 1.8331, "step": 158900 }, { "epoch": 51.38978668390433, "grad_norm": 1.3727881908416748, "learning_rate": 0.001, "loss": 1.8446, "step": 159000 }, { "epoch": 51.42210730446025, "grad_norm": 1.1588155031204224, "learning_rate": 0.001, "loss": 1.8615, "step": 159100 }, { "epoch": 51.45442792501616, "grad_norm": 3.0590758323669434, "learning_rate": 0.001, "loss": 1.8578, "step": 159200 }, { "epoch": 51.48674854557208, "grad_norm": 1.5345908403396606, "learning_rate": 0.001, "loss": 1.8632, "step": 159300 }, { "epoch": 51.51906916612799, "grad_norm": 1.7815639972686768, "learning_rate": 0.001, "loss": 1.8659, "step": 159400 }, { "epoch": 51.551389786683906, "grad_norm": 1.2229465246200562, "learning_rate": 0.001, "loss": 1.8772, "step": 159500 }, { "epoch": 51.58371040723982, "grad_norm": 1.7601901292800903, "learning_rate": 0.001, "loss": 1.8693, "step": 159600 }, { "epoch": 51.616031027795735, "grad_norm": 1.5885440111160278, "learning_rate": 0.001, "loss": 1.8837, "step": 159700 }, { "epoch": 51.64835164835165, "grad_norm": 1.2639483213424683, "learning_rate": 0.001, "loss": 1.8737, "step": 159800 }, { "epoch": 51.680672268907564, "grad_norm": 1.950973629951477, "learning_rate": 0.001, "loss": 1.8797, "step": 159900 }, { "epoch": 51.71299288946348, "grad_norm": 1.4180629253387451, "learning_rate": 0.001, "loss": 1.8771, "step": 160000 }, { "epoch": 51.74531351001939, "grad_norm": 1.6293505430221558, "learning_rate": 0.001, "loss": 1.8754, "step": 160100 }, { "epoch": 51.77763413057531, "grad_norm": 1.6815139055252075, "learning_rate": 0.001, "loss": 1.9047, "step": 160200 }, { "epoch": 51.80995475113122, "grad_norm": 1.5936766862869263, "learning_rate": 0.001, "loss": 1.8805, "step": 160300 }, { "epoch": 51.84227537168714, "grad_norm": 1.4702264070510864, "learning_rate": 0.001, "loss": 1.8778, "step": 160400 }, { "epoch": 51.87459599224305, "grad_norm": 2.1558837890625, "learning_rate": 0.001, "loss": 1.8972, "step": 160500 }, { "epoch": 51.906916612798966, "grad_norm": 2.351548910140991, "learning_rate": 0.001, "loss": 1.8907, "step": 160600 }, { "epoch": 51.93923723335488, "grad_norm": 2.049895763397217, "learning_rate": 0.001, "loss": 1.8889, "step": 160700 }, { "epoch": 51.971557853910795, "grad_norm": 2.6896536350250244, "learning_rate": 0.001, "loss": 1.9258, "step": 160800 }, { "epoch": 52.00387847446671, "grad_norm": 1.3507665395736694, "learning_rate": 0.001, "loss": 1.935, "step": 160900 }, { "epoch": 52.036199095022624, "grad_norm": 1.7245376110076904, "learning_rate": 0.001, "loss": 1.7716, "step": 161000 }, { "epoch": 52.06851971557854, "grad_norm": 6.7242584228515625, "learning_rate": 0.001, "loss": 1.7684, "step": 161100 }, { "epoch": 52.10084033613445, "grad_norm": 1.5479717254638672, "learning_rate": 0.001, "loss": 1.8108, "step": 161200 }, { "epoch": 52.13316095669037, "grad_norm": 2.652596950531006, "learning_rate": 0.001, "loss": 1.8117, "step": 161300 }, { "epoch": 52.16548157724628, "grad_norm": 1.8098551034927368, "learning_rate": 0.001, "loss": 1.8008, "step": 161400 }, { "epoch": 52.1978021978022, "grad_norm": 1.2313597202301025, "learning_rate": 0.001, "loss": 1.8177, "step": 161500 }, { "epoch": 52.23012281835811, "grad_norm": 1.0582380294799805, "learning_rate": 0.001, "loss": 1.8264, "step": 161600 }, { "epoch": 52.262443438914026, "grad_norm": 1.1429657936096191, "learning_rate": 0.001, "loss": 1.8316, "step": 161700 }, { "epoch": 52.29476405946994, "grad_norm": 1.1730421781539917, "learning_rate": 0.001, "loss": 1.8299, "step": 161800 }, { "epoch": 52.327084680025855, "grad_norm": 2.528351306915283, "learning_rate": 0.001, "loss": 1.8369, "step": 161900 }, { "epoch": 52.35940530058177, "grad_norm": 1.3710192441940308, "learning_rate": 0.001, "loss": 1.8397, "step": 162000 }, { "epoch": 52.391725921137684, "grad_norm": 1.430686116218567, "learning_rate": 0.001, "loss": 1.8374, "step": 162100 }, { "epoch": 52.4240465416936, "grad_norm": 6.241026401519775, "learning_rate": 0.001, "loss": 1.8368, "step": 162200 }, { "epoch": 52.456367162249514, "grad_norm": 1.5896859169006348, "learning_rate": 0.001, "loss": 1.8419, "step": 162300 }, { "epoch": 52.48868778280543, "grad_norm": 1.4369114637374878, "learning_rate": 0.001, "loss": 1.8285, "step": 162400 }, { "epoch": 52.52100840336134, "grad_norm": 1.3587504625320435, "learning_rate": 0.001, "loss": 1.8418, "step": 162500 }, { "epoch": 52.55332902391726, "grad_norm": 1.8245264291763306, "learning_rate": 0.001, "loss": 1.8545, "step": 162600 }, { "epoch": 52.58564964447317, "grad_norm": 1.4387905597686768, "learning_rate": 0.001, "loss": 1.8544, "step": 162700 }, { "epoch": 52.617970265029086, "grad_norm": 0.9492968916893005, "learning_rate": 0.001, "loss": 1.866, "step": 162800 }, { "epoch": 52.650290885585, "grad_norm": 4.664299011230469, "learning_rate": 0.001, "loss": 1.8671, "step": 162900 }, { "epoch": 52.682611506140915, "grad_norm": 1.6182254552841187, "learning_rate": 0.001, "loss": 1.8712, "step": 163000 }, { "epoch": 52.71493212669683, "grad_norm": 2.860280990600586, "learning_rate": 0.001, "loss": 1.8776, "step": 163100 }, { "epoch": 52.747252747252745, "grad_norm": 2.2536051273345947, "learning_rate": 0.001, "loss": 1.8811, "step": 163200 }, { "epoch": 52.77957336780866, "grad_norm": 1.3039655685424805, "learning_rate": 0.001, "loss": 1.8746, "step": 163300 }, { "epoch": 52.811893988364574, "grad_norm": 1.3934969902038574, "learning_rate": 0.001, "loss": 1.8663, "step": 163400 }, { "epoch": 52.84421460892049, "grad_norm": 1.9957778453826904, "learning_rate": 0.001, "loss": 1.885, "step": 163500 }, { "epoch": 52.8765352294764, "grad_norm": 1.0933187007904053, "learning_rate": 0.001, "loss": 1.9014, "step": 163600 }, { "epoch": 52.90885585003232, "grad_norm": 1.288438081741333, "learning_rate": 0.001, "loss": 1.8917, "step": 163700 }, { "epoch": 52.94117647058823, "grad_norm": 0.9584426283836365, "learning_rate": 0.001, "loss": 1.8951, "step": 163800 }, { "epoch": 52.97349709114415, "grad_norm": 3.2793166637420654, "learning_rate": 0.001, "loss": 1.9127, "step": 163900 }, { "epoch": 53.00581771170007, "grad_norm": 1.2631394863128662, "learning_rate": 0.001, "loss": 1.8732, "step": 164000 }, { "epoch": 53.03813833225598, "grad_norm": 1.145633339881897, "learning_rate": 0.001, "loss": 1.7873, "step": 164100 }, { "epoch": 53.0704589528119, "grad_norm": 1.4306237697601318, "learning_rate": 0.001, "loss": 1.7575, "step": 164200 }, { "epoch": 53.10277957336781, "grad_norm": 1.217087745666504, "learning_rate": 0.001, "loss": 1.791, "step": 164300 }, { "epoch": 53.135100193923726, "grad_norm": 2.8959808349609375, "learning_rate": 0.001, "loss": 1.7986, "step": 164400 }, { "epoch": 53.16742081447964, "grad_norm": 1.2627071142196655, "learning_rate": 0.001, "loss": 1.8066, "step": 164500 }, { "epoch": 53.199741435035556, "grad_norm": 1.3099184036254883, "learning_rate": 0.001, "loss": 1.807, "step": 164600 }, { "epoch": 53.23206205559147, "grad_norm": 1.4667695760726929, "learning_rate": 0.001, "loss": 1.8037, "step": 164700 }, { "epoch": 53.264382676147385, "grad_norm": 2.2576076984405518, "learning_rate": 0.001, "loss": 1.8287, "step": 164800 }, { "epoch": 53.2967032967033, "grad_norm": 1.1869635581970215, "learning_rate": 0.001, "loss": 1.8236, "step": 164900 }, { "epoch": 53.329023917259214, "grad_norm": 1.0631247758865356, "learning_rate": 0.001, "loss": 1.8335, "step": 165000 }, { "epoch": 53.36134453781513, "grad_norm": 1.5530858039855957, "learning_rate": 0.001, "loss": 1.817, "step": 165100 }, { "epoch": 53.39366515837104, "grad_norm": 0.9374286532402039, "learning_rate": 0.001, "loss": 1.8363, "step": 165200 }, { "epoch": 53.42598577892696, "grad_norm": 3.2274820804595947, "learning_rate": 0.001, "loss": 1.8385, "step": 165300 }, { "epoch": 53.45830639948287, "grad_norm": 1.3099051713943481, "learning_rate": 0.001, "loss": 1.8338, "step": 165400 }, { "epoch": 53.49062702003879, "grad_norm": 1.258160948753357, "learning_rate": 0.001, "loss": 1.8393, "step": 165500 }, { "epoch": 53.5229476405947, "grad_norm": 1.2888972759246826, "learning_rate": 0.001, "loss": 1.8489, "step": 165600 }, { "epoch": 53.555268261150616, "grad_norm": 1.554524302482605, "learning_rate": 0.001, "loss": 1.8533, "step": 165700 }, { "epoch": 53.58758888170653, "grad_norm": 1.4187372922897339, "learning_rate": 0.001, "loss": 1.8304, "step": 165800 }, { "epoch": 53.619909502262445, "grad_norm": 1.47353196144104, "learning_rate": 0.001, "loss": 1.8543, "step": 165900 }, { "epoch": 53.65223012281836, "grad_norm": 1.277214527130127, "learning_rate": 0.001, "loss": 1.8586, "step": 166000 }, { "epoch": 53.684550743374274, "grad_norm": 1.174708604812622, "learning_rate": 0.001, "loss": 1.879, "step": 166100 }, { "epoch": 53.71687136393019, "grad_norm": 0.9619840979576111, "learning_rate": 0.001, "loss": 1.8689, "step": 166200 }, { "epoch": 53.7491919844861, "grad_norm": 1.1999341249465942, "learning_rate": 0.001, "loss": 1.8674, "step": 166300 }, { "epoch": 53.78151260504202, "grad_norm": 4.944643974304199, "learning_rate": 0.001, "loss": 1.8806, "step": 166400 }, { "epoch": 53.81383322559793, "grad_norm": 5.407577037811279, "learning_rate": 0.001, "loss": 1.8587, "step": 166500 }, { "epoch": 53.84615384615385, "grad_norm": 1.3429352045059204, "learning_rate": 0.001, "loss": 1.8651, "step": 166600 }, { "epoch": 53.87847446670976, "grad_norm": 1.863881230354309, "learning_rate": 0.001, "loss": 1.8794, "step": 166700 }, { "epoch": 53.910795087265676, "grad_norm": 1.5963608026504517, "learning_rate": 0.001, "loss": 1.8852, "step": 166800 }, { "epoch": 53.94311570782159, "grad_norm": 1.5546278953552246, "learning_rate": 0.001, "loss": 1.8869, "step": 166900 }, { "epoch": 53.975436328377505, "grad_norm": 1.6437301635742188, "learning_rate": 0.001, "loss": 1.897, "step": 167000 }, { "epoch": 54.00775694893342, "grad_norm": 2.449169874191284, "learning_rate": 0.001, "loss": 1.8858, "step": 167100 }, { "epoch": 54.040077569489334, "grad_norm": 1.021952509880066, "learning_rate": 0.001, "loss": 1.7814, "step": 167200 }, { "epoch": 54.07239819004525, "grad_norm": 1.1851582527160645, "learning_rate": 0.001, "loss": 1.7705, "step": 167300 }, { "epoch": 54.10471881060116, "grad_norm": 1.1264711618423462, "learning_rate": 0.001, "loss": 1.7813, "step": 167400 }, { "epoch": 54.13703943115708, "grad_norm": 1.2102171182632446, "learning_rate": 0.001, "loss": 1.7861, "step": 167500 }, { "epoch": 54.16936005171299, "grad_norm": 2.4573986530303955, "learning_rate": 0.001, "loss": 1.791, "step": 167600 }, { "epoch": 54.20168067226891, "grad_norm": 1.2831758260726929, "learning_rate": 0.001, "loss": 1.7889, "step": 167700 }, { "epoch": 54.23400129282482, "grad_norm": 1.3793467283248901, "learning_rate": 0.001, "loss": 1.8096, "step": 167800 }, { "epoch": 54.266321913380736, "grad_norm": 1.01155424118042, "learning_rate": 0.001, "loss": 1.8152, "step": 167900 }, { "epoch": 54.29864253393665, "grad_norm": 1.323516845703125, "learning_rate": 0.001, "loss": 1.8119, "step": 168000 }, { "epoch": 54.330963154492565, "grad_norm": 1.1691120862960815, "learning_rate": 0.001, "loss": 1.8188, "step": 168100 }, { "epoch": 54.36328377504848, "grad_norm": 2.8291749954223633, "learning_rate": 0.001, "loss": 1.8186, "step": 168200 }, { "epoch": 54.395604395604394, "grad_norm": 1.0759118795394897, "learning_rate": 0.001, "loss": 1.8217, "step": 168300 }, { "epoch": 54.42792501616031, "grad_norm": 1.238155484199524, "learning_rate": 0.001, "loss": 1.8315, "step": 168400 }, { "epoch": 54.46024563671622, "grad_norm": 5.923653602600098, "learning_rate": 0.001, "loss": 1.837, "step": 168500 }, { "epoch": 54.49256625727214, "grad_norm": 1.209948182106018, "learning_rate": 0.001, "loss": 1.829, "step": 168600 }, { "epoch": 54.52488687782805, "grad_norm": 1.2545963525772095, "learning_rate": 0.001, "loss": 1.8352, "step": 168700 }, { "epoch": 54.55720749838397, "grad_norm": 1.128759741783142, "learning_rate": 0.001, "loss": 1.8487, "step": 168800 }, { "epoch": 54.58952811893988, "grad_norm": 1.2234666347503662, "learning_rate": 0.001, "loss": 1.8589, "step": 168900 }, { "epoch": 54.621848739495796, "grad_norm": 1.0800775289535522, "learning_rate": 0.001, "loss": 1.8435, "step": 169000 }, { "epoch": 54.65416936005171, "grad_norm": 1.1415737867355347, "learning_rate": 0.001, "loss": 1.8586, "step": 169100 }, { "epoch": 54.686489980607625, "grad_norm": 1.398008942604065, "learning_rate": 0.001, "loss": 1.8661, "step": 169200 }, { "epoch": 54.71881060116354, "grad_norm": 1.3142955303192139, "learning_rate": 0.001, "loss": 1.835, "step": 169300 }, { "epoch": 54.751131221719454, "grad_norm": 1.0918524265289307, "learning_rate": 0.001, "loss": 1.8604, "step": 169400 }, { "epoch": 54.78345184227537, "grad_norm": 1.327147364616394, "learning_rate": 0.001, "loss": 1.864, "step": 169500 }, { "epoch": 54.81577246283128, "grad_norm": 1.2127423286437988, "learning_rate": 0.001, "loss": 1.8691, "step": 169600 }, { "epoch": 54.8480930833872, "grad_norm": 1.2684402465820312, "learning_rate": 0.001, "loss": 1.8593, "step": 169700 }, { "epoch": 54.88041370394311, "grad_norm": 1.2885279655456543, "learning_rate": 0.001, "loss": 1.8508, "step": 169800 }, { "epoch": 54.91273432449903, "grad_norm": 1.5327402353286743, "learning_rate": 0.001, "loss": 1.8744, "step": 169900 }, { "epoch": 54.94505494505494, "grad_norm": 3.417776346206665, "learning_rate": 0.001, "loss": 1.8923, "step": 170000 }, { "epoch": 54.977375565610856, "grad_norm": 0.8247045874595642, "learning_rate": 0.001, "loss": 1.8816, "step": 170100 }, { "epoch": 55.00969618616678, "grad_norm": 1.1719846725463867, "learning_rate": 0.001, "loss": 1.8541, "step": 170200 }, { "epoch": 55.04201680672269, "grad_norm": 0.9927612543106079, "learning_rate": 0.001, "loss": 1.7589, "step": 170300 }, { "epoch": 55.07433742727861, "grad_norm": 1.712304711341858, "learning_rate": 0.001, "loss": 1.7631, "step": 170400 }, { "epoch": 55.10665804783452, "grad_norm": 1.8125869035720825, "learning_rate": 0.001, "loss": 1.7651, "step": 170500 }, { "epoch": 55.138978668390436, "grad_norm": 1.0392944812774658, "learning_rate": 0.001, "loss": 1.7756, "step": 170600 }, { "epoch": 55.17129928894635, "grad_norm": 1.3846590518951416, "learning_rate": 0.001, "loss": 1.7587, "step": 170700 }, { "epoch": 55.203619909502265, "grad_norm": 2.3984463214874268, "learning_rate": 0.001, "loss": 1.7921, "step": 170800 }, { "epoch": 55.23594053005818, "grad_norm": 1.1340079307556152, "learning_rate": 0.001, "loss": 1.8209, "step": 170900 }, { "epoch": 55.268261150614094, "grad_norm": 1.5424491167068481, "learning_rate": 0.001, "loss": 1.7904, "step": 171000 }, { "epoch": 55.30058177117001, "grad_norm": 1.2161543369293213, "learning_rate": 0.001, "loss": 1.8053, "step": 171100 }, { "epoch": 55.33290239172592, "grad_norm": 1.1331323385238647, "learning_rate": 0.001, "loss": 1.8041, "step": 171200 }, { "epoch": 55.36522301228184, "grad_norm": 1.0388082265853882, "learning_rate": 0.001, "loss": 1.8021, "step": 171300 }, { "epoch": 55.39754363283775, "grad_norm": 1.200986623764038, "learning_rate": 0.001, "loss": 1.8285, "step": 171400 }, { "epoch": 55.42986425339367, "grad_norm": 1.1148531436920166, "learning_rate": 0.001, "loss": 1.8165, "step": 171500 }, { "epoch": 55.46218487394958, "grad_norm": 1.4607855081558228, "learning_rate": 0.001, "loss": 1.8177, "step": 171600 }, { "epoch": 55.494505494505496, "grad_norm": 1.7070599794387817, "learning_rate": 0.001, "loss": 1.8258, "step": 171700 }, { "epoch": 55.52682611506141, "grad_norm": 1.7141294479370117, "learning_rate": 0.001, "loss": 1.8181, "step": 171800 }, { "epoch": 55.559146735617325, "grad_norm": 1.0942274332046509, "learning_rate": 0.001, "loss": 1.8336, "step": 171900 }, { "epoch": 55.59146735617324, "grad_norm": 1.213194489479065, "learning_rate": 0.001, "loss": 1.8404, "step": 172000 }, { "epoch": 55.623787976729155, "grad_norm": 1.029559850692749, "learning_rate": 0.001, "loss": 1.8429, "step": 172100 }, { "epoch": 55.65610859728507, "grad_norm": 1.2740815877914429, "learning_rate": 0.001, "loss": 1.8354, "step": 172200 }, { "epoch": 55.688429217840984, "grad_norm": 1.1903679370880127, "learning_rate": 0.001, "loss": 1.8372, "step": 172300 }, { "epoch": 55.7207498383969, "grad_norm": 1.3859694004058838, "learning_rate": 0.001, "loss": 1.8551, "step": 172400 }, { "epoch": 55.75307045895281, "grad_norm": 2.3117423057556152, "learning_rate": 0.001, "loss": 1.8386, "step": 172500 }, { "epoch": 55.78539107950873, "grad_norm": 1.45729660987854, "learning_rate": 0.001, "loss": 1.8501, "step": 172600 }, { "epoch": 55.81771170006464, "grad_norm": 1.6276901960372925, "learning_rate": 0.001, "loss": 1.8479, "step": 172700 }, { "epoch": 55.85003232062056, "grad_norm": 1.1915055513381958, "learning_rate": 0.001, "loss": 1.8548, "step": 172800 }, { "epoch": 55.88235294117647, "grad_norm": 7.421400547027588, "learning_rate": 0.001, "loss": 1.8625, "step": 172900 }, { "epoch": 55.914673561732386, "grad_norm": 1.0755761861801147, "learning_rate": 0.001, "loss": 1.846, "step": 173000 }, { "epoch": 55.9469941822883, "grad_norm": 1.2608082294464111, "learning_rate": 0.001, "loss": 1.8679, "step": 173100 }, { "epoch": 55.979314802844215, "grad_norm": 1.0362014770507812, "learning_rate": 0.001, "loss": 1.8693, "step": 173200 }, { "epoch": 56.01163542340013, "grad_norm": 1.456121563911438, "learning_rate": 0.001, "loss": 1.8156, "step": 173300 }, { "epoch": 56.043956043956044, "grad_norm": 1.8202630281448364, "learning_rate": 0.001, "loss": 1.7388, "step": 173400 }, { "epoch": 56.07627666451196, "grad_norm": 4.554584503173828, "learning_rate": 0.001, "loss": 1.7499, "step": 173500 }, { "epoch": 56.10859728506787, "grad_norm": 1.4848321676254272, "learning_rate": 0.001, "loss": 1.7663, "step": 173600 }, { "epoch": 56.14091790562379, "grad_norm": 1.2876676321029663, "learning_rate": 0.001, "loss": 1.7633, "step": 173700 }, { "epoch": 56.1732385261797, "grad_norm": 2.1925060749053955, "learning_rate": 0.001, "loss": 1.7656, "step": 173800 }, { "epoch": 56.20555914673562, "grad_norm": 1.4661798477172852, "learning_rate": 0.001, "loss": 1.7576, "step": 173900 }, { "epoch": 56.23787976729153, "grad_norm": 1.3052462339401245, "learning_rate": 0.001, "loss": 1.7601, "step": 174000 }, { "epoch": 56.270200387847446, "grad_norm": 1.2052072286605835, "learning_rate": 0.001, "loss": 1.7724, "step": 174100 }, { "epoch": 56.30252100840336, "grad_norm": 1.3916724920272827, "learning_rate": 0.001, "loss": 1.7902, "step": 174200 }, { "epoch": 56.334841628959275, "grad_norm": 1.4068294763565063, "learning_rate": 0.001, "loss": 1.7863, "step": 174300 }, { "epoch": 56.36716224951519, "grad_norm": 1.3708422183990479, "learning_rate": 0.001, "loss": 1.7953, "step": 174400 }, { "epoch": 56.399482870071104, "grad_norm": 1.4875657558441162, "learning_rate": 0.001, "loss": 1.7897, "step": 174500 }, { "epoch": 56.43180349062702, "grad_norm": 1.345341682434082, "learning_rate": 0.001, "loss": 1.807, "step": 174600 }, { "epoch": 56.46412411118293, "grad_norm": 1.2544856071472168, "learning_rate": 0.001, "loss": 1.7944, "step": 174700 }, { "epoch": 56.49644473173885, "grad_norm": 1.37582528591156, "learning_rate": 0.001, "loss": 1.7955, "step": 174800 }, { "epoch": 56.52876535229476, "grad_norm": 1.2095996141433716, "learning_rate": 0.001, "loss": 1.8047, "step": 174900 }, { "epoch": 56.56108597285068, "grad_norm": 1.4993833303451538, "learning_rate": 0.001, "loss": 1.8166, "step": 175000 }, { "epoch": 56.59340659340659, "grad_norm": 1.1631335020065308, "learning_rate": 0.001, "loss": 1.8212, "step": 175100 }, { "epoch": 56.625727213962506, "grad_norm": 1.4197052717208862, "learning_rate": 0.001, "loss": 1.814, "step": 175200 }, { "epoch": 56.65804783451842, "grad_norm": 1.5908585786819458, "learning_rate": 0.001, "loss": 1.8327, "step": 175300 }, { "epoch": 56.690368455074335, "grad_norm": 1.5469480752944946, "learning_rate": 0.001, "loss": 1.815, "step": 175400 }, { "epoch": 56.72268907563025, "grad_norm": 1.5471348762512207, "learning_rate": 0.001, "loss": 1.8336, "step": 175500 }, { "epoch": 56.755009696186164, "grad_norm": 4.1824116706848145, "learning_rate": 0.001, "loss": 1.8291, "step": 175600 }, { "epoch": 56.78733031674208, "grad_norm": 1.0846354961395264, "learning_rate": 0.001, "loss": 1.8338, "step": 175700 }, { "epoch": 56.81965093729799, "grad_norm": 1.5942189693450928, "learning_rate": 0.001, "loss": 1.845, "step": 175800 }, { "epoch": 56.85197155785391, "grad_norm": 2.5594518184661865, "learning_rate": 0.001, "loss": 1.8493, "step": 175900 }, { "epoch": 56.88429217840982, "grad_norm": 1.9357625246047974, "learning_rate": 0.001, "loss": 1.8487, "step": 176000 }, { "epoch": 56.91661279896574, "grad_norm": 1.2673166990280151, "learning_rate": 0.001, "loss": 1.8471, "step": 176100 }, { "epoch": 56.94893341952165, "grad_norm": 1.433740258216858, "learning_rate": 0.001, "loss": 1.8468, "step": 176200 }, { "epoch": 56.981254040077566, "grad_norm": 1.2447668313980103, "learning_rate": 0.001, "loss": 1.847, "step": 176300 }, { "epoch": 57.01357466063349, "grad_norm": 1.3022136688232422, "learning_rate": 0.001, "loss": 1.8042, "step": 176400 }, { "epoch": 57.0458952811894, "grad_norm": 1.4763578176498413, "learning_rate": 0.001, "loss": 1.7297, "step": 176500 }, { "epoch": 57.07821590174532, "grad_norm": 1.57735013961792, "learning_rate": 0.001, "loss": 1.7354, "step": 176600 }, { "epoch": 57.11053652230123, "grad_norm": 1.3946782350540161, "learning_rate": 0.001, "loss": 1.7448, "step": 176700 }, { "epoch": 57.142857142857146, "grad_norm": 1.1271867752075195, "learning_rate": 0.001, "loss": 1.7499, "step": 176800 }, { "epoch": 57.17517776341306, "grad_norm": 1.4515053033828735, "learning_rate": 0.001, "loss": 1.7541, "step": 176900 }, { "epoch": 57.207498383968975, "grad_norm": 1.6748539209365845, "learning_rate": 0.001, "loss": 1.7559, "step": 177000 }, { "epoch": 57.23981900452489, "grad_norm": 1.371485710144043, "learning_rate": 0.001, "loss": 1.7561, "step": 177100 }, { "epoch": 57.272139625080804, "grad_norm": 1.3071129322052002, "learning_rate": 0.001, "loss": 1.7657, "step": 177200 }, { "epoch": 57.30446024563672, "grad_norm": 2.022899866104126, "learning_rate": 0.001, "loss": 1.7638, "step": 177300 }, { "epoch": 57.33678086619263, "grad_norm": 1.3041677474975586, "learning_rate": 0.001, "loss": 1.7856, "step": 177400 }, { "epoch": 57.36910148674855, "grad_norm": 1.8861032724380493, "learning_rate": 0.001, "loss": 1.779, "step": 177500 }, { "epoch": 57.40142210730446, "grad_norm": 1.3934675455093384, "learning_rate": 0.001, "loss": 1.7941, "step": 177600 }, { "epoch": 57.43374272786038, "grad_norm": 1.5645533800125122, "learning_rate": 0.001, "loss": 1.7995, "step": 177700 }, { "epoch": 57.46606334841629, "grad_norm": 1.3055294752120972, "learning_rate": 0.001, "loss": 1.7918, "step": 177800 }, { "epoch": 57.498383968972206, "grad_norm": 2.079103708267212, "learning_rate": 0.001, "loss": 1.8103, "step": 177900 }, { "epoch": 57.53070458952812, "grad_norm": 1.6711114645004272, "learning_rate": 0.001, "loss": 1.8095, "step": 178000 }, { "epoch": 57.563025210084035, "grad_norm": 1.6664762496948242, "learning_rate": 0.001, "loss": 1.8064, "step": 178100 }, { "epoch": 57.59534583063995, "grad_norm": 1.3973405361175537, "learning_rate": 0.001, "loss": 1.8028, "step": 178200 }, { "epoch": 57.627666451195864, "grad_norm": 1.1273155212402344, "learning_rate": 0.001, "loss": 1.8131, "step": 178300 }, { "epoch": 57.65998707175178, "grad_norm": 1.128306269645691, "learning_rate": 0.001, "loss": 1.8001, "step": 178400 }, { "epoch": 57.69230769230769, "grad_norm": 1.4434973001480103, "learning_rate": 0.001, "loss": 1.8239, "step": 178500 }, { "epoch": 57.72462831286361, "grad_norm": 1.5313880443572998, "learning_rate": 0.001, "loss": 1.8099, "step": 178600 }, { "epoch": 57.75694893341952, "grad_norm": 1.4012387990951538, "learning_rate": 0.001, "loss": 1.8257, "step": 178700 }, { "epoch": 57.78926955397544, "grad_norm": 2.8405165672302246, "learning_rate": 0.001, "loss": 1.8173, "step": 178800 }, { "epoch": 57.82159017453135, "grad_norm": 1.5098899602890015, "learning_rate": 0.001, "loss": 1.8224, "step": 178900 }, { "epoch": 57.853910795087266, "grad_norm": 1.6570192575454712, "learning_rate": 0.001, "loss": 1.8218, "step": 179000 }, { "epoch": 57.88623141564318, "grad_norm": 1.3915196657180786, "learning_rate": 0.001, "loss": 1.8305, "step": 179100 }, { "epoch": 57.918552036199095, "grad_norm": 1.8245536088943481, "learning_rate": 0.001, "loss": 1.8447, "step": 179200 }, { "epoch": 57.95087265675501, "grad_norm": 1.42441987991333, "learning_rate": 0.001, "loss": 1.8484, "step": 179300 }, { "epoch": 57.983193277310924, "grad_norm": 1.5760563611984253, "learning_rate": 0.001, "loss": 1.8518, "step": 179400 }, { "epoch": 58.01551389786684, "grad_norm": 1.5278595685958862, "learning_rate": 0.001, "loss": 1.7717, "step": 179500 }, { "epoch": 58.04783451842275, "grad_norm": 2.3923416137695312, "learning_rate": 0.001, "loss": 1.7298, "step": 179600 }, { "epoch": 58.08015513897867, "grad_norm": 1.443052887916565, "learning_rate": 0.001, "loss": 1.7341, "step": 179700 }, { "epoch": 58.11247575953458, "grad_norm": 1.6837795972824097, "learning_rate": 0.001, "loss": 1.7192, "step": 179800 }, { "epoch": 58.1447963800905, "grad_norm": 1.6423945426940918, "learning_rate": 0.001, "loss": 1.7403, "step": 179900 }, { "epoch": 58.17711700064641, "grad_norm": 1.5692648887634277, "learning_rate": 0.001, "loss": 1.7246, "step": 180000 }, { "epoch": 58.209437621202326, "grad_norm": 1.4588842391967773, "learning_rate": 0.001, "loss": 1.7483, "step": 180100 }, { "epoch": 58.24175824175824, "grad_norm": 2.4966108798980713, "learning_rate": 0.001, "loss": 1.768, "step": 180200 }, { "epoch": 58.274078862314155, "grad_norm": 1.6691197156906128, "learning_rate": 0.001, "loss": 1.7535, "step": 180300 }, { "epoch": 58.30639948287007, "grad_norm": 2.6301307678222656, "learning_rate": 0.001, "loss": 1.7597, "step": 180400 }, { "epoch": 58.338720103425985, "grad_norm": 1.7786924839019775, "learning_rate": 0.001, "loss": 1.7792, "step": 180500 }, { "epoch": 58.3710407239819, "grad_norm": 14.561443328857422, "learning_rate": 0.001, "loss": 1.7767, "step": 180600 }, { "epoch": 58.403361344537814, "grad_norm": 1.5682655572891235, "learning_rate": 0.001, "loss": 1.7665, "step": 180700 }, { "epoch": 58.43568196509373, "grad_norm": 5.713895320892334, "learning_rate": 0.001, "loss": 1.7821, "step": 180800 }, { "epoch": 58.46800258564964, "grad_norm": 1.7265334129333496, "learning_rate": 0.001, "loss": 1.7777, "step": 180900 }, { "epoch": 58.50032320620556, "grad_norm": 1.5130953788757324, "learning_rate": 0.001, "loss": 1.7839, "step": 181000 }, { "epoch": 58.53264382676147, "grad_norm": 1.8918101787567139, "learning_rate": 0.001, "loss": 1.7788, "step": 181100 }, { "epoch": 58.56496444731739, "grad_norm": 1.6453572511672974, "learning_rate": 0.001, "loss": 1.7822, "step": 181200 }, { "epoch": 58.5972850678733, "grad_norm": 1.5005327463150024, "learning_rate": 0.001, "loss": 1.8015, "step": 181300 }, { "epoch": 58.629605688429216, "grad_norm": 1.741249442100525, "learning_rate": 0.001, "loss": 1.7938, "step": 181400 }, { "epoch": 58.66192630898513, "grad_norm": 1.5377933979034424, "learning_rate": 0.001, "loss": 1.7953, "step": 181500 }, { "epoch": 58.694246929541045, "grad_norm": 1.4067682027816772, "learning_rate": 0.001, "loss": 1.792, "step": 181600 }, { "epoch": 58.72656755009696, "grad_norm": 1.7603799104690552, "learning_rate": 0.001, "loss": 1.8119, "step": 181700 }, { "epoch": 58.758888170652874, "grad_norm": 2.9131076335906982, "learning_rate": 0.001, "loss": 1.8057, "step": 181800 }, { "epoch": 58.79120879120879, "grad_norm": 1.556459903717041, "learning_rate": 0.001, "loss": 1.8421, "step": 181900 }, { "epoch": 58.8235294117647, "grad_norm": 1.7747646570205688, "learning_rate": 0.001, "loss": 1.8109, "step": 182000 }, { "epoch": 58.85585003232062, "grad_norm": 1.9286748170852661, "learning_rate": 0.001, "loss": 1.8007, "step": 182100 }, { "epoch": 58.88817065287653, "grad_norm": 1.5658574104309082, "learning_rate": 0.001, "loss": 1.824, "step": 182200 }, { "epoch": 58.92049127343245, "grad_norm": 1.6224015951156616, "learning_rate": 0.001, "loss": 1.8312, "step": 182300 }, { "epoch": 58.95281189398836, "grad_norm": 1.3887017965316772, "learning_rate": 0.001, "loss": 1.8313, "step": 182400 }, { "epoch": 58.985132514544276, "grad_norm": 1.6505757570266724, "learning_rate": 0.001, "loss": 1.8261, "step": 182500 }, { "epoch": 59.0174531351002, "grad_norm": 1.6991173028945923, "learning_rate": 0.001, "loss": 1.7642, "step": 182600 }, { "epoch": 59.04977375565611, "grad_norm": 1.7371593713760376, "learning_rate": 0.001, "loss": 1.7085, "step": 182700 }, { "epoch": 59.08209437621203, "grad_norm": 1.42960524559021, "learning_rate": 0.001, "loss": 1.705, "step": 182800 }, { "epoch": 59.11441499676794, "grad_norm": 1.349769949913025, "learning_rate": 0.001, "loss": 1.7173, "step": 182900 }, { "epoch": 59.146735617323856, "grad_norm": 1.3355274200439453, "learning_rate": 0.001, "loss": 1.7014, "step": 183000 }, { "epoch": 59.17905623787977, "grad_norm": 1.3785794973373413, "learning_rate": 0.001, "loss": 1.7205, "step": 183100 }, { "epoch": 59.211376858435685, "grad_norm": 1.4771546125411987, "learning_rate": 0.001, "loss": 1.7562, "step": 183200 }, { "epoch": 59.2436974789916, "grad_norm": 1.449419379234314, "learning_rate": 0.001, "loss": 1.7454, "step": 183300 }, { "epoch": 59.276018099547514, "grad_norm": 1.9918302297592163, "learning_rate": 0.001, "loss": 1.7493, "step": 183400 }, { "epoch": 59.30833872010343, "grad_norm": 1.6224300861358643, "learning_rate": 0.001, "loss": 1.755, "step": 183500 }, { "epoch": 59.34065934065934, "grad_norm": 1.3978979587554932, "learning_rate": 0.001, "loss": 1.7724, "step": 183600 }, { "epoch": 59.37297996121526, "grad_norm": 1.4457424879074097, "learning_rate": 0.001, "loss": 1.7638, "step": 183700 }, { "epoch": 59.40530058177117, "grad_norm": 1.188538670539856, "learning_rate": 0.001, "loss": 1.7664, "step": 183800 }, { "epoch": 59.43762120232709, "grad_norm": 1.8420406579971313, "learning_rate": 0.001, "loss": 1.7664, "step": 183900 }, { "epoch": 59.469941822883, "grad_norm": 1.5948349237442017, "learning_rate": 0.001, "loss": 1.7698, "step": 184000 }, { "epoch": 59.502262443438916, "grad_norm": 1.6295017004013062, "learning_rate": 0.001, "loss": 1.7722, "step": 184100 }, { "epoch": 59.53458306399483, "grad_norm": 1.6350919008255005, "learning_rate": 0.001, "loss": 1.7672, "step": 184200 }, { "epoch": 59.566903684550745, "grad_norm": 1.6131404638290405, "learning_rate": 0.001, "loss": 1.7784, "step": 184300 }, { "epoch": 59.59922430510666, "grad_norm": 5.170358180999756, "learning_rate": 0.001, "loss": 1.7805, "step": 184400 }, { "epoch": 59.631544925662574, "grad_norm": 1.682341456413269, "learning_rate": 0.001, "loss": 1.7693, "step": 184500 }, { "epoch": 59.66386554621849, "grad_norm": 4.203854560852051, "learning_rate": 0.001, "loss": 1.795, "step": 184600 }, { "epoch": 59.6961861667744, "grad_norm": 1.5529115200042725, "learning_rate": 0.001, "loss": 1.7868, "step": 184700 }, { "epoch": 59.72850678733032, "grad_norm": 1.5324428081512451, "learning_rate": 0.001, "loss": 1.794, "step": 184800 }, { "epoch": 59.76082740788623, "grad_norm": 1.6343352794647217, "learning_rate": 0.001, "loss": 1.8002, "step": 184900 }, { "epoch": 59.79314802844215, "grad_norm": 8.351738929748535, "learning_rate": 0.001, "loss": 1.8128, "step": 185000 }, { "epoch": 59.82546864899806, "grad_norm": 1.1468507051467896, "learning_rate": 0.001, "loss": 1.8232, "step": 185100 }, { "epoch": 59.857789269553976, "grad_norm": 1.2308132648468018, "learning_rate": 0.001, "loss": 1.806, "step": 185200 }, { "epoch": 59.89010989010989, "grad_norm": 1.6725716590881348, "learning_rate": 0.001, "loss": 1.8052, "step": 185300 }, { "epoch": 59.922430510665805, "grad_norm": 1.4296369552612305, "learning_rate": 0.001, "loss": 1.8058, "step": 185400 }, { "epoch": 59.95475113122172, "grad_norm": 1.2924681901931763, "learning_rate": 0.001, "loss": 1.812, "step": 185500 }, { "epoch": 59.987071751777634, "grad_norm": 1.4495798349380493, "learning_rate": 0.001, "loss": 1.8153, "step": 185600 }, { "epoch": 60.01939237233355, "grad_norm": 1.6949528455734253, "learning_rate": 0.001, "loss": 1.767, "step": 185700 }, { "epoch": 60.05171299288946, "grad_norm": 1.090348482131958, "learning_rate": 0.001, "loss": 1.7089, "step": 185800 }, { "epoch": 60.08403361344538, "grad_norm": 1.578244924545288, "learning_rate": 0.001, "loss": 1.6952, "step": 185900 }, { "epoch": 60.11635423400129, "grad_norm": 3.5454509258270264, "learning_rate": 0.001, "loss": 1.7214, "step": 186000 }, { "epoch": 60.14867485455721, "grad_norm": 1.6060007810592651, "learning_rate": 0.001, "loss": 1.717, "step": 186100 }, { "epoch": 60.18099547511312, "grad_norm": 1.4425925016403198, "learning_rate": 0.001, "loss": 1.7367, "step": 186200 }, { "epoch": 60.213316095669036, "grad_norm": 1.2362843751907349, "learning_rate": 0.001, "loss": 1.731, "step": 186300 }, { "epoch": 60.24563671622495, "grad_norm": 0.8657944202423096, "learning_rate": 0.001, "loss": 1.7191, "step": 186400 }, { "epoch": 60.277957336780865, "grad_norm": 1.0598361492156982, "learning_rate": 0.001, "loss": 1.7452, "step": 186500 }, { "epoch": 60.31027795733678, "grad_norm": 1.301330327987671, "learning_rate": 0.001, "loss": 1.731, "step": 186600 }, { "epoch": 60.342598577892694, "grad_norm": 1.5728166103363037, "learning_rate": 0.001, "loss": 1.7577, "step": 186700 }, { "epoch": 60.37491919844861, "grad_norm": 1.1844983100891113, "learning_rate": 0.001, "loss": 1.7307, "step": 186800 }, { "epoch": 60.40723981900452, "grad_norm": 1.1899166107177734, "learning_rate": 0.001, "loss": 1.7384, "step": 186900 }, { "epoch": 60.43956043956044, "grad_norm": 1.372077465057373, "learning_rate": 0.001, "loss": 1.7427, "step": 187000 }, { "epoch": 60.47188106011635, "grad_norm": 1.09188973903656, "learning_rate": 0.001, "loss": 1.7559, "step": 187100 }, { "epoch": 60.50420168067227, "grad_norm": 3.6396427154541016, "learning_rate": 0.001, "loss": 1.7638, "step": 187200 }, { "epoch": 60.53652230122818, "grad_norm": 1.3009859323501587, "learning_rate": 0.001, "loss": 1.7796, "step": 187300 }, { "epoch": 60.568842921784096, "grad_norm": 1.0982609987258911, "learning_rate": 0.001, "loss": 1.7713, "step": 187400 }, { "epoch": 60.60116354234001, "grad_norm": 1.287492036819458, "learning_rate": 0.001, "loss": 1.7668, "step": 187500 }, { "epoch": 60.633484162895925, "grad_norm": 1.0695163011550903, "learning_rate": 0.001, "loss": 1.763, "step": 187600 }, { "epoch": 60.66580478345184, "grad_norm": 1.432298183441162, "learning_rate": 0.001, "loss": 1.7717, "step": 187700 }, { "epoch": 60.698125404007754, "grad_norm": 1.2547698020935059, "learning_rate": 0.001, "loss": 1.7914, "step": 187800 }, { "epoch": 60.73044602456367, "grad_norm": 1.13438081741333, "learning_rate": 0.001, "loss": 1.7901, "step": 187900 }, { "epoch": 60.762766645119584, "grad_norm": 22.373167037963867, "learning_rate": 0.001, "loss": 1.7984, "step": 188000 }, { "epoch": 60.7950872656755, "grad_norm": 1.6620711088180542, "learning_rate": 0.001, "loss": 1.784, "step": 188100 }, { "epoch": 60.82740788623141, "grad_norm": 1.2023378610610962, "learning_rate": 0.001, "loss": 1.8002, "step": 188200 }, { "epoch": 60.85972850678733, "grad_norm": 1.770161747932434, "learning_rate": 0.001, "loss": 1.7856, "step": 188300 }, { "epoch": 60.89204912734324, "grad_norm": 1.3435028791427612, "learning_rate": 0.001, "loss": 1.8139, "step": 188400 }, { "epoch": 60.924369747899156, "grad_norm": 1.4254133701324463, "learning_rate": 0.001, "loss": 1.806, "step": 188500 }, { "epoch": 60.95669036845507, "grad_norm": 3.941981792449951, "learning_rate": 0.001, "loss": 1.8011, "step": 188600 }, { "epoch": 60.98901098901099, "grad_norm": 1.3515390157699585, "learning_rate": 0.001, "loss": 1.817, "step": 188700 }, { "epoch": 61.02133160956691, "grad_norm": 2.201411008834839, "learning_rate": 0.001, "loss": 1.7459, "step": 188800 }, { "epoch": 61.05365223012282, "grad_norm": 1.5974974632263184, "learning_rate": 0.001, "loss": 1.688, "step": 188900 }, { "epoch": 61.085972850678736, "grad_norm": 1.3335779905319214, "learning_rate": 0.001, "loss": 1.7003, "step": 189000 }, { "epoch": 61.11829347123465, "grad_norm": 1.3323640823364258, "learning_rate": 0.001, "loss": 1.716, "step": 189100 }, { "epoch": 61.150614091790565, "grad_norm": 1.2879664897918701, "learning_rate": 0.001, "loss": 1.718, "step": 189200 }, { "epoch": 61.18293471234648, "grad_norm": 1.0341460704803467, "learning_rate": 0.001, "loss": 1.7252, "step": 189300 }, { "epoch": 61.215255332902395, "grad_norm": 1.1501681804656982, "learning_rate": 0.001, "loss": 1.7391, "step": 189400 }, { "epoch": 61.24757595345831, "grad_norm": 1.5302329063415527, "learning_rate": 0.001, "loss": 1.7116, "step": 189500 }, { "epoch": 61.279896574014224, "grad_norm": 1.3631445169448853, "learning_rate": 0.001, "loss": 1.7205, "step": 189600 }, { "epoch": 61.31221719457014, "grad_norm": 2.737738847732544, "learning_rate": 0.001, "loss": 1.7574, "step": 189700 }, { "epoch": 61.34453781512605, "grad_norm": 1.1347086429595947, "learning_rate": 0.001, "loss": 1.7371, "step": 189800 }, { "epoch": 61.37685843568197, "grad_norm": 1.072685956954956, "learning_rate": 0.001, "loss": 1.739, "step": 189900 }, { "epoch": 61.40917905623788, "grad_norm": 2.100248098373413, "learning_rate": 0.001, "loss": 1.753, "step": 190000 }, { "epoch": 61.441499676793796, "grad_norm": 1.5460516214370728, "learning_rate": 0.001, "loss": 1.7613, "step": 190100 }, { "epoch": 61.47382029734971, "grad_norm": 1.5468897819519043, "learning_rate": 0.001, "loss": 1.7359, "step": 190200 }, { "epoch": 61.506140917905626, "grad_norm": 1.1832600831985474, "learning_rate": 0.001, "loss": 1.7661, "step": 190300 }, { "epoch": 61.53846153846154, "grad_norm": 1.2180004119873047, "learning_rate": 0.001, "loss": 1.7707, "step": 190400 }, { "epoch": 61.570782159017455, "grad_norm": 2.649592638015747, "learning_rate": 0.001, "loss": 1.7654, "step": 190500 }, { "epoch": 61.60310277957337, "grad_norm": 1.7301536798477173, "learning_rate": 0.001, "loss": 1.7575, "step": 190600 }, { "epoch": 61.635423400129284, "grad_norm": 1.189369559288025, "learning_rate": 0.001, "loss": 1.7615, "step": 190700 }, { "epoch": 61.6677440206852, "grad_norm": 1.301946997642517, "learning_rate": 0.001, "loss": 1.7719, "step": 190800 }, { "epoch": 61.70006464124111, "grad_norm": 1.2204264402389526, "learning_rate": 0.001, "loss": 1.7727, "step": 190900 }, { "epoch": 61.73238526179703, "grad_norm": 1.2227561473846436, "learning_rate": 0.001, "loss": 1.7662, "step": 191000 }, { "epoch": 61.76470588235294, "grad_norm": 2.258330821990967, "learning_rate": 0.001, "loss": 1.7914, "step": 191100 }, { "epoch": 61.79702650290886, "grad_norm": 1.1899852752685547, "learning_rate": 0.001, "loss": 1.7809, "step": 191200 }, { "epoch": 61.82934712346477, "grad_norm": 1.3566854000091553, "learning_rate": 0.001, "loss": 1.7988, "step": 191300 }, { "epoch": 61.861667744020686, "grad_norm": 1.8404537439346313, "learning_rate": 0.001, "loss": 1.8008, "step": 191400 }, { "epoch": 61.8939883645766, "grad_norm": 1.3132590055465698, "learning_rate": 0.001, "loss": 1.7878, "step": 191500 }, { "epoch": 61.926308985132515, "grad_norm": 1.1081135272979736, "learning_rate": 0.001, "loss": 1.7847, "step": 191600 }, { "epoch": 61.95862960568843, "grad_norm": 1.1500428915023804, "learning_rate": 0.001, "loss": 1.7956, "step": 191700 }, { "epoch": 61.990950226244344, "grad_norm": 9.663846015930176, "learning_rate": 0.001, "loss": 1.8096, "step": 191800 }, { "epoch": 62.02327084680026, "grad_norm": 1.4117316007614136, "learning_rate": 0.001, "loss": 1.7233, "step": 191900 }, { "epoch": 62.05559146735617, "grad_norm": 1.0347338914871216, "learning_rate": 0.001, "loss": 1.6951, "step": 192000 }, { "epoch": 62.08791208791209, "grad_norm": 1.5335489511489868, "learning_rate": 0.001, "loss": 1.6998, "step": 192100 }, { "epoch": 62.120232708468, "grad_norm": 1.180031418800354, "learning_rate": 0.001, "loss": 1.7056, "step": 192200 }, { "epoch": 62.15255332902392, "grad_norm": 1.3722590208053589, "learning_rate": 0.001, "loss": 1.7066, "step": 192300 }, { "epoch": 62.18487394957983, "grad_norm": 1.347764253616333, "learning_rate": 0.001, "loss": 1.7185, "step": 192400 }, { "epoch": 62.217194570135746, "grad_norm": 1.3484255075454712, "learning_rate": 0.001, "loss": 1.7125, "step": 192500 }, { "epoch": 62.24951519069166, "grad_norm": 21.83272361755371, "learning_rate": 0.001, "loss": 1.7107, "step": 192600 }, { "epoch": 62.281835811247575, "grad_norm": 0.9706798195838928, "learning_rate": 0.001, "loss": 1.7204, "step": 192700 }, { "epoch": 62.31415643180349, "grad_norm": 9.041400909423828, "learning_rate": 0.001, "loss": 1.7249, "step": 192800 }, { "epoch": 62.346477052359404, "grad_norm": 1.0994246006011963, "learning_rate": 0.001, "loss": 1.7254, "step": 192900 }, { "epoch": 62.37879767291532, "grad_norm": 1.0919663906097412, "learning_rate": 0.001, "loss": 1.7297, "step": 193000 }, { "epoch": 62.41111829347123, "grad_norm": 1.0757349729537964, "learning_rate": 0.001, "loss": 1.7329, "step": 193100 }, { "epoch": 62.44343891402715, "grad_norm": 1.0626800060272217, "learning_rate": 0.001, "loss": 1.7468, "step": 193200 }, { "epoch": 62.47575953458306, "grad_norm": 1.265570878982544, "learning_rate": 0.001, "loss": 1.7524, "step": 193300 }, { "epoch": 62.50808015513898, "grad_norm": 1.6618725061416626, "learning_rate": 0.001, "loss": 1.7334, "step": 193400 }, { "epoch": 62.54040077569489, "grad_norm": 1.353593349456787, "learning_rate": 0.001, "loss": 1.7486, "step": 193500 }, { "epoch": 62.572721396250806, "grad_norm": 1.3140637874603271, "learning_rate": 0.001, "loss": 1.761, "step": 193600 }, { "epoch": 62.60504201680672, "grad_norm": 1.0980924367904663, "learning_rate": 0.001, "loss": 1.7609, "step": 193700 }, { "epoch": 62.637362637362635, "grad_norm": 2.5228123664855957, "learning_rate": 0.001, "loss": 1.7786, "step": 193800 }, { "epoch": 62.66968325791855, "grad_norm": 1.201715350151062, "learning_rate": 0.001, "loss": 1.7754, "step": 193900 }, { "epoch": 62.702003878474464, "grad_norm": 1.1491481065750122, "learning_rate": 0.001, "loss": 1.7676, "step": 194000 }, { "epoch": 62.73432449903038, "grad_norm": 1.0263748168945312, "learning_rate": 0.001, "loss": 1.773, "step": 194100 }, { "epoch": 62.76664511958629, "grad_norm": 1.3433176279067993, "learning_rate": 0.001, "loss": 1.7855, "step": 194200 }, { "epoch": 62.79896574014221, "grad_norm": 1.397169828414917, "learning_rate": 0.001, "loss": 1.774, "step": 194300 }, { "epoch": 62.83128636069812, "grad_norm": 1.1044763326644897, "learning_rate": 0.001, "loss": 1.7795, "step": 194400 }, { "epoch": 62.86360698125404, "grad_norm": 1.9558327198028564, "learning_rate": 0.001, "loss": 1.7842, "step": 194500 }, { "epoch": 62.89592760180995, "grad_norm": 1.171716570854187, "learning_rate": 0.001, "loss": 1.7837, "step": 194600 }, { "epoch": 62.928248222365866, "grad_norm": 1.1479594707489014, "learning_rate": 0.001, "loss": 1.7962, "step": 194700 }, { "epoch": 62.96056884292178, "grad_norm": 1.2580316066741943, "learning_rate": 0.001, "loss": 1.8024, "step": 194800 }, { "epoch": 62.992889463477695, "grad_norm": 1.0664483308792114, "learning_rate": 0.001, "loss": 1.7889, "step": 194900 }, { "epoch": 63.02521008403362, "grad_norm": 1.1523609161376953, "learning_rate": 0.001, "loss": 1.7271, "step": 195000 }, { "epoch": 63.05753070458953, "grad_norm": 1.731747031211853, "learning_rate": 0.001, "loss": 1.6773, "step": 195100 }, { "epoch": 63.089851325145446, "grad_norm": 2.636916399002075, "learning_rate": 0.001, "loss": 1.702, "step": 195200 }, { "epoch": 63.12217194570136, "grad_norm": 1.4978920221328735, "learning_rate": 0.001, "loss": 1.6994, "step": 195300 }, { "epoch": 63.154492566257275, "grad_norm": 1.2944411039352417, "learning_rate": 0.001, "loss": 1.6994, "step": 195400 }, { "epoch": 63.18681318681319, "grad_norm": 6.423586845397949, "learning_rate": 0.001, "loss": 1.705, "step": 195500 }, { "epoch": 63.219133807369104, "grad_norm": 1.2924528121948242, "learning_rate": 0.001, "loss": 1.701, "step": 195600 }, { "epoch": 63.25145442792502, "grad_norm": 1.1046240329742432, "learning_rate": 0.001, "loss": 1.7143, "step": 195700 }, { "epoch": 63.28377504848093, "grad_norm": 1.193282127380371, "learning_rate": 0.001, "loss": 1.7148, "step": 195800 }, { "epoch": 63.31609566903685, "grad_norm": 0.9726769328117371, "learning_rate": 0.001, "loss": 1.7266, "step": 195900 }, { "epoch": 63.34841628959276, "grad_norm": 1.2916725873947144, "learning_rate": 0.001, "loss": 1.7209, "step": 196000 }, { "epoch": 63.38073691014868, "grad_norm": 1.3928982019424438, "learning_rate": 0.001, "loss": 1.7162, "step": 196100 }, { "epoch": 63.41305753070459, "grad_norm": 1.1976090669631958, "learning_rate": 0.001, "loss": 1.7276, "step": 196200 }, { "epoch": 63.445378151260506, "grad_norm": 1.715441107749939, "learning_rate": 0.001, "loss": 1.7324, "step": 196300 }, { "epoch": 63.47769877181642, "grad_norm": 1.1274409294128418, "learning_rate": 0.001, "loss": 1.7269, "step": 196400 }, { "epoch": 63.510019392372335, "grad_norm": 3.670372724533081, "learning_rate": 0.001, "loss": 1.7294, "step": 196500 }, { "epoch": 63.54234001292825, "grad_norm": 1.4605987071990967, "learning_rate": 0.001, "loss": 1.744, "step": 196600 }, { "epoch": 63.574660633484164, "grad_norm": 1.5037424564361572, "learning_rate": 0.001, "loss": 1.7402, "step": 196700 }, { "epoch": 63.60698125404008, "grad_norm": 1.2264634370803833, "learning_rate": 0.001, "loss": 1.7614, "step": 196800 }, { "epoch": 63.63930187459599, "grad_norm": 1.2938991785049438, "learning_rate": 0.001, "loss": 1.7814, "step": 196900 }, { "epoch": 63.67162249515191, "grad_norm": 4.204982280731201, "learning_rate": 0.001, "loss": 1.7664, "step": 197000 }, { "epoch": 63.70394311570782, "grad_norm": 1.2210767269134521, "learning_rate": 0.001, "loss": 1.7517, "step": 197100 }, { "epoch": 63.73626373626374, "grad_norm": 1.3933453559875488, "learning_rate": 0.001, "loss": 1.752, "step": 197200 }, { "epoch": 63.76858435681965, "grad_norm": 0.9905542135238647, "learning_rate": 0.001, "loss": 1.7775, "step": 197300 }, { "epoch": 63.800904977375566, "grad_norm": 1.0325452089309692, "learning_rate": 0.001, "loss": 1.7706, "step": 197400 }, { "epoch": 63.83322559793148, "grad_norm": 4.25999641418457, "learning_rate": 0.001, "loss": 1.7677, "step": 197500 }, { "epoch": 63.865546218487395, "grad_norm": 2.1255362033843994, "learning_rate": 0.001, "loss": 1.7809, "step": 197600 }, { "epoch": 63.89786683904331, "grad_norm": 1.2918190956115723, "learning_rate": 0.001, "loss": 1.7723, "step": 197700 }, { "epoch": 63.930187459599225, "grad_norm": 1.279056429862976, "learning_rate": 0.001, "loss": 1.7791, "step": 197800 }, { "epoch": 63.96250808015514, "grad_norm": 3.6256096363067627, "learning_rate": 0.001, "loss": 1.7983, "step": 197900 }, { "epoch": 63.994828700711054, "grad_norm": 1.3605314493179321, "learning_rate": 0.001, "loss": 1.7851, "step": 198000 }, { "epoch": 64.02714932126698, "grad_norm": 1.2788010835647583, "learning_rate": 0.001, "loss": 1.711, "step": 198100 }, { "epoch": 64.05946994182288, "grad_norm": 1.2851450443267822, "learning_rate": 0.001, "loss": 1.6751, "step": 198200 }, { "epoch": 64.0917905623788, "grad_norm": 1.3857053518295288, "learning_rate": 0.001, "loss": 1.6829, "step": 198300 }, { "epoch": 64.12411118293471, "grad_norm": 2.301591634750366, "learning_rate": 0.001, "loss": 1.6724, "step": 198400 }, { "epoch": 64.15643180349063, "grad_norm": 1.2448070049285889, "learning_rate": 0.001, "loss": 1.6894, "step": 198500 }, { "epoch": 64.18875242404654, "grad_norm": 1.327683448791504, "learning_rate": 0.001, "loss": 1.6949, "step": 198600 }, { "epoch": 64.22107304460246, "grad_norm": 1.6507714986801147, "learning_rate": 0.001, "loss": 1.7014, "step": 198700 }, { "epoch": 64.25339366515837, "grad_norm": 1.6871603727340698, "learning_rate": 0.001, "loss": 1.6968, "step": 198800 }, { "epoch": 64.28571428571429, "grad_norm": 1.340981364250183, "learning_rate": 0.001, "loss": 1.7249, "step": 198900 }, { "epoch": 64.3180349062702, "grad_norm": 1.4307591915130615, "learning_rate": 0.001, "loss": 1.7192, "step": 199000 }, { "epoch": 64.35035552682612, "grad_norm": 2.4930593967437744, "learning_rate": 0.001, "loss": 1.7143, "step": 199100 }, { "epoch": 64.38267614738203, "grad_norm": 1.8173329830169678, "learning_rate": 0.001, "loss": 1.708, "step": 199200 }, { "epoch": 64.41499676793795, "grad_norm": 1.0938317775726318, "learning_rate": 0.001, "loss": 1.7143, "step": 199300 }, { "epoch": 64.44731738849386, "grad_norm": 1.2078030109405518, "learning_rate": 0.001, "loss": 1.7306, "step": 199400 }, { "epoch": 64.47963800904978, "grad_norm": 1.5090831518173218, "learning_rate": 0.001, "loss": 1.7263, "step": 199500 }, { "epoch": 64.51195862960569, "grad_norm": 3.431567430496216, "learning_rate": 0.001, "loss": 1.7394, "step": 199600 }, { "epoch": 64.54427925016161, "grad_norm": 1.3453813791275024, "learning_rate": 0.001, "loss": 1.7433, "step": 199700 }, { "epoch": 64.57659987071752, "grad_norm": 1.216852068901062, "learning_rate": 0.001, "loss": 1.7441, "step": 199800 }, { "epoch": 64.60892049127344, "grad_norm": 1.1643050909042358, "learning_rate": 0.001, "loss": 1.7515, "step": 199900 }, { "epoch": 64.64124111182934, "grad_norm": 1.2936183214187622, "learning_rate": 0.001, "loss": 1.7456, "step": 200000 }, { "epoch": 64.67356173238527, "grad_norm": 1.3873388767242432, "learning_rate": 0.001, "loss": 1.7467, "step": 200100 }, { "epoch": 64.70588235294117, "grad_norm": 1.2052769660949707, "learning_rate": 0.001, "loss": 1.7621, "step": 200200 }, { "epoch": 64.7382029734971, "grad_norm": 1.3909542560577393, "learning_rate": 0.001, "loss": 1.7544, "step": 200300 }, { "epoch": 64.770523594053, "grad_norm": 3.4328532218933105, "learning_rate": 0.001, "loss": 1.7416, "step": 200400 }, { "epoch": 64.80284421460892, "grad_norm": 1.0741126537322998, "learning_rate": 0.001, "loss": 1.7834, "step": 200500 }, { "epoch": 64.83516483516483, "grad_norm": 1.397284984588623, "learning_rate": 0.001, "loss": 1.7701, "step": 200600 }, { "epoch": 64.86748545572075, "grad_norm": 1.3038510084152222, "learning_rate": 0.001, "loss": 1.7628, "step": 200700 }, { "epoch": 64.89980607627666, "grad_norm": 2.4395101070404053, "learning_rate": 0.001, "loss": 1.7695, "step": 200800 }, { "epoch": 64.93212669683258, "grad_norm": 1.3935593366622925, "learning_rate": 0.001, "loss": 1.7577, "step": 200900 }, { "epoch": 64.96444731738849, "grad_norm": 1.2486530542373657, "learning_rate": 0.001, "loss": 1.7734, "step": 201000 }, { "epoch": 64.99676793794441, "grad_norm": 1.9926509857177734, "learning_rate": 0.001, "loss": 1.7583, "step": 201100 }, { "epoch": 65.02908855850032, "grad_norm": 1.143700122833252, "learning_rate": 0.001, "loss": 1.6734, "step": 201200 }, { "epoch": 65.06140917905624, "grad_norm": 2.6879491806030273, "learning_rate": 0.001, "loss": 1.6481, "step": 201300 }, { "epoch": 65.09372979961215, "grad_norm": 1.6204843521118164, "learning_rate": 0.001, "loss": 1.6543, "step": 201400 }, { "epoch": 65.12605042016807, "grad_norm": 23.256017684936523, "learning_rate": 0.001, "loss": 1.701, "step": 201500 }, { "epoch": 65.15837104072398, "grad_norm": 1.383384108543396, "learning_rate": 0.001, "loss": 1.7043, "step": 201600 }, { "epoch": 65.1906916612799, "grad_norm": 2.114612579345703, "learning_rate": 0.001, "loss": 1.6865, "step": 201700 }, { "epoch": 65.2230122818358, "grad_norm": 1.6314489841461182, "learning_rate": 0.001, "loss": 1.6887, "step": 201800 }, { "epoch": 65.25533290239173, "grad_norm": 1.5479962825775146, "learning_rate": 0.001, "loss": 1.7054, "step": 201900 }, { "epoch": 65.28765352294764, "grad_norm": 11.797988891601562, "learning_rate": 0.001, "loss": 1.6896, "step": 202000 }, { "epoch": 65.31997414350356, "grad_norm": 1.661034107208252, "learning_rate": 0.001, "loss": 1.7042, "step": 202100 }, { "epoch": 65.35229476405947, "grad_norm": 3.0170397758483887, "learning_rate": 0.001, "loss": 1.7174, "step": 202200 }, { "epoch": 65.38461538461539, "grad_norm": 2.5798306465148926, "learning_rate": 0.001, "loss": 1.7067, "step": 202300 }, { "epoch": 65.4169360051713, "grad_norm": 1.3059483766555786, "learning_rate": 0.001, "loss": 1.6995, "step": 202400 }, { "epoch": 65.44925662572722, "grad_norm": 1.3983116149902344, "learning_rate": 0.001, "loss": 1.7127, "step": 202500 }, { "epoch": 65.48157724628312, "grad_norm": 1.3304990530014038, "learning_rate": 0.001, "loss": 1.7145, "step": 202600 }, { "epoch": 65.51389786683905, "grad_norm": 1.3966095447540283, "learning_rate": 0.001, "loss": 1.7384, "step": 202700 }, { "epoch": 65.54621848739495, "grad_norm": 1.6477863788604736, "learning_rate": 0.001, "loss": 1.7267, "step": 202800 }, { "epoch": 65.57853910795087, "grad_norm": 1.1969075202941895, "learning_rate": 0.001, "loss": 1.7408, "step": 202900 }, { "epoch": 65.61085972850678, "grad_norm": 1.6849408149719238, "learning_rate": 0.001, "loss": 1.7393, "step": 203000 }, { "epoch": 65.6431803490627, "grad_norm": 1.4380606412887573, "learning_rate": 0.001, "loss": 1.7263, "step": 203100 }, { "epoch": 65.67550096961861, "grad_norm": 2.2221877574920654, "learning_rate": 0.001, "loss": 1.7429, "step": 203200 }, { "epoch": 65.70782159017453, "grad_norm": 1.4149386882781982, "learning_rate": 0.001, "loss": 1.7469, "step": 203300 }, { "epoch": 65.74014221073044, "grad_norm": 1.7667959928512573, "learning_rate": 0.001, "loss": 1.7426, "step": 203400 }, { "epoch": 65.77246283128636, "grad_norm": 1.6515507698059082, "learning_rate": 0.001, "loss": 1.745, "step": 203500 }, { "epoch": 65.80478345184227, "grad_norm": 1.2968014478683472, "learning_rate": 0.001, "loss": 1.7365, "step": 203600 }, { "epoch": 65.83710407239819, "grad_norm": 1.4470727443695068, "learning_rate": 0.001, "loss": 1.7501, "step": 203700 }, { "epoch": 65.8694246929541, "grad_norm": 2.857924699783325, "learning_rate": 0.001, "loss": 1.7461, "step": 203800 }, { "epoch": 65.90174531351002, "grad_norm": 2.0337166786193848, "learning_rate": 0.001, "loss": 1.7596, "step": 203900 }, { "epoch": 65.93406593406593, "grad_norm": 1.32036292552948, "learning_rate": 0.001, "loss": 1.7649, "step": 204000 }, { "epoch": 65.96638655462185, "grad_norm": 3.8111095428466797, "learning_rate": 0.001, "loss": 1.7516, "step": 204100 }, { "epoch": 65.99870717517777, "grad_norm": 1.381173849105835, "learning_rate": 0.001, "loss": 1.7407, "step": 204200 }, { "epoch": 66.03102779573368, "grad_norm": 1.5071853399276733, "learning_rate": 0.001, "loss": 1.6504, "step": 204300 }, { "epoch": 66.0633484162896, "grad_norm": 1.35489022731781, "learning_rate": 0.001, "loss": 1.6526, "step": 204400 }, { "epoch": 66.0956690368455, "grad_norm": 1.7722116708755493, "learning_rate": 0.001, "loss": 1.6729, "step": 204500 }, { "epoch": 66.12798965740143, "grad_norm": 1.4123698472976685, "learning_rate": 0.001, "loss": 1.6782, "step": 204600 }, { "epoch": 66.16031027795734, "grad_norm": 8.012266159057617, "learning_rate": 0.001, "loss": 1.686, "step": 204700 }, { "epoch": 66.19263089851326, "grad_norm": 1.810020089149475, "learning_rate": 0.001, "loss": 1.6937, "step": 204800 }, { "epoch": 66.22495151906917, "grad_norm": 1.4879013299942017, "learning_rate": 0.001, "loss": 1.6663, "step": 204900 }, { "epoch": 66.25727213962509, "grad_norm": 1.4885514974594116, "learning_rate": 0.001, "loss": 1.6973, "step": 205000 }, { "epoch": 66.289592760181, "grad_norm": 1.3149235248565674, "learning_rate": 0.001, "loss": 1.6745, "step": 205100 }, { "epoch": 66.32191338073692, "grad_norm": 1.691293478012085, "learning_rate": 0.001, "loss": 1.7, "step": 205200 }, { "epoch": 66.35423400129282, "grad_norm": 1.4186275005340576, "learning_rate": 0.001, "loss": 1.6989, "step": 205300 }, { "epoch": 66.38655462184875, "grad_norm": 2.0087668895721436, "learning_rate": 0.001, "loss": 1.6902, "step": 205400 }, { "epoch": 66.41887524240465, "grad_norm": 1.5317788124084473, "learning_rate": 0.001, "loss": 1.7082, "step": 205500 }, { "epoch": 66.45119586296057, "grad_norm": 2.583038091659546, "learning_rate": 0.001, "loss": 1.7158, "step": 205600 }, { "epoch": 66.48351648351648, "grad_norm": 1.5106029510498047, "learning_rate": 0.001, "loss": 1.6892, "step": 205700 }, { "epoch": 66.5158371040724, "grad_norm": 1.7874497175216675, "learning_rate": 0.001, "loss": 1.7038, "step": 205800 }, { "epoch": 66.54815772462831, "grad_norm": 1.4286956787109375, "learning_rate": 0.001, "loss": 1.7182, "step": 205900 }, { "epoch": 66.58047834518423, "grad_norm": 6.411563396453857, "learning_rate": 0.001, "loss": 1.7214, "step": 206000 }, { "epoch": 66.61279896574014, "grad_norm": 1.3428066968917847, "learning_rate": 0.001, "loss": 1.7231, "step": 206100 }, { "epoch": 66.64511958629606, "grad_norm": 1.625878930091858, "learning_rate": 0.001, "loss": 1.7194, "step": 206200 }, { "epoch": 66.67744020685197, "grad_norm": 1.6592307090759277, "learning_rate": 0.001, "loss": 1.7387, "step": 206300 }, { "epoch": 66.70976082740789, "grad_norm": 5.545716285705566, "learning_rate": 0.001, "loss": 1.7252, "step": 206400 }, { "epoch": 66.7420814479638, "grad_norm": 1.4463566541671753, "learning_rate": 0.001, "loss": 1.735, "step": 206500 }, { "epoch": 66.77440206851972, "grad_norm": 1.408359169960022, "learning_rate": 0.001, "loss": 1.7384, "step": 206600 }, { "epoch": 66.80672268907563, "grad_norm": 1.6587141752243042, "learning_rate": 0.001, "loss": 1.7268, "step": 206700 }, { "epoch": 66.83904330963155, "grad_norm": 1.2845450639724731, "learning_rate": 0.001, "loss": 1.7416, "step": 206800 }, { "epoch": 66.87136393018746, "grad_norm": 1.8237658739089966, "learning_rate": 0.001, "loss": 1.7525, "step": 206900 }, { "epoch": 66.90368455074338, "grad_norm": 1.6631615161895752, "learning_rate": 0.001, "loss": 1.7573, "step": 207000 }, { "epoch": 66.93600517129929, "grad_norm": 1.653330683708191, "learning_rate": 0.001, "loss": 1.7356, "step": 207100 }, { "epoch": 66.96832579185521, "grad_norm": 1.5796672105789185, "learning_rate": 0.001, "loss": 1.7639, "step": 207200 }, { "epoch": 67.00064641241111, "grad_norm": 1.4352092742919922, "learning_rate": 0.001, "loss": 1.7354, "step": 207300 }, { "epoch": 67.03296703296704, "grad_norm": 1.8501193523406982, "learning_rate": 0.001, "loss": 1.6267, "step": 207400 }, { "epoch": 67.06528765352294, "grad_norm": 2.198028326034546, "learning_rate": 0.001, "loss": 1.6398, "step": 207500 }, { "epoch": 67.09760827407887, "grad_norm": 1.6293267011642456, "learning_rate": 0.001, "loss": 1.6564, "step": 207600 }, { "epoch": 67.12992889463477, "grad_norm": 1.6070120334625244, "learning_rate": 0.001, "loss": 1.6514, "step": 207700 }, { "epoch": 67.1622495151907, "grad_norm": 5.038152694702148, "learning_rate": 0.001, "loss": 1.6541, "step": 207800 }, { "epoch": 67.1945701357466, "grad_norm": 2.076296091079712, "learning_rate": 0.001, "loss": 1.6725, "step": 207900 }, { "epoch": 67.22689075630252, "grad_norm": 1.1969019174575806, "learning_rate": 0.001, "loss": 1.681, "step": 208000 }, { "epoch": 67.25921137685843, "grad_norm": 1.7842698097229004, "learning_rate": 0.001, "loss": 1.6743, "step": 208100 }, { "epoch": 67.29153199741435, "grad_norm": 2.4045143127441406, "learning_rate": 0.001, "loss": 1.6783, "step": 208200 }, { "epoch": 67.32385261797026, "grad_norm": 2.3869144916534424, "learning_rate": 0.001, "loss": 1.683, "step": 208300 }, { "epoch": 67.35617323852618, "grad_norm": 2.204524040222168, "learning_rate": 0.001, "loss": 1.6796, "step": 208400 }, { "epoch": 67.38849385908209, "grad_norm": 2.9409334659576416, "learning_rate": 0.001, "loss": 1.6909, "step": 208500 }, { "epoch": 67.42081447963801, "grad_norm": 2.302457094192505, "learning_rate": 0.001, "loss": 1.7117, "step": 208600 }, { "epoch": 67.45313510019392, "grad_norm": 1.7671762704849243, "learning_rate": 0.001, "loss": 1.7016, "step": 208700 }, { "epoch": 67.48545572074984, "grad_norm": 2.432321786880493, "learning_rate": 0.001, "loss": 1.7143, "step": 208800 }, { "epoch": 67.51777634130575, "grad_norm": 1.410377860069275, "learning_rate": 0.001, "loss": 1.7172, "step": 208900 }, { "epoch": 67.55009696186167, "grad_norm": 1.3431810140609741, "learning_rate": 0.001, "loss": 1.7084, "step": 209000 }, { "epoch": 67.58241758241758, "grad_norm": 1.7906945943832397, "learning_rate": 0.001, "loss": 1.7228, "step": 209100 }, { "epoch": 67.6147382029735, "grad_norm": 1.8193438053131104, "learning_rate": 0.001, "loss": 1.7116, "step": 209200 }, { "epoch": 67.6470588235294, "grad_norm": 1.5851320028305054, "learning_rate": 0.001, "loss": 1.7095, "step": 209300 }, { "epoch": 67.67937944408533, "grad_norm": 1.255328893661499, "learning_rate": 0.001, "loss": 1.7334, "step": 209400 }, { "epoch": 67.71170006464124, "grad_norm": 3.4931864738464355, "learning_rate": 0.001, "loss": 1.7147, "step": 209500 }, { "epoch": 67.74402068519716, "grad_norm": 2.888726234436035, "learning_rate": 0.001, "loss": 1.7276, "step": 209600 }, { "epoch": 67.77634130575306, "grad_norm": 3.189572811126709, "learning_rate": 0.001, "loss": 1.7399, "step": 209700 }, { "epoch": 67.80866192630899, "grad_norm": 1.5399153232574463, "learning_rate": 0.001, "loss": 1.717, "step": 209800 }, { "epoch": 67.8409825468649, "grad_norm": 1.7836410999298096, "learning_rate": 0.001, "loss": 1.7355, "step": 209900 }, { "epoch": 67.87330316742081, "grad_norm": 1.2533148527145386, "learning_rate": 0.001, "loss": 1.7584, "step": 210000 }, { "epoch": 67.90562378797672, "grad_norm": 2.204423666000366, "learning_rate": 0.001, "loss": 1.7277, "step": 210100 }, { "epoch": 67.93794440853264, "grad_norm": 1.6323609352111816, "learning_rate": 0.001, "loss": 1.7431, "step": 210200 }, { "epoch": 67.97026502908855, "grad_norm": 2.064483165740967, "learning_rate": 0.001, "loss": 1.752, "step": 210300 }, { "epoch": 68.00258564964447, "grad_norm": 3.117135524749756, "learning_rate": 0.001, "loss": 1.7634, "step": 210400 }, { "epoch": 68.0349062702004, "grad_norm": 1.3706958293914795, "learning_rate": 0.001, "loss": 1.6232, "step": 210500 }, { "epoch": 68.0672268907563, "grad_norm": 1.2764952182769775, "learning_rate": 0.001, "loss": 1.6507, "step": 210600 }, { "epoch": 68.09954751131222, "grad_norm": 1.4521559476852417, "learning_rate": 0.001, "loss": 1.6566, "step": 210700 }, { "epoch": 68.13186813186813, "grad_norm": 5.054378509521484, "learning_rate": 0.001, "loss": 1.6786, "step": 210800 }, { "epoch": 68.16418875242405, "grad_norm": 2.2506182193756104, "learning_rate": 0.001, "loss": 1.6704, "step": 210900 }, { "epoch": 68.19650937297996, "grad_norm": 1.3772581815719604, "learning_rate": 0.001, "loss": 1.68, "step": 211000 }, { "epoch": 68.22882999353588, "grad_norm": 24.603910446166992, "learning_rate": 0.001, "loss": 1.6753, "step": 211100 }, { "epoch": 68.26115061409179, "grad_norm": 1.4491392374038696, "learning_rate": 0.001, "loss": 1.6833, "step": 211200 }, { "epoch": 68.29347123464771, "grad_norm": 1.2256361246109009, "learning_rate": 0.001, "loss": 1.6743, "step": 211300 }, { "epoch": 68.32579185520362, "grad_norm": 1.2443642616271973, "learning_rate": 0.001, "loss": 1.6642, "step": 211400 }, { "epoch": 68.35811247575954, "grad_norm": 1.6390190124511719, "learning_rate": 0.001, "loss": 1.6712, "step": 211500 }, { "epoch": 68.39043309631545, "grad_norm": 1.4033647775650024, "learning_rate": 0.001, "loss": 1.6874, "step": 211600 }, { "epoch": 68.42275371687137, "grad_norm": 1.8148821592330933, "learning_rate": 0.001, "loss": 1.6855, "step": 211700 }, { "epoch": 68.45507433742728, "grad_norm": 1.2021799087524414, "learning_rate": 0.001, "loss": 1.6961, "step": 211800 }, { "epoch": 68.4873949579832, "grad_norm": 2.0194091796875, "learning_rate": 0.001, "loss": 1.7003, "step": 211900 }, { "epoch": 68.5197155785391, "grad_norm": 4.131607532501221, "learning_rate": 0.001, "loss": 1.6944, "step": 212000 }, { "epoch": 68.55203619909503, "grad_norm": 1.238875150680542, "learning_rate": 0.001, "loss": 1.7083, "step": 212100 }, { "epoch": 68.58435681965094, "grad_norm": 1.3660683631896973, "learning_rate": 0.001, "loss": 1.6943, "step": 212200 }, { "epoch": 68.61667744020686, "grad_norm": 2.309957265853882, "learning_rate": 0.001, "loss": 1.7176, "step": 212300 }, { "epoch": 68.64899806076276, "grad_norm": 1.5615170001983643, "learning_rate": 0.001, "loss": 1.709, "step": 212400 }, { "epoch": 68.68131868131869, "grad_norm": 1.5342909097671509, "learning_rate": 0.001, "loss": 1.7116, "step": 212500 }, { "epoch": 68.7136393018746, "grad_norm": 1.7574206590652466, "learning_rate": 0.001, "loss": 1.71, "step": 212600 }, { "epoch": 68.74595992243052, "grad_norm": 1.4010884761810303, "learning_rate": 0.001, "loss": 1.7186, "step": 212700 }, { "epoch": 68.77828054298642, "grad_norm": 1.1147279739379883, "learning_rate": 0.001, "loss": 1.7046, "step": 212800 }, { "epoch": 68.81060116354234, "grad_norm": 3.066675901412964, "learning_rate": 0.001, "loss": 1.7229, "step": 212900 }, { "epoch": 68.84292178409825, "grad_norm": 1.5234066247940063, "learning_rate": 0.001, "loss": 1.7396, "step": 213000 }, { "epoch": 68.87524240465417, "grad_norm": 1.305245041847229, "learning_rate": 0.001, "loss": 1.748, "step": 213100 }, { "epoch": 68.90756302521008, "grad_norm": 1.2917845249176025, "learning_rate": 0.001, "loss": 1.7434, "step": 213200 }, { "epoch": 68.939883645766, "grad_norm": 4.782052993774414, "learning_rate": 0.001, "loss": 1.7428, "step": 213300 }, { "epoch": 68.97220426632191, "grad_norm": 1.1211791038513184, "learning_rate": 0.001, "loss": 1.7336, "step": 213400 }, { "epoch": 69.00452488687783, "grad_norm": 1.2593681812286377, "learning_rate": 0.001, "loss": 1.727, "step": 213500 }, { "epoch": 69.03684550743374, "grad_norm": 1.118612289428711, "learning_rate": 0.001, "loss": 1.641, "step": 213600 }, { "epoch": 69.06916612798966, "grad_norm": 0.9750691056251526, "learning_rate": 0.001, "loss": 1.6398, "step": 213700 }, { "epoch": 69.10148674854557, "grad_norm": 1.389286756515503, "learning_rate": 0.001, "loss": 1.6506, "step": 213800 }, { "epoch": 69.13380736910149, "grad_norm": 2.4819223880767822, "learning_rate": 0.001, "loss": 1.6478, "step": 213900 }, { "epoch": 69.1661279896574, "grad_norm": 1.3489699363708496, "learning_rate": 0.001, "loss": 1.6439, "step": 214000 }, { "epoch": 69.19844861021332, "grad_norm": 1.2240403890609741, "learning_rate": 0.001, "loss": 1.647, "step": 214100 }, { "epoch": 69.23076923076923, "grad_norm": 1.3537955284118652, "learning_rate": 0.001, "loss": 1.6567, "step": 214200 }, { "epoch": 69.26308985132515, "grad_norm": 1.283801555633545, "learning_rate": 0.001, "loss": 1.6587, "step": 214300 }, { "epoch": 69.29541047188106, "grad_norm": 1.1543352603912354, "learning_rate": 0.001, "loss": 1.655, "step": 214400 }, { "epoch": 69.32773109243698, "grad_norm": 1.5478595495224, "learning_rate": 0.001, "loss": 1.689, "step": 214500 }, { "epoch": 69.36005171299288, "grad_norm": 1.3403937816619873, "learning_rate": 0.001, "loss": 1.6652, "step": 214600 }, { "epoch": 69.3923723335488, "grad_norm": 1.7895828485488892, "learning_rate": 0.001, "loss": 1.6652, "step": 214700 }, { "epoch": 69.42469295410471, "grad_norm": 1.236633062362671, "learning_rate": 0.001, "loss": 1.6669, "step": 214800 }, { "epoch": 69.45701357466064, "grad_norm": 1.4815711975097656, "learning_rate": 0.001, "loss": 1.6761, "step": 214900 }, { "epoch": 69.48933419521654, "grad_norm": 1.3084063529968262, "learning_rate": 0.001, "loss": 1.6956, "step": 215000 }, { "epoch": 69.52165481577246, "grad_norm": 1.2972139120101929, "learning_rate": 0.001, "loss": 1.6955, "step": 215100 }, { "epoch": 69.55397543632837, "grad_norm": 1.1472169160842896, "learning_rate": 0.001, "loss": 1.6997, "step": 215200 }, { "epoch": 69.5862960568843, "grad_norm": 1.652106523513794, "learning_rate": 0.001, "loss": 1.7098, "step": 215300 }, { "epoch": 69.6186166774402, "grad_norm": 1.639631748199463, "learning_rate": 0.001, "loss": 1.7113, "step": 215400 }, { "epoch": 69.65093729799612, "grad_norm": 1.3509989976882935, "learning_rate": 0.001, "loss": 1.7204, "step": 215500 }, { "epoch": 69.68325791855203, "grad_norm": 1.791174054145813, "learning_rate": 0.001, "loss": 1.712, "step": 215600 }, { "epoch": 69.71557853910795, "grad_norm": 1.2299721240997314, "learning_rate": 0.001, "loss": 1.7175, "step": 215700 }, { "epoch": 69.74789915966386, "grad_norm": 1.7063227891921997, "learning_rate": 0.001, "loss": 1.7101, "step": 215800 }, { "epoch": 69.78021978021978, "grad_norm": 1.2766379117965698, "learning_rate": 0.001, "loss": 1.7011, "step": 215900 }, { "epoch": 69.81254040077569, "grad_norm": 1.545007348060608, "learning_rate": 0.001, "loss": 1.7053, "step": 216000 }, { "epoch": 69.84486102133161, "grad_norm": 1.0024380683898926, "learning_rate": 0.001, "loss": 1.724, "step": 216100 }, { "epoch": 69.87718164188752, "grad_norm": 1.2399967908859253, "learning_rate": 0.001, "loss": 1.7059, "step": 216200 }, { "epoch": 69.90950226244344, "grad_norm": 1.293257713317871, "learning_rate": 0.001, "loss": 1.7167, "step": 216300 }, { "epoch": 69.94182288299935, "grad_norm": 1.1719609498977661, "learning_rate": 0.001, "loss": 1.73, "step": 216400 }, { "epoch": 69.97414350355527, "grad_norm": 1.3594120740890503, "learning_rate": 0.001, "loss": 1.7201, "step": 216500 }, { "epoch": 70.00646412411119, "grad_norm": 2.52458119392395, "learning_rate": 0.001, "loss": 1.7218, "step": 216600 }, { "epoch": 70.0387847446671, "grad_norm": 1.1373826265335083, "learning_rate": 0.001, "loss": 1.6057, "step": 216700 }, { "epoch": 70.07110536522302, "grad_norm": 1.2097384929656982, "learning_rate": 0.001, "loss": 1.6139, "step": 216800 }, { "epoch": 70.10342598577893, "grad_norm": 1.0671385526657104, "learning_rate": 0.001, "loss": 1.627, "step": 216900 }, { "epoch": 70.13574660633485, "grad_norm": 1.7715823650360107, "learning_rate": 0.001, "loss": 1.6437, "step": 217000 }, { "epoch": 70.16806722689076, "grad_norm": 1.5620737075805664, "learning_rate": 0.001, "loss": 1.6393, "step": 217100 }, { "epoch": 70.20038784744668, "grad_norm": 0.9991287589073181, "learning_rate": 0.001, "loss": 1.636, "step": 217200 }, { "epoch": 70.23270846800258, "grad_norm": 1.3396512269973755, "learning_rate": 0.001, "loss": 1.6554, "step": 217300 }, { "epoch": 70.2650290885585, "grad_norm": 1.6593924760818481, "learning_rate": 0.001, "loss": 1.6509, "step": 217400 }, { "epoch": 70.29734970911441, "grad_norm": 1.319351315498352, "learning_rate": 0.001, "loss": 1.6667, "step": 217500 }, { "epoch": 70.32967032967034, "grad_norm": 1.2740057706832886, "learning_rate": 0.001, "loss": 1.681, "step": 217600 }, { "epoch": 70.36199095022624, "grad_norm": 1.2626289129257202, "learning_rate": 0.001, "loss": 1.6603, "step": 217700 }, { "epoch": 70.39431157078216, "grad_norm": 2.1013336181640625, "learning_rate": 0.001, "loss": 1.6691, "step": 217800 }, { "epoch": 70.42663219133807, "grad_norm": 1.1560178995132446, "learning_rate": 0.001, "loss": 1.6691, "step": 217900 }, { "epoch": 70.458952811894, "grad_norm": 1.0711314678192139, "learning_rate": 0.001, "loss": 1.6695, "step": 218000 }, { "epoch": 70.4912734324499, "grad_norm": 1.3318216800689697, "learning_rate": 0.001, "loss": 1.6826, "step": 218100 }, { "epoch": 70.52359405300582, "grad_norm": 1.299968957901001, "learning_rate": 0.001, "loss": 1.6844, "step": 218200 }, { "epoch": 70.55591467356173, "grad_norm": 1.351518988609314, "learning_rate": 0.001, "loss": 1.6969, "step": 218300 }, { "epoch": 70.58823529411765, "grad_norm": 1.5017919540405273, "learning_rate": 0.001, "loss": 1.7116, "step": 218400 }, { "epoch": 70.62055591467356, "grad_norm": 1.4329875707626343, "learning_rate": 0.001, "loss": 1.6721, "step": 218500 }, { "epoch": 70.65287653522948, "grad_norm": 1.2385377883911133, "learning_rate": 0.001, "loss": 1.6894, "step": 218600 }, { "epoch": 70.68519715578539, "grad_norm": 1.3504729270935059, "learning_rate": 0.001, "loss": 1.6824, "step": 218700 }, { "epoch": 70.71751777634131, "grad_norm": 1.3063488006591797, "learning_rate": 0.001, "loss": 1.7004, "step": 218800 }, { "epoch": 70.74983839689722, "grad_norm": 1.1546604633331299, "learning_rate": 0.001, "loss": 1.6939, "step": 218900 }, { "epoch": 70.78215901745314, "grad_norm": 1.101382851600647, "learning_rate": 0.001, "loss": 1.7072, "step": 219000 }, { "epoch": 70.81447963800905, "grad_norm": 1.9860423803329468, "learning_rate": 0.001, "loss": 1.7077, "step": 219100 }, { "epoch": 70.84680025856497, "grad_norm": 2.092599391937256, "learning_rate": 0.001, "loss": 1.7182, "step": 219200 }, { "epoch": 70.87912087912088, "grad_norm": 1.1592332124710083, "learning_rate": 0.001, "loss": 1.7133, "step": 219300 }, { "epoch": 70.9114414996768, "grad_norm": 1.461661696434021, "learning_rate": 0.001, "loss": 1.7312, "step": 219400 }, { "epoch": 70.9437621202327, "grad_norm": 2.932982921600342, "learning_rate": 0.001, "loss": 1.7121, "step": 219500 }, { "epoch": 70.97608274078863, "grad_norm": 1.674523949623108, "learning_rate": 0.001, "loss": 1.7013, "step": 219600 }, { "epoch": 71.00840336134453, "grad_norm": 1.3590933084487915, "learning_rate": 0.001, "loss": 1.7116, "step": 219700 }, { "epoch": 71.04072398190046, "grad_norm": 2.575207233428955, "learning_rate": 0.001, "loss": 1.5935, "step": 219800 }, { "epoch": 71.07304460245636, "grad_norm": 1.4465466737747192, "learning_rate": 0.001, "loss": 1.6228, "step": 219900 }, { "epoch": 71.10536522301229, "grad_norm": 1.8533459901809692, "learning_rate": 0.001, "loss": 1.6001, "step": 220000 }, { "epoch": 71.13768584356819, "grad_norm": 1.0849183797836304, "learning_rate": 0.001, "loss": 1.622, "step": 220100 }, { "epoch": 71.17000646412411, "grad_norm": 1.2012532949447632, "learning_rate": 0.001, "loss": 1.6266, "step": 220200 }, { "epoch": 71.20232708468002, "grad_norm": 1.368564248085022, "learning_rate": 0.001, "loss": 1.6378, "step": 220300 }, { "epoch": 71.23464770523594, "grad_norm": 1.320887565612793, "learning_rate": 0.001, "loss": 1.6485, "step": 220400 }, { "epoch": 71.26696832579185, "grad_norm": 1.1533743143081665, "learning_rate": 0.001, "loss": 1.6222, "step": 220500 }, { "epoch": 71.29928894634777, "grad_norm": 1.2094136476516724, "learning_rate": 0.001, "loss": 1.65, "step": 220600 }, { "epoch": 71.33160956690368, "grad_norm": 1.243307113647461, "learning_rate": 0.001, "loss": 1.6507, "step": 220700 }, { "epoch": 71.3639301874596, "grad_norm": 1.2314996719360352, "learning_rate": 0.001, "loss": 1.6434, "step": 220800 }, { "epoch": 71.39625080801551, "grad_norm": 2.293916940689087, "learning_rate": 0.001, "loss": 1.6711, "step": 220900 }, { "epoch": 71.42857142857143, "grad_norm": 1.0958737134933472, "learning_rate": 0.001, "loss": 1.6777, "step": 221000 }, { "epoch": 71.46089204912734, "grad_norm": 1.4391621351242065, "learning_rate": 0.001, "loss": 1.6521, "step": 221100 }, { "epoch": 71.49321266968326, "grad_norm": 1.2949024438858032, "learning_rate": 0.001, "loss": 1.6692, "step": 221200 }, { "epoch": 71.52553329023917, "grad_norm": 1.2754753828048706, "learning_rate": 0.001, "loss": 1.6672, "step": 221300 }, { "epoch": 71.55785391079509, "grad_norm": 1.1098119020462036, "learning_rate": 0.001, "loss": 1.6815, "step": 221400 }, { "epoch": 71.590174531351, "grad_norm": 1.8240190744400024, "learning_rate": 0.001, "loss": 1.6843, "step": 221500 }, { "epoch": 71.62249515190692, "grad_norm": 1.3991206884384155, "learning_rate": 0.001, "loss": 1.6901, "step": 221600 }, { "epoch": 71.65481577246283, "grad_norm": 1.2723743915557861, "learning_rate": 0.001, "loss": 1.692, "step": 221700 }, { "epoch": 71.68713639301875, "grad_norm": 1.0649892091751099, "learning_rate": 0.001, "loss": 1.6872, "step": 221800 }, { "epoch": 71.71945701357465, "grad_norm": 2.1344380378723145, "learning_rate": 0.001, "loss": 1.6774, "step": 221900 }, { "epoch": 71.75177763413058, "grad_norm": 1.3315361738204956, "learning_rate": 0.001, "loss": 1.6936, "step": 222000 }, { "epoch": 71.78409825468648, "grad_norm": 1.2613959312438965, "learning_rate": 0.001, "loss": 1.715, "step": 222100 }, { "epoch": 71.8164188752424, "grad_norm": 1.051647663116455, "learning_rate": 0.001, "loss": 1.6982, "step": 222200 }, { "epoch": 71.84873949579831, "grad_norm": 1.278030514717102, "learning_rate": 0.001, "loss": 1.7195, "step": 222300 }, { "epoch": 71.88106011635423, "grad_norm": 1.0906519889831543, "learning_rate": 0.001, "loss": 1.7031, "step": 222400 }, { "epoch": 71.91338073691014, "grad_norm": 1.5682449340820312, "learning_rate": 0.001, "loss": 1.7064, "step": 222500 }, { "epoch": 71.94570135746606, "grad_norm": 1.8833974599838257, "learning_rate": 0.001, "loss": 1.7121, "step": 222600 }, { "epoch": 71.97802197802197, "grad_norm": 1.471835732460022, "learning_rate": 0.001, "loss": 1.7221, "step": 222700 }, { "epoch": 72.01034259857789, "grad_norm": 1.2024946212768555, "learning_rate": 0.001, "loss": 1.665, "step": 222800 }, { "epoch": 72.04266321913381, "grad_norm": 1.4671399593353271, "learning_rate": 0.001, "loss": 1.6024, "step": 222900 }, { "epoch": 72.07498383968972, "grad_norm": 1.211439847946167, "learning_rate": 0.001, "loss": 1.6023, "step": 223000 }, { "epoch": 72.10730446024564, "grad_norm": 1.124714732170105, "learning_rate": 0.001, "loss": 1.6006, "step": 223100 }, { "epoch": 72.13962508080155, "grad_norm": 1.3618842363357544, "learning_rate": 0.001, "loss": 1.619, "step": 223200 }, { "epoch": 72.17194570135747, "grad_norm": 1.145764946937561, "learning_rate": 0.001, "loss": 1.6186, "step": 223300 }, { "epoch": 72.20426632191338, "grad_norm": 1.2488354444503784, "learning_rate": 0.001, "loss": 1.638, "step": 223400 }, { "epoch": 72.2365869424693, "grad_norm": 4.003416061401367, "learning_rate": 0.001, "loss": 1.6335, "step": 223500 }, { "epoch": 72.26890756302521, "grad_norm": 1.0246009826660156, "learning_rate": 0.001, "loss": 1.6404, "step": 223600 }, { "epoch": 72.30122818358113, "grad_norm": 1.2234944105148315, "learning_rate": 0.001, "loss": 1.6439, "step": 223700 }, { "epoch": 72.33354880413704, "grad_norm": 22.026561737060547, "learning_rate": 0.001, "loss": 1.6299, "step": 223800 }, { "epoch": 72.36586942469296, "grad_norm": 3.3484604358673096, "learning_rate": 0.001, "loss": 1.6503, "step": 223900 }, { "epoch": 72.39819004524887, "grad_norm": 1.3204076290130615, "learning_rate": 0.001, "loss": 1.6589, "step": 224000 }, { "epoch": 72.43051066580479, "grad_norm": 1.194342017173767, "learning_rate": 0.001, "loss": 1.6655, "step": 224100 }, { "epoch": 72.4628312863607, "grad_norm": 1.8662569522857666, "learning_rate": 0.001, "loss": 1.6737, "step": 224200 }, { "epoch": 72.49515190691662, "grad_norm": 2.274613618850708, "learning_rate": 0.001, "loss": 1.67, "step": 224300 }, { "epoch": 72.52747252747253, "grad_norm": 1.424433946609497, "learning_rate": 0.001, "loss": 1.6667, "step": 224400 }, { "epoch": 72.55979314802845, "grad_norm": 2.3770639896392822, "learning_rate": 0.001, "loss": 1.6474, "step": 224500 }, { "epoch": 72.59211376858435, "grad_norm": 1.7472200393676758, "learning_rate": 0.001, "loss": 1.6703, "step": 224600 }, { "epoch": 72.62443438914028, "grad_norm": 1.600048303604126, "learning_rate": 0.001, "loss": 1.6708, "step": 224700 }, { "epoch": 72.65675500969618, "grad_norm": 1.4091856479644775, "learning_rate": 0.001, "loss": 1.6748, "step": 224800 }, { "epoch": 72.6890756302521, "grad_norm": 1.3631477355957031, "learning_rate": 0.001, "loss": 1.6772, "step": 224900 }, { "epoch": 72.72139625080801, "grad_norm": 1.6795262098312378, "learning_rate": 0.001, "loss": 1.6896, "step": 225000 }, { "epoch": 72.75371687136393, "grad_norm": 1.4507970809936523, "learning_rate": 0.001, "loss": 1.6817, "step": 225100 }, { "epoch": 72.78603749191984, "grad_norm": 2.346508741378784, "learning_rate": 0.001, "loss": 1.6873, "step": 225200 }, { "epoch": 72.81835811247576, "grad_norm": 1.9142025709152222, "learning_rate": 0.001, "loss": 1.7015, "step": 225300 }, { "epoch": 72.85067873303167, "grad_norm": 1.3443036079406738, "learning_rate": 0.001, "loss": 1.6939, "step": 225400 }, { "epoch": 72.88299935358759, "grad_norm": 1.8560032844543457, "learning_rate": 0.001, "loss": 1.6849, "step": 225500 }, { "epoch": 72.9153199741435, "grad_norm": 1.5492615699768066, "learning_rate": 0.001, "loss": 1.6815, "step": 225600 }, { "epoch": 72.94764059469942, "grad_norm": 1.1060559749603271, "learning_rate": 0.001, "loss": 1.6979, "step": 225700 }, { "epoch": 72.97996121525533, "grad_norm": 1.3237814903259277, "learning_rate": 0.001, "loss": 1.7099, "step": 225800 }, { "epoch": 73.01228183581125, "grad_norm": 1.341617226600647, "learning_rate": 0.001, "loss": 1.6512, "step": 225900 }, { "epoch": 73.04460245636716, "grad_norm": 1.0855339765548706, "learning_rate": 0.001, "loss": 1.5837, "step": 226000 }, { "epoch": 73.07692307692308, "grad_norm": 3.5869863033294678, "learning_rate": 0.001, "loss": 1.6096, "step": 226100 }, { "epoch": 73.10924369747899, "grad_norm": 3.6815438270568848, "learning_rate": 0.001, "loss": 1.6107, "step": 226200 }, { "epoch": 73.14156431803491, "grad_norm": 1.9906809329986572, "learning_rate": 0.001, "loss": 1.6149, "step": 226300 }, { "epoch": 73.17388493859082, "grad_norm": 1.130189299583435, "learning_rate": 0.001, "loss": 1.6162, "step": 226400 }, { "epoch": 73.20620555914674, "grad_norm": 5.166330337524414, "learning_rate": 0.001, "loss": 1.6247, "step": 226500 }, { "epoch": 73.23852617970265, "grad_norm": 1.6495802402496338, "learning_rate": 0.001, "loss": 1.6185, "step": 226600 }, { "epoch": 73.27084680025857, "grad_norm": 7.619762420654297, "learning_rate": 0.001, "loss": 1.6239, "step": 226700 }, { "epoch": 73.30316742081448, "grad_norm": 2.5824763774871826, "learning_rate": 0.001, "loss": 1.6372, "step": 226800 }, { "epoch": 73.3354880413704, "grad_norm": 1.2857626676559448, "learning_rate": 0.001, "loss": 1.6318, "step": 226900 }, { "epoch": 73.3678086619263, "grad_norm": 1.2268927097320557, "learning_rate": 0.001, "loss": 1.6373, "step": 227000 }, { "epoch": 73.40012928248223, "grad_norm": 1.6116132736206055, "learning_rate": 0.001, "loss": 1.6426, "step": 227100 }, { "epoch": 73.43244990303813, "grad_norm": 1.1261813640594482, "learning_rate": 0.001, "loss": 1.6493, "step": 227200 }, { "epoch": 73.46477052359405, "grad_norm": 5.699511528015137, "learning_rate": 0.001, "loss": 1.6472, "step": 227300 }, { "epoch": 73.49709114414996, "grad_norm": 1.5950987339019775, "learning_rate": 0.001, "loss": 1.6586, "step": 227400 }, { "epoch": 73.52941176470588, "grad_norm": 2.5280821323394775, "learning_rate": 0.001, "loss": 1.6694, "step": 227500 }, { "epoch": 73.56173238526179, "grad_norm": 1.0161173343658447, "learning_rate": 0.001, "loss": 1.6747, "step": 227600 }, { "epoch": 73.59405300581771, "grad_norm": 1.232656478881836, "learning_rate": 0.001, "loss": 1.6591, "step": 227700 }, { "epoch": 73.62637362637362, "grad_norm": 1.547879695892334, "learning_rate": 0.001, "loss": 1.6715, "step": 227800 }, { "epoch": 73.65869424692954, "grad_norm": 1.225738286972046, "learning_rate": 0.001, "loss": 1.6724, "step": 227900 }, { "epoch": 73.69101486748545, "grad_norm": 1.384613275527954, "learning_rate": 0.001, "loss": 1.6715, "step": 228000 }, { "epoch": 73.72333548804137, "grad_norm": 1.5414756536483765, "learning_rate": 0.001, "loss": 1.6884, "step": 228100 }, { "epoch": 73.75565610859728, "grad_norm": 1.2540833950042725, "learning_rate": 0.001, "loss": 1.6898, "step": 228200 }, { "epoch": 73.7879767291532, "grad_norm": 4.087057590484619, "learning_rate": 0.001, "loss": 1.6827, "step": 228300 }, { "epoch": 73.82029734970911, "grad_norm": 1.7578486204147339, "learning_rate": 0.001, "loss": 1.6864, "step": 228400 }, { "epoch": 73.85261797026503, "grad_norm": 1.295963168144226, "learning_rate": 0.001, "loss": 1.6958, "step": 228500 }, { "epoch": 73.88493859082094, "grad_norm": 1.6729366779327393, "learning_rate": 0.001, "loss": 1.6956, "step": 228600 }, { "epoch": 73.91725921137686, "grad_norm": 1.308285117149353, "learning_rate": 0.001, "loss": 1.6989, "step": 228700 }, { "epoch": 73.94957983193277, "grad_norm": 2.5056684017181396, "learning_rate": 0.001, "loss": 1.694, "step": 228800 }, { "epoch": 73.98190045248869, "grad_norm": 1.699945092201233, "learning_rate": 0.001, "loss": 1.7071, "step": 228900 }, { "epoch": 74.01422107304461, "grad_norm": 1.5779298543930054, "learning_rate": 0.001, "loss": 1.6266, "step": 229000 }, { "epoch": 74.04654169360052, "grad_norm": 1.8912166357040405, "learning_rate": 0.001, "loss": 1.5898, "step": 229100 }, { "epoch": 74.07886231415644, "grad_norm": 1.4678560495376587, "learning_rate": 0.001, "loss": 1.5809, "step": 229200 }, { "epoch": 74.11118293471235, "grad_norm": 14.011090278625488, "learning_rate": 0.001, "loss": 1.6171, "step": 229300 }, { "epoch": 74.14350355526827, "grad_norm": 1.271886944770813, "learning_rate": 0.001, "loss": 1.6057, "step": 229400 }, { "epoch": 74.17582417582418, "grad_norm": 1.321671724319458, "learning_rate": 0.001, "loss": 1.6208, "step": 229500 }, { "epoch": 74.2081447963801, "grad_norm": 1.2986078262329102, "learning_rate": 0.001, "loss": 1.6182, "step": 229600 }, { "epoch": 74.240465416936, "grad_norm": 1.3105615377426147, "learning_rate": 0.001, "loss": 1.6134, "step": 229700 }, { "epoch": 74.27278603749193, "grad_norm": 1.8321210145950317, "learning_rate": 0.001, "loss": 1.6319, "step": 229800 }, { "epoch": 74.30510665804783, "grad_norm": 1.7344890832901, "learning_rate": 0.001, "loss": 1.6285, "step": 229900 }, { "epoch": 74.33742727860376, "grad_norm": 1.2795850038528442, "learning_rate": 0.001, "loss": 1.6439, "step": 230000 }, { "epoch": 74.36974789915966, "grad_norm": 1.376267671585083, "learning_rate": 0.001, "loss": 1.6379, "step": 230100 }, { "epoch": 74.40206851971558, "grad_norm": 1.4366533756256104, "learning_rate": 0.001, "loss": 1.6509, "step": 230200 }, { "epoch": 74.43438914027149, "grad_norm": 1.608836054801941, "learning_rate": 0.001, "loss": 1.6605, "step": 230300 }, { "epoch": 74.46670976082741, "grad_norm": 4.8717498779296875, "learning_rate": 0.001, "loss": 1.6533, "step": 230400 }, { "epoch": 74.49903038138332, "grad_norm": 1.5786211490631104, "learning_rate": 0.001, "loss": 1.6702, "step": 230500 }, { "epoch": 74.53135100193924, "grad_norm": 1.3575888872146606, "learning_rate": 0.001, "loss": 1.6557, "step": 230600 }, { "epoch": 74.56367162249515, "grad_norm": 1.3210697174072266, "learning_rate": 0.001, "loss": 1.6658, "step": 230700 }, { "epoch": 74.59599224305107, "grad_norm": 1.4199596643447876, "learning_rate": 0.001, "loss": 1.6716, "step": 230800 }, { "epoch": 74.62831286360698, "grad_norm": 1.5141711235046387, "learning_rate": 0.001, "loss": 1.6722, "step": 230900 }, { "epoch": 74.6606334841629, "grad_norm": 2.153641939163208, "learning_rate": 0.001, "loss": 1.6682, "step": 231000 }, { "epoch": 74.69295410471881, "grad_norm": 1.3360282182693481, "learning_rate": 0.001, "loss": 1.6684, "step": 231100 }, { "epoch": 74.72527472527473, "grad_norm": 1.2317759990692139, "learning_rate": 0.001, "loss": 1.6753, "step": 231200 }, { "epoch": 74.75759534583064, "grad_norm": 1.548261046409607, "learning_rate": 0.001, "loss": 1.6883, "step": 231300 }, { "epoch": 74.78991596638656, "grad_norm": 2.0865676403045654, "learning_rate": 0.001, "loss": 1.6786, "step": 231400 }, { "epoch": 74.82223658694247, "grad_norm": 1.3899306058883667, "learning_rate": 0.001, "loss": 1.6898, "step": 231500 }, { "epoch": 74.85455720749839, "grad_norm": 2.213381052017212, "learning_rate": 0.001, "loss": 1.6762, "step": 231600 }, { "epoch": 74.8868778280543, "grad_norm": 2.42106294631958, "learning_rate": 0.001, "loss": 1.6918, "step": 231700 }, { "epoch": 74.91919844861022, "grad_norm": 1.6212127208709717, "learning_rate": 0.001, "loss": 1.6975, "step": 231800 }, { "epoch": 74.95151906916612, "grad_norm": 7.834531307220459, "learning_rate": 0.001, "loss": 1.6997, "step": 231900 }, { "epoch": 74.98383968972205, "grad_norm": 1.783002495765686, "learning_rate": 0.001, "loss": 1.7085, "step": 232000 }, { "epoch": 75.01616031027795, "grad_norm": 2.0303685665130615, "learning_rate": 0.001, "loss": 1.6093, "step": 232100 }, { "epoch": 75.04848093083388, "grad_norm": 3.4877805709838867, "learning_rate": 0.001, "loss": 1.5878, "step": 232200 }, { "epoch": 75.08080155138978, "grad_norm": 2.5266225337982178, "learning_rate": 0.001, "loss": 1.5705, "step": 232300 }, { "epoch": 75.1131221719457, "grad_norm": 2.4773268699645996, "learning_rate": 0.001, "loss": 1.5982, "step": 232400 }, { "epoch": 75.14544279250161, "grad_norm": 2.5392026901245117, "learning_rate": 0.001, "loss": 1.61, "step": 232500 }, { "epoch": 75.17776341305753, "grad_norm": 3.246748447418213, "learning_rate": 0.001, "loss": 1.5964, "step": 232600 }, { "epoch": 75.21008403361344, "grad_norm": 2.7645561695098877, "learning_rate": 0.001, "loss": 1.6055, "step": 232700 }, { "epoch": 75.24240465416936, "grad_norm": 2.0532710552215576, "learning_rate": 0.001, "loss": 1.605, "step": 232800 }, { "epoch": 75.27472527472527, "grad_norm": 1.7273861169815063, "learning_rate": 0.001, "loss": 1.6418, "step": 232900 }, { "epoch": 75.30704589528119, "grad_norm": 2.118966817855835, "learning_rate": 0.001, "loss": 1.6254, "step": 233000 }, { "epoch": 75.3393665158371, "grad_norm": 2.6865553855895996, "learning_rate": 0.001, "loss": 1.6182, "step": 233100 }, { "epoch": 75.37168713639302, "grad_norm": 1.691421389579773, "learning_rate": 0.001, "loss": 1.6242, "step": 233200 }, { "epoch": 75.40400775694893, "grad_norm": 2.505297899246216, "learning_rate": 0.001, "loss": 1.629, "step": 233300 }, { "epoch": 75.43632837750485, "grad_norm": 1.8659807443618774, "learning_rate": 0.001, "loss": 1.635, "step": 233400 }, { "epoch": 75.46864899806076, "grad_norm": 2.5097386837005615, "learning_rate": 0.001, "loss": 1.6518, "step": 233500 }, { "epoch": 75.50096961861668, "grad_norm": 1.844234585762024, "learning_rate": 0.001, "loss": 1.6401, "step": 233600 }, { "epoch": 75.53329023917259, "grad_norm": 1.9169886112213135, "learning_rate": 0.001, "loss": 1.6504, "step": 233700 }, { "epoch": 75.56561085972851, "grad_norm": 2.82342267036438, "learning_rate": 0.001, "loss": 1.6573, "step": 233800 }, { "epoch": 75.59793148028442, "grad_norm": 2.2056808471679688, "learning_rate": 0.001, "loss": 1.6447, "step": 233900 }, { "epoch": 75.63025210084034, "grad_norm": 2.383005380630493, "learning_rate": 0.001, "loss": 1.6415, "step": 234000 }, { "epoch": 75.66257272139624, "grad_norm": 5.495401382446289, "learning_rate": 0.001, "loss": 1.6589, "step": 234100 }, { "epoch": 75.69489334195217, "grad_norm": 1.9846906661987305, "learning_rate": 0.001, "loss": 1.6511, "step": 234200 }, { "epoch": 75.72721396250807, "grad_norm": 1.8586468696594238, "learning_rate": 0.001, "loss": 1.6669, "step": 234300 }, { "epoch": 75.759534583064, "grad_norm": 1.6170953512191772, "learning_rate": 0.001, "loss": 1.6876, "step": 234400 }, { "epoch": 75.7918552036199, "grad_norm": 1.8270747661590576, "learning_rate": 0.001, "loss": 1.6645, "step": 234500 }, { "epoch": 75.82417582417582, "grad_norm": 1.5273594856262207, "learning_rate": 0.001, "loss": 1.6605, "step": 234600 }, { "epoch": 75.85649644473173, "grad_norm": 2.08854341506958, "learning_rate": 0.001, "loss": 1.686, "step": 234700 }, { "epoch": 75.88881706528765, "grad_norm": 12.589361190795898, "learning_rate": 0.001, "loss": 1.6915, "step": 234800 }, { "epoch": 75.92113768584356, "grad_norm": 2.1758835315704346, "learning_rate": 0.001, "loss": 1.6877, "step": 234900 }, { "epoch": 75.95345830639948, "grad_norm": 3.0793302059173584, "learning_rate": 0.001, "loss": 1.6986, "step": 235000 }, { "epoch": 75.98577892695539, "grad_norm": 3.08424973487854, "learning_rate": 0.001, "loss": 1.6969, "step": 235100 }, { "epoch": 76.01809954751131, "grad_norm": 1.4017972946166992, "learning_rate": 0.001, "loss": 1.6427, "step": 235200 }, { "epoch": 76.05042016806723, "grad_norm": 6.161618232727051, "learning_rate": 0.001, "loss": 1.5743, "step": 235300 }, { "epoch": 76.08274078862314, "grad_norm": 1.1618300676345825, "learning_rate": 0.001, "loss": 1.5794, "step": 235400 }, { "epoch": 76.11506140917906, "grad_norm": 1.3072526454925537, "learning_rate": 0.001, "loss": 1.5897, "step": 235500 }, { "epoch": 76.14738202973497, "grad_norm": 1.1187994480133057, "learning_rate": 0.001, "loss": 1.5868, "step": 235600 }, { "epoch": 76.17970265029089, "grad_norm": 1.209040641784668, "learning_rate": 0.001, "loss": 1.5997, "step": 235700 }, { "epoch": 76.2120232708468, "grad_norm": 1.976312518119812, "learning_rate": 0.001, "loss": 1.6078, "step": 235800 }, { "epoch": 76.24434389140272, "grad_norm": 2.169402837753296, "learning_rate": 0.001, "loss": 1.5961, "step": 235900 }, { "epoch": 76.27666451195863, "grad_norm": 5.769019603729248, "learning_rate": 0.001, "loss": 1.6029, "step": 236000 }, { "epoch": 76.30898513251455, "grad_norm": 1.7772659063339233, "learning_rate": 0.001, "loss": 1.6193, "step": 236100 }, { "epoch": 76.34130575307046, "grad_norm": 1.4675816297531128, "learning_rate": 0.001, "loss": 1.6075, "step": 236200 }, { "epoch": 76.37362637362638, "grad_norm": 1.8209795951843262, "learning_rate": 0.001, "loss": 1.631, "step": 236300 }, { "epoch": 76.40594699418229, "grad_norm": 1.7870666980743408, "learning_rate": 0.001, "loss": 1.6318, "step": 236400 }, { "epoch": 76.43826761473821, "grad_norm": 1.806114912033081, "learning_rate": 0.001, "loss": 1.6417, "step": 236500 }, { "epoch": 76.47058823529412, "grad_norm": 1.4530296325683594, "learning_rate": 0.001, "loss": 1.6228, "step": 236600 }, { "epoch": 76.50290885585004, "grad_norm": 1.2128962278366089, "learning_rate": 0.001, "loss": 1.6369, "step": 236700 }, { "epoch": 76.53522947640595, "grad_norm": 1.590721845626831, "learning_rate": 0.001, "loss": 1.6399, "step": 236800 }, { "epoch": 76.56755009696187, "grad_norm": 1.3944813013076782, "learning_rate": 0.001, "loss": 1.6533, "step": 236900 }, { "epoch": 76.59987071751777, "grad_norm": 2.8855082988739014, "learning_rate": 0.001, "loss": 1.6589, "step": 237000 }, { "epoch": 76.6321913380737, "grad_norm": 1.5853347778320312, "learning_rate": 0.001, "loss": 1.6502, "step": 237100 }, { "epoch": 76.6645119586296, "grad_norm": 2.086627244949341, "learning_rate": 0.001, "loss": 1.6386, "step": 237200 }, { "epoch": 76.69683257918552, "grad_norm": 1.6242945194244385, "learning_rate": 0.001, "loss": 1.6529, "step": 237300 }, { "epoch": 76.72915319974143, "grad_norm": 1.409888744354248, "learning_rate": 0.001, "loss": 1.6683, "step": 237400 }, { "epoch": 76.76147382029735, "grad_norm": 1.2718594074249268, "learning_rate": 0.001, "loss": 1.6358, "step": 237500 }, { "epoch": 76.79379444085326, "grad_norm": 1.0313142538070679, "learning_rate": 0.001, "loss": 1.6714, "step": 237600 }, { "epoch": 76.82611506140918, "grad_norm": 1.281502604484558, "learning_rate": 0.001, "loss": 1.6578, "step": 237700 }, { "epoch": 76.85843568196509, "grad_norm": 1.4038314819335938, "learning_rate": 0.001, "loss": 1.6812, "step": 237800 }, { "epoch": 76.89075630252101, "grad_norm": 1.111031413078308, "learning_rate": 0.001, "loss": 1.6721, "step": 237900 }, { "epoch": 76.92307692307692, "grad_norm": 2.1092193126678467, "learning_rate": 0.001, "loss": 1.6766, "step": 238000 }, { "epoch": 76.95539754363284, "grad_norm": 1.484304428100586, "learning_rate": 0.001, "loss": 1.6899, "step": 238100 }, { "epoch": 76.98771816418875, "grad_norm": 1.763668417930603, "learning_rate": 0.001, "loss": 1.6666, "step": 238200 }, { "epoch": 77.02003878474467, "grad_norm": 2.121307373046875, "learning_rate": 0.001, "loss": 1.6301, "step": 238300 }, { "epoch": 77.05235940530058, "grad_norm": 1.3344942331314087, "learning_rate": 0.001, "loss": 1.5756, "step": 238400 }, { "epoch": 77.0846800258565, "grad_norm": 1.743501901626587, "learning_rate": 0.001, "loss": 1.5888, "step": 238500 }, { "epoch": 77.11700064641241, "grad_norm": 1.5982078313827515, "learning_rate": 0.001, "loss": 1.5897, "step": 238600 }, { "epoch": 77.14932126696833, "grad_norm": 1.2819818258285522, "learning_rate": 0.001, "loss": 1.5894, "step": 238700 }, { "epoch": 77.18164188752424, "grad_norm": 2.0731258392333984, "learning_rate": 0.001, "loss": 1.5944, "step": 238800 }, { "epoch": 77.21396250808016, "grad_norm": 1.3270511627197266, "learning_rate": 0.001, "loss": 1.6024, "step": 238900 }, { "epoch": 77.24628312863607, "grad_norm": 1.2472208738327026, "learning_rate": 0.001, "loss": 1.5898, "step": 239000 }, { "epoch": 77.27860374919199, "grad_norm": 1.5197229385375977, "learning_rate": 0.001, "loss": 1.5989, "step": 239100 }, { "epoch": 77.3109243697479, "grad_norm": 4.184530735015869, "learning_rate": 0.001, "loss": 1.6168, "step": 239200 }, { "epoch": 77.34324499030382, "grad_norm": 1.4768939018249512, "learning_rate": 0.001, "loss": 1.5959, "step": 239300 }, { "epoch": 77.37556561085972, "grad_norm": 1.2789695262908936, "learning_rate": 0.001, "loss": 1.6271, "step": 239400 }, { "epoch": 77.40788623141565, "grad_norm": 1.0838463306427002, "learning_rate": 0.001, "loss": 1.6175, "step": 239500 }, { "epoch": 77.44020685197155, "grad_norm": 1.1696804761886597, "learning_rate": 0.001, "loss": 1.6304, "step": 239600 }, { "epoch": 77.47252747252747, "grad_norm": 1.1669255495071411, "learning_rate": 0.001, "loss": 1.6285, "step": 239700 }, { "epoch": 77.50484809308338, "grad_norm": 1.0003498792648315, "learning_rate": 0.001, "loss": 1.6155, "step": 239800 }, { "epoch": 77.5371687136393, "grad_norm": 1.1938159465789795, "learning_rate": 0.001, "loss": 1.6274, "step": 239900 }, { "epoch": 77.56948933419521, "grad_norm": 2.1750431060791016, "learning_rate": 0.001, "loss": 1.6365, "step": 240000 }, { "epoch": 77.60180995475113, "grad_norm": 1.2943141460418701, "learning_rate": 0.001, "loss": 1.6272, "step": 240100 }, { "epoch": 77.63413057530704, "grad_norm": 1.361413598060608, "learning_rate": 0.001, "loss": 1.6508, "step": 240200 }, { "epoch": 77.66645119586296, "grad_norm": 1.1791378259658813, "learning_rate": 0.001, "loss": 1.657, "step": 240300 }, { "epoch": 77.69877181641887, "grad_norm": 1.3610637187957764, "learning_rate": 0.001, "loss": 1.641, "step": 240400 }, { "epoch": 77.73109243697479, "grad_norm": 1.4122557640075684, "learning_rate": 0.001, "loss": 1.6517, "step": 240500 }, { "epoch": 77.7634130575307, "grad_norm": 1.3932771682739258, "learning_rate": 0.001, "loss": 1.6415, "step": 240600 }, { "epoch": 77.79573367808662, "grad_norm": 1.4181333780288696, "learning_rate": 0.001, "loss": 1.6537, "step": 240700 }, { "epoch": 77.82805429864253, "grad_norm": 1.1598999500274658, "learning_rate": 0.001, "loss": 1.6624, "step": 240800 }, { "epoch": 77.86037491919845, "grad_norm": 1.7837715148925781, "learning_rate": 0.001, "loss": 1.6493, "step": 240900 }, { "epoch": 77.89269553975436, "grad_norm": 1.9749995470046997, "learning_rate": 0.001, "loss": 1.6518, "step": 241000 }, { "epoch": 77.92501616031028, "grad_norm": 1.265308141708374, "learning_rate": 0.001, "loss": 1.6632, "step": 241100 }, { "epoch": 77.95733678086619, "grad_norm": 12.52112865447998, "learning_rate": 0.001, "loss": 1.6589, "step": 241200 }, { "epoch": 77.98965740142211, "grad_norm": 1.4199000597000122, "learning_rate": 0.001, "loss": 1.6582, "step": 241300 }, { "epoch": 78.02197802197803, "grad_norm": 1.0380791425704956, "learning_rate": 0.001, "loss": 1.5951, "step": 241400 }, { "epoch": 78.05429864253394, "grad_norm": 1.1607108116149902, "learning_rate": 0.001, "loss": 1.55, "step": 241500 }, { "epoch": 78.08661926308986, "grad_norm": 1.1586447954177856, "learning_rate": 0.001, "loss": 1.5547, "step": 241600 }, { "epoch": 78.11893988364577, "grad_norm": 1.426910638809204, "learning_rate": 0.001, "loss": 1.5625, "step": 241700 }, { "epoch": 78.15126050420169, "grad_norm": 1.4128625392913818, "learning_rate": 0.001, "loss": 1.5643, "step": 241800 }, { "epoch": 78.1835811247576, "grad_norm": 1.4470938444137573, "learning_rate": 0.001, "loss": 1.5783, "step": 241900 }, { "epoch": 78.21590174531352, "grad_norm": 1.0999311208724976, "learning_rate": 0.001, "loss": 1.5634, "step": 242000 }, { "epoch": 78.24822236586942, "grad_norm": 1.6111093759536743, "learning_rate": 0.001, "loss": 1.5889, "step": 242100 }, { "epoch": 78.28054298642535, "grad_norm": 4.553422451019287, "learning_rate": 0.001, "loss": 1.5951, "step": 242200 }, { "epoch": 78.31286360698125, "grad_norm": 1.6084586381912231, "learning_rate": 0.001, "loss": 1.5895, "step": 242300 }, { "epoch": 78.34518422753717, "grad_norm": 1.5105477571487427, "learning_rate": 0.001, "loss": 1.6008, "step": 242400 }, { "epoch": 78.37750484809308, "grad_norm": 1.2176487445831299, "learning_rate": 0.001, "loss": 1.6143, "step": 242500 }, { "epoch": 78.409825468649, "grad_norm": 1.632798671722412, "learning_rate": 0.001, "loss": 1.5969, "step": 242600 }, { "epoch": 78.44214608920491, "grad_norm": 1.1538586616516113, "learning_rate": 0.001, "loss": 1.6028, "step": 242700 }, { "epoch": 78.47446670976083, "grad_norm": 1.3611412048339844, "learning_rate": 0.001, "loss": 1.6102, "step": 242800 }, { "epoch": 78.50678733031674, "grad_norm": 1.2141348123550415, "learning_rate": 0.001, "loss": 1.5974, "step": 242900 }, { "epoch": 78.53910795087266, "grad_norm": 0.9487605094909668, "learning_rate": 0.001, "loss": 1.6311, "step": 243000 }, { "epoch": 78.57142857142857, "grad_norm": 1.5475208759307861, "learning_rate": 0.001, "loss": 1.6249, "step": 243100 }, { "epoch": 78.60374919198449, "grad_norm": 1.3245047330856323, "learning_rate": 0.001, "loss": 1.6246, "step": 243200 }, { "epoch": 78.6360698125404, "grad_norm": 1.3520212173461914, "learning_rate": 0.001, "loss": 1.6301, "step": 243300 }, { "epoch": 78.66839043309632, "grad_norm": 1.177619218826294, "learning_rate": 0.001, "loss": 1.6324, "step": 243400 }, { "epoch": 78.70071105365223, "grad_norm": 1.1308724880218506, "learning_rate": 0.001, "loss": 1.6408, "step": 243500 }, { "epoch": 78.73303167420815, "grad_norm": 1.2490086555480957, "learning_rate": 0.001, "loss": 1.6472, "step": 243600 }, { "epoch": 78.76535229476406, "grad_norm": 1.8594279289245605, "learning_rate": 0.001, "loss": 1.6386, "step": 243700 }, { "epoch": 78.79767291531998, "grad_norm": 1.3722211122512817, "learning_rate": 0.001, "loss": 1.6473, "step": 243800 }, { "epoch": 78.82999353587589, "grad_norm": 1.6551728248596191, "learning_rate": 0.001, "loss": 1.6512, "step": 243900 }, { "epoch": 78.86231415643181, "grad_norm": 1.1714541912078857, "learning_rate": 0.001, "loss": 1.6653, "step": 244000 }, { "epoch": 78.89463477698771, "grad_norm": 1.7687879800796509, "learning_rate": 0.001, "loss": 1.6416, "step": 244100 }, { "epoch": 78.92695539754364, "grad_norm": 1.8298914432525635, "learning_rate": 0.001, "loss": 1.646, "step": 244200 }, { "epoch": 78.95927601809954, "grad_norm": 1.2797116041183472, "learning_rate": 0.001, "loss": 1.6708, "step": 244300 }, { "epoch": 78.99159663865547, "grad_norm": 1.3034850358963013, "learning_rate": 0.001, "loss": 1.6526, "step": 244400 }, { "epoch": 79.02391725921137, "grad_norm": 1.067244291305542, "learning_rate": 0.001, "loss": 1.5846, "step": 244500 }, { "epoch": 79.0562378797673, "grad_norm": 1.2020647525787354, "learning_rate": 0.001, "loss": 1.5449, "step": 244600 }, { "epoch": 79.0885585003232, "grad_norm": 13.07027530670166, "learning_rate": 0.001, "loss": 1.5269, "step": 244700 }, { "epoch": 79.12087912087912, "grad_norm": 4.447683811187744, "learning_rate": 0.001, "loss": 1.5564, "step": 244800 }, { "epoch": 79.15319974143503, "grad_norm": 28.382280349731445, "learning_rate": 0.001, "loss": 1.5777, "step": 244900 }, { "epoch": 79.18552036199095, "grad_norm": 3.7045793533325195, "learning_rate": 0.001, "loss": 1.5819, "step": 245000 }, { "epoch": 79.21784098254686, "grad_norm": 1.0404257774353027, "learning_rate": 0.001, "loss": 1.5954, "step": 245100 }, { "epoch": 79.25016160310278, "grad_norm": 1.217283010482788, "learning_rate": 0.001, "loss": 1.5791, "step": 245200 }, { "epoch": 79.28248222365869, "grad_norm": 9.760650634765625, "learning_rate": 0.001, "loss": 1.585, "step": 245300 }, { "epoch": 79.31480284421461, "grad_norm": 1.4118096828460693, "learning_rate": 0.001, "loss": 1.5806, "step": 245400 }, { "epoch": 79.34712346477052, "grad_norm": 1.0972450971603394, "learning_rate": 0.001, "loss": 1.5889, "step": 245500 }, { "epoch": 79.37944408532644, "grad_norm": 1.2753347158432007, "learning_rate": 0.001, "loss": 1.5862, "step": 245600 }, { "epoch": 79.41176470588235, "grad_norm": 2.0151355266571045, "learning_rate": 0.001, "loss": 1.6046, "step": 245700 }, { "epoch": 79.44408532643827, "grad_norm": 7.013357639312744, "learning_rate": 0.001, "loss": 1.6063, "step": 245800 }, { "epoch": 79.47640594699418, "grad_norm": 1.5569343566894531, "learning_rate": 0.001, "loss": 1.6039, "step": 245900 }, { "epoch": 79.5087265675501, "grad_norm": 1.5924434661865234, "learning_rate": 0.001, "loss": 1.5983, "step": 246000 }, { "epoch": 79.541047188106, "grad_norm": 5.646777629852295, "learning_rate": 0.001, "loss": 1.6113, "step": 246100 }, { "epoch": 79.57336780866193, "grad_norm": 1.4677931070327759, "learning_rate": 0.001, "loss": 1.6169, "step": 246200 }, { "epoch": 79.60568842921784, "grad_norm": 1.2087472677230835, "learning_rate": 0.001, "loss": 1.6274, "step": 246300 }, { "epoch": 79.63800904977376, "grad_norm": 1.1089204549789429, "learning_rate": 0.001, "loss": 1.629, "step": 246400 }, { "epoch": 79.67032967032966, "grad_norm": 1.6383118629455566, "learning_rate": 0.001, "loss": 1.622, "step": 246500 }, { "epoch": 79.70265029088559, "grad_norm": 1.0716074705123901, "learning_rate": 0.001, "loss": 1.6227, "step": 246600 }, { "epoch": 79.7349709114415, "grad_norm": 1.0975455045700073, "learning_rate": 0.001, "loss": 1.6217, "step": 246700 }, { "epoch": 79.76729153199742, "grad_norm": 1.2421058416366577, "learning_rate": 0.001, "loss": 1.618, "step": 246800 }, { "epoch": 79.79961215255332, "grad_norm": 1.091660499572754, "learning_rate": 0.001, "loss": 1.6506, "step": 246900 }, { "epoch": 79.83193277310924, "grad_norm": 1.3408653736114502, "learning_rate": 0.001, "loss": 1.6432, "step": 247000 }, { "epoch": 79.86425339366515, "grad_norm": 1.313957691192627, "learning_rate": 0.001, "loss": 1.6442, "step": 247100 }, { "epoch": 79.89657401422107, "grad_norm": 1.483573317527771, "learning_rate": 0.001, "loss": 1.6531, "step": 247200 }, { "epoch": 79.92889463477698, "grad_norm": 8.41736888885498, "learning_rate": 0.001, "loss": 1.6552, "step": 247300 }, { "epoch": 79.9612152553329, "grad_norm": 1.258069396018982, "learning_rate": 0.001, "loss": 1.6651, "step": 247400 }, { "epoch": 79.99353587588882, "grad_norm": 1.2524051666259766, "learning_rate": 0.001, "loss": 1.6617, "step": 247500 }, { "epoch": 80.02585649644473, "grad_norm": 1.1321851015090942, "learning_rate": 0.001, "loss": 1.5642, "step": 247600 }, { "epoch": 80.05817711700065, "grad_norm": 15.498127937316895, "learning_rate": 0.001, "loss": 1.5431, "step": 247700 }, { "epoch": 80.09049773755656, "grad_norm": 1.25615656375885, "learning_rate": 0.001, "loss": 1.5443, "step": 247800 }, { "epoch": 80.12281835811248, "grad_norm": 1.4304814338684082, "learning_rate": 0.001, "loss": 1.5569, "step": 247900 }, { "epoch": 80.15513897866839, "grad_norm": 1.6404039859771729, "learning_rate": 0.001, "loss": 1.5647, "step": 248000 }, { "epoch": 80.18745959922431, "grad_norm": 1.1385866403579712, "learning_rate": 0.001, "loss": 1.5595, "step": 248100 }, { "epoch": 80.21978021978022, "grad_norm": 1.2329909801483154, "learning_rate": 0.001, "loss": 1.5797, "step": 248200 }, { "epoch": 80.25210084033614, "grad_norm": 1.29256010055542, "learning_rate": 0.001, "loss": 1.5727, "step": 248300 }, { "epoch": 80.28442146089205, "grad_norm": 1.2657992839813232, "learning_rate": 0.001, "loss": 1.597, "step": 248400 }, { "epoch": 80.31674208144797, "grad_norm": 1.3083691596984863, "learning_rate": 0.001, "loss": 1.5756, "step": 248500 }, { "epoch": 80.34906270200388, "grad_norm": 3.7851808071136475, "learning_rate": 0.001, "loss": 1.5913, "step": 248600 }, { "epoch": 80.3813833225598, "grad_norm": 1.15293288230896, "learning_rate": 0.001, "loss": 1.6001, "step": 248700 }, { "epoch": 80.4137039431157, "grad_norm": 1.7484172582626343, "learning_rate": 0.001, "loss": 1.6007, "step": 248800 }, { "epoch": 80.44602456367163, "grad_norm": 1.6484850645065308, "learning_rate": 0.001, "loss": 1.5915, "step": 248900 }, { "epoch": 80.47834518422754, "grad_norm": 2.8596177101135254, "learning_rate": 0.001, "loss": 1.598, "step": 249000 }, { "epoch": 80.51066580478346, "grad_norm": 3.3007023334503174, "learning_rate": 0.001, "loss": 1.5949, "step": 249100 }, { "epoch": 80.54298642533936, "grad_norm": 1.4551092386245728, "learning_rate": 0.001, "loss": 1.5991, "step": 249200 }, { "epoch": 80.57530704589529, "grad_norm": 1.7301362752914429, "learning_rate": 0.001, "loss": 1.6142, "step": 249300 }, { "epoch": 80.6076276664512, "grad_norm": 3.4815661907196045, "learning_rate": 0.001, "loss": 1.6271, "step": 249400 }, { "epoch": 80.63994828700712, "grad_norm": 2.5372705459594727, "learning_rate": 0.001, "loss": 1.6088, "step": 249500 }, { "epoch": 80.67226890756302, "grad_norm": 2.65199875831604, "learning_rate": 0.001, "loss": 1.6201, "step": 249600 }, { "epoch": 80.70458952811894, "grad_norm": 2.5470521450042725, "learning_rate": 0.001, "loss": 1.6071, "step": 249700 }, { "epoch": 80.73691014867485, "grad_norm": 1.7027024030685425, "learning_rate": 0.001, "loss": 1.6282, "step": 249800 }, { "epoch": 80.76923076923077, "grad_norm": 1.9036833047866821, "learning_rate": 0.001, "loss": 1.6219, "step": 249900 }, { "epoch": 80.80155138978668, "grad_norm": 1.9514585733413696, "learning_rate": 0.001, "loss": 1.6261, "step": 250000 }, { "epoch": 80.8338720103426, "grad_norm": 1.1746447086334229, "learning_rate": 0.001, "loss": 1.6337, "step": 250100 }, { "epoch": 80.86619263089851, "grad_norm": 1.421815276145935, "learning_rate": 0.001, "loss": 1.6421, "step": 250200 }, { "epoch": 80.89851325145443, "grad_norm": 1.3356549739837646, "learning_rate": 0.001, "loss": 1.6543, "step": 250300 }, { "epoch": 80.93083387201034, "grad_norm": 2.1185178756713867, "learning_rate": 0.001, "loss": 1.6495, "step": 250400 }, { "epoch": 80.96315449256626, "grad_norm": 1.6299835443496704, "learning_rate": 0.001, "loss": 1.6538, "step": 250500 }, { "epoch": 80.99547511312217, "grad_norm": 1.219862461090088, "learning_rate": 0.001, "loss": 1.6615, "step": 250600 }, { "epoch": 81.02779573367809, "grad_norm": 1.0790340900421143, "learning_rate": 0.001, "loss": 1.548, "step": 250700 }, { "epoch": 81.060116354234, "grad_norm": 2.312631368637085, "learning_rate": 0.001, "loss": 1.5601, "step": 250800 }, { "epoch": 81.09243697478992, "grad_norm": 5.61232852935791, "learning_rate": 0.001, "loss": 1.5443, "step": 250900 }, { "epoch": 81.12475759534583, "grad_norm": 1.2224292755126953, "learning_rate": 0.001, "loss": 1.5576, "step": 251000 }, { "epoch": 81.15707821590175, "grad_norm": 3.8390119075775146, "learning_rate": 0.001, "loss": 1.5608, "step": 251100 }, { "epoch": 81.18939883645766, "grad_norm": 1.3383690118789673, "learning_rate": 0.001, "loss": 1.5706, "step": 251200 }, { "epoch": 81.22171945701358, "grad_norm": 1.3198847770690918, "learning_rate": 0.001, "loss": 1.5538, "step": 251300 }, { "epoch": 81.25404007756948, "grad_norm": 1.3999378681182861, "learning_rate": 0.001, "loss": 1.5591, "step": 251400 }, { "epoch": 81.2863606981254, "grad_norm": 1.3665369749069214, "learning_rate": 0.001, "loss": 1.5957, "step": 251500 }, { "epoch": 81.31868131868131, "grad_norm": 1.3465888500213623, "learning_rate": 0.001, "loss": 1.5889, "step": 251600 }, { "epoch": 81.35100193923724, "grad_norm": 1.778459072113037, "learning_rate": 0.001, "loss": 1.5934, "step": 251700 }, { "epoch": 81.38332255979314, "grad_norm": 1.2961868047714233, "learning_rate": 0.001, "loss": 1.5878, "step": 251800 }, { "epoch": 81.41564318034906, "grad_norm": 1.9177812337875366, "learning_rate": 0.001, "loss": 1.5731, "step": 251900 }, { "epoch": 81.44796380090497, "grad_norm": 1.4644643068313599, "learning_rate": 0.001, "loss": 1.6133, "step": 252000 }, { "epoch": 81.4802844214609, "grad_norm": 2.4014549255371094, "learning_rate": 0.001, "loss": 1.6087, "step": 252100 }, { "epoch": 81.5126050420168, "grad_norm": 2.424337387084961, "learning_rate": 0.001, "loss": 1.6089, "step": 252200 }, { "epoch": 81.54492566257272, "grad_norm": 2.41424822807312, "learning_rate": 0.001, "loss": 1.6145, "step": 252300 }, { "epoch": 81.57724628312863, "grad_norm": 1.4580813646316528, "learning_rate": 0.001, "loss": 1.6301, "step": 252400 }, { "epoch": 81.60956690368455, "grad_norm": 1.5222786664962769, "learning_rate": 0.001, "loss": 1.6302, "step": 252500 }, { "epoch": 81.64188752424046, "grad_norm": 2.365048408508301, "learning_rate": 0.001, "loss": 1.6083, "step": 252600 }, { "epoch": 81.67420814479638, "grad_norm": 2.286128044128418, "learning_rate": 0.001, "loss": 1.6209, "step": 252700 }, { "epoch": 81.70652876535229, "grad_norm": 2.4110453128814697, "learning_rate": 0.001, "loss": 1.6246, "step": 252800 }, { "epoch": 81.73884938590821, "grad_norm": 1.415271520614624, "learning_rate": 0.001, "loss": 1.6356, "step": 252900 }, { "epoch": 81.77117000646412, "grad_norm": 1.7912102937698364, "learning_rate": 0.001, "loss": 1.6314, "step": 253000 }, { "epoch": 81.80349062702004, "grad_norm": 2.1513283252716064, "learning_rate": 0.001, "loss": 1.6585, "step": 253100 }, { "epoch": 81.83581124757595, "grad_norm": 1.3145391941070557, "learning_rate": 0.001, "loss": 1.6549, "step": 253200 }, { "epoch": 81.86813186813187, "grad_norm": 1.5342000722885132, "learning_rate": 0.001, "loss": 1.6504, "step": 253300 }, { "epoch": 81.90045248868778, "grad_norm": 1.8358556032180786, "learning_rate": 0.001, "loss": 1.642, "step": 253400 }, { "epoch": 81.9327731092437, "grad_norm": 1.547221064567566, "learning_rate": 0.001, "loss": 1.6757, "step": 253500 }, { "epoch": 81.9650937297996, "grad_norm": 1.3905847072601318, "learning_rate": 0.001, "loss": 1.6645, "step": 253600 }, { "epoch": 81.99741435035553, "grad_norm": 1.6543893814086914, "learning_rate": 0.001, "loss": 1.6539, "step": 253700 }, { "epoch": 82.02973497091145, "grad_norm": 1.3796234130859375, "learning_rate": 0.001, "loss": 1.5526, "step": 253800 }, { "epoch": 82.06205559146736, "grad_norm": 2.4896278381347656, "learning_rate": 0.001, "loss": 1.5428, "step": 253900 }, { "epoch": 82.09437621202328, "grad_norm": 1.3422685861587524, "learning_rate": 0.001, "loss": 1.558, "step": 254000 }, { "epoch": 82.12669683257919, "grad_norm": 1.805012583732605, "learning_rate": 0.001, "loss": 1.5475, "step": 254100 }, { "epoch": 82.1590174531351, "grad_norm": 2.8302295207977295, "learning_rate": 0.001, "loss": 1.5774, "step": 254200 }, { "epoch": 82.19133807369101, "grad_norm": 1.5857865810394287, "learning_rate": 0.001, "loss": 1.5755, "step": 254300 }, { "epoch": 82.22365869424694, "grad_norm": 1.4486957788467407, "learning_rate": 0.001, "loss": 1.5748, "step": 254400 }, { "epoch": 82.25597931480284, "grad_norm": 1.4831637144088745, "learning_rate": 0.001, "loss": 1.5714, "step": 254500 }, { "epoch": 82.28829993535876, "grad_norm": 1.4155056476593018, "learning_rate": 0.001, "loss": 1.5776, "step": 254600 }, { "epoch": 82.32062055591467, "grad_norm": 2.597184181213379, "learning_rate": 0.001, "loss": 1.5847, "step": 254700 }, { "epoch": 82.3529411764706, "grad_norm": 1.5585006475448608, "learning_rate": 0.001, "loss": 1.5985, "step": 254800 }, { "epoch": 82.3852617970265, "grad_norm": 8.193127632141113, "learning_rate": 0.001, "loss": 1.6003, "step": 254900 }, { "epoch": 82.41758241758242, "grad_norm": 1.3331634998321533, "learning_rate": 0.001, "loss": 1.5867, "step": 255000 }, { "epoch": 82.44990303813833, "grad_norm": 2.130753755569458, "learning_rate": 0.001, "loss": 1.579, "step": 255100 }, { "epoch": 82.48222365869425, "grad_norm": 1.3062959909439087, "learning_rate": 0.001, "loss": 1.591, "step": 255200 }, { "epoch": 82.51454427925016, "grad_norm": 1.4880868196487427, "learning_rate": 0.001, "loss": 1.6037, "step": 255300 }, { "epoch": 82.54686489980608, "grad_norm": 1.628580093383789, "learning_rate": 0.001, "loss": 1.6135, "step": 255400 }, { "epoch": 82.57918552036199, "grad_norm": 16.428266525268555, "learning_rate": 0.001, "loss": 1.6053, "step": 255500 }, { "epoch": 82.61150614091791, "grad_norm": 1.4747267961502075, "learning_rate": 0.001, "loss": 1.6248, "step": 255600 }, { "epoch": 82.64382676147382, "grad_norm": 2.0591182708740234, "learning_rate": 0.001, "loss": 1.6151, "step": 255700 }, { "epoch": 82.67614738202974, "grad_norm": 2.2506515979766846, "learning_rate": 0.001, "loss": 1.6258, "step": 255800 }, { "epoch": 82.70846800258565, "grad_norm": 1.3912527561187744, "learning_rate": 0.001, "loss": 1.644, "step": 255900 }, { "epoch": 82.74078862314157, "grad_norm": 1.3549816608428955, "learning_rate": 0.001, "loss": 1.6287, "step": 256000 }, { "epoch": 82.77310924369748, "grad_norm": 1.2930629253387451, "learning_rate": 0.001, "loss": 1.6343, "step": 256100 }, { "epoch": 82.8054298642534, "grad_norm": 1.5010454654693604, "learning_rate": 0.001, "loss": 1.628, "step": 256200 }, { "epoch": 82.8377504848093, "grad_norm": 1.7856627702713013, "learning_rate": 0.001, "loss": 1.6389, "step": 256300 }, { "epoch": 82.87007110536523, "grad_norm": 1.5400707721710205, "learning_rate": 0.001, "loss": 1.6428, "step": 256400 }, { "epoch": 82.90239172592113, "grad_norm": 1.4425150156021118, "learning_rate": 0.001, "loss": 1.6589, "step": 256500 }, { "epoch": 82.93471234647706, "grad_norm": 1.5632411241531372, "learning_rate": 0.001, "loss": 1.6427, "step": 256600 }, { "epoch": 82.96703296703296, "grad_norm": 1.5587055683135986, "learning_rate": 0.001, "loss": 1.6477, "step": 256700 }, { "epoch": 82.99935358758889, "grad_norm": 2.5419251918792725, "learning_rate": 0.001, "loss": 1.6311, "step": 256800 }, { "epoch": 83.03167420814479, "grad_norm": 2.4348669052124023, "learning_rate": 0.001, "loss": 1.527, "step": 256900 }, { "epoch": 83.06399482870071, "grad_norm": 1.5489720106124878, "learning_rate": 0.001, "loss": 1.536, "step": 257000 }, { "epoch": 83.09631544925662, "grad_norm": 1.732118010520935, "learning_rate": 0.001, "loss": 1.5475, "step": 257100 }, { "epoch": 83.12863606981254, "grad_norm": 1.5908674001693726, "learning_rate": 0.001, "loss": 1.5496, "step": 257200 }, { "epoch": 83.16095669036845, "grad_norm": 1.878757357597351, "learning_rate": 0.001, "loss": 1.5567, "step": 257300 }, { "epoch": 83.19327731092437, "grad_norm": 2.1242942810058594, "learning_rate": 0.001, "loss": 1.5516, "step": 257400 }, { "epoch": 83.22559793148028, "grad_norm": 1.9912755489349365, "learning_rate": 0.001, "loss": 1.5564, "step": 257500 }, { "epoch": 83.2579185520362, "grad_norm": 1.6771790981292725, "learning_rate": 0.001, "loss": 1.5671, "step": 257600 }, { "epoch": 83.29023917259211, "grad_norm": 1.4958374500274658, "learning_rate": 0.001, "loss": 1.5742, "step": 257700 }, { "epoch": 83.32255979314803, "grad_norm": 3.4434475898742676, "learning_rate": 0.001, "loss": 1.5674, "step": 257800 }, { "epoch": 83.35488041370394, "grad_norm": 1.4015775918960571, "learning_rate": 0.001, "loss": 1.5732, "step": 257900 }, { "epoch": 83.38720103425986, "grad_norm": 9.781081199645996, "learning_rate": 0.001, "loss": 1.5709, "step": 258000 }, { "epoch": 83.41952165481577, "grad_norm": 2.189282178878784, "learning_rate": 0.001, "loss": 1.573, "step": 258100 }, { "epoch": 83.45184227537169, "grad_norm": 2.8172805309295654, "learning_rate": 0.001, "loss": 1.5836, "step": 258200 }, { "epoch": 83.4841628959276, "grad_norm": 9.957198143005371, "learning_rate": 0.001, "loss": 1.5949, "step": 258300 }, { "epoch": 83.51648351648352, "grad_norm": 2.227811813354492, "learning_rate": 0.001, "loss": 1.5982, "step": 258400 }, { "epoch": 83.54880413703943, "grad_norm": 2.4435055255889893, "learning_rate": 0.001, "loss": 1.5878, "step": 258500 }, { "epoch": 83.58112475759535, "grad_norm": 2.036557674407959, "learning_rate": 0.001, "loss": 1.6022, "step": 258600 }, { "epoch": 83.61344537815125, "grad_norm": 1.511379599571228, "learning_rate": 0.001, "loss": 1.6237, "step": 258700 }, { "epoch": 83.64576599870718, "grad_norm": 1.346367597579956, "learning_rate": 0.001, "loss": 1.6182, "step": 258800 }, { "epoch": 83.67808661926308, "grad_norm": 2.0597476959228516, "learning_rate": 0.001, "loss": 1.6073, "step": 258900 }, { "epoch": 83.710407239819, "grad_norm": 2.1488609313964844, "learning_rate": 0.001, "loss": 1.613, "step": 259000 }, { "epoch": 83.74272786037491, "grad_norm": 2.5246944427490234, "learning_rate": 0.001, "loss": 1.6281, "step": 259100 }, { "epoch": 83.77504848093083, "grad_norm": 1.966814637184143, "learning_rate": 0.001, "loss": 1.6153, "step": 259200 }, { "epoch": 83.80736910148674, "grad_norm": 2.2585253715515137, "learning_rate": 0.001, "loss": 1.6314, "step": 259300 }, { "epoch": 83.83968972204266, "grad_norm": 1.4614840745925903, "learning_rate": 0.001, "loss": 1.633, "step": 259400 }, { "epoch": 83.87201034259857, "grad_norm": 1.4048881530761719, "learning_rate": 0.001, "loss": 1.6323, "step": 259500 }, { "epoch": 83.9043309631545, "grad_norm": 7.812989234924316, "learning_rate": 0.001, "loss": 1.6429, "step": 259600 }, { "epoch": 83.9366515837104, "grad_norm": 1.3376606702804565, "learning_rate": 0.001, "loss": 1.6485, "step": 259700 }, { "epoch": 83.96897220426632, "grad_norm": 2.1219332218170166, "learning_rate": 0.001, "loss": 1.6646, "step": 259800 }, { "epoch": 84.00129282482224, "grad_norm": 1.7400177717208862, "learning_rate": 0.001, "loss": 1.6335, "step": 259900 }, { "epoch": 84.03361344537815, "grad_norm": 1.9422446489334106, "learning_rate": 0.001, "loss": 1.5242, "step": 260000 }, { "epoch": 84.06593406593407, "grad_norm": 1.347244143486023, "learning_rate": 0.001, "loss": 1.5399, "step": 260100 }, { "epoch": 84.09825468648998, "grad_norm": 4.632789134979248, "learning_rate": 0.001, "loss": 1.5131, "step": 260200 }, { "epoch": 84.1305753070459, "grad_norm": 1.5854743719100952, "learning_rate": 0.001, "loss": 1.553, "step": 260300 }, { "epoch": 84.16289592760181, "grad_norm": 1.5655384063720703, "learning_rate": 0.001, "loss": 1.5437, "step": 260400 }, { "epoch": 84.19521654815773, "grad_norm": 1.5425963401794434, "learning_rate": 0.001, "loss": 1.5542, "step": 260500 }, { "epoch": 84.22753716871364, "grad_norm": 1.953409194946289, "learning_rate": 0.001, "loss": 1.5558, "step": 260600 }, { "epoch": 84.25985778926956, "grad_norm": 2.21527361869812, "learning_rate": 0.001, "loss": 1.5577, "step": 260700 }, { "epoch": 84.29217840982547, "grad_norm": 4.152698516845703, "learning_rate": 0.001, "loss": 1.5494, "step": 260800 }, { "epoch": 84.32449903038139, "grad_norm": 1.4578876495361328, "learning_rate": 0.001, "loss": 1.5619, "step": 260900 }, { "epoch": 84.3568196509373, "grad_norm": 3.5490121841430664, "learning_rate": 0.001, "loss": 1.5835, "step": 261000 }, { "epoch": 84.38914027149322, "grad_norm": 1.3849040269851685, "learning_rate": 0.001, "loss": 1.5581, "step": 261100 }, { "epoch": 84.42146089204913, "grad_norm": 2.739593029022217, "learning_rate": 0.001, "loss": 1.5893, "step": 261200 }, { "epoch": 84.45378151260505, "grad_norm": 1.5605545043945312, "learning_rate": 0.001, "loss": 1.572, "step": 261300 }, { "epoch": 84.48610213316095, "grad_norm": 1.4092210531234741, "learning_rate": 0.001, "loss": 1.5798, "step": 261400 }, { "epoch": 84.51842275371688, "grad_norm": 2.9716763496398926, "learning_rate": 0.001, "loss": 1.5883, "step": 261500 }, { "epoch": 84.55074337427278, "grad_norm": 2.138232946395874, "learning_rate": 0.001, "loss": 1.6016, "step": 261600 }, { "epoch": 84.5830639948287, "grad_norm": 1.201174259185791, "learning_rate": 0.001, "loss": 1.6048, "step": 261700 }, { "epoch": 84.61538461538461, "grad_norm": 1.3958754539489746, "learning_rate": 0.001, "loss": 1.5905, "step": 261800 }, { "epoch": 84.64770523594053, "grad_norm": 1.3445004224777222, "learning_rate": 0.001, "loss": 1.5849, "step": 261900 }, { "epoch": 84.68002585649644, "grad_norm": 1.4751265048980713, "learning_rate": 0.001, "loss": 1.5975, "step": 262000 }, { "epoch": 84.71234647705236, "grad_norm": 1.3639497756958008, "learning_rate": 0.001, "loss": 1.6085, "step": 262100 }, { "epoch": 84.74466709760827, "grad_norm": 2.3812975883483887, "learning_rate": 0.001, "loss": 1.5979, "step": 262200 }, { "epoch": 84.7769877181642, "grad_norm": 1.2370624542236328, "learning_rate": 0.001, "loss": 1.6158, "step": 262300 }, { "epoch": 84.8093083387201, "grad_norm": 1.1955881118774414, "learning_rate": 0.001, "loss": 1.6258, "step": 262400 }, { "epoch": 84.84162895927602, "grad_norm": 1.1861573457717896, "learning_rate": 0.001, "loss": 1.6166, "step": 262500 }, { "epoch": 84.87394957983193, "grad_norm": 1.4542728662490845, "learning_rate": 0.001, "loss": 1.6238, "step": 262600 }, { "epoch": 84.90627020038785, "grad_norm": 1.4606815576553345, "learning_rate": 0.001, "loss": 1.6262, "step": 262700 }, { "epoch": 84.93859082094376, "grad_norm": 1.5385528802871704, "learning_rate": 0.001, "loss": 1.636, "step": 262800 }, { "epoch": 84.97091144149968, "grad_norm": 1.3929976224899292, "learning_rate": 0.001, "loss": 1.6428, "step": 262900 }, { "epoch": 85.00323206205559, "grad_norm": 1.0263078212738037, "learning_rate": 0.001, "loss": 1.6313, "step": 263000 }, { "epoch": 85.03555268261151, "grad_norm": 1.1972743272781372, "learning_rate": 0.001, "loss": 1.5096, "step": 263100 }, { "epoch": 85.06787330316742, "grad_norm": 1.4424039125442505, "learning_rate": 0.001, "loss": 1.521, "step": 263200 }, { "epoch": 85.10019392372334, "grad_norm": 2.704977512359619, "learning_rate": 0.001, "loss": 1.5292, "step": 263300 }, { "epoch": 85.13251454427925, "grad_norm": 1.576430320739746, "learning_rate": 0.001, "loss": 1.5419, "step": 263400 }, { "epoch": 85.16483516483517, "grad_norm": 1.650712013244629, "learning_rate": 0.001, "loss": 1.5374, "step": 263500 }, { "epoch": 85.19715578539108, "grad_norm": 1.4409042596817017, "learning_rate": 0.001, "loss": 1.5368, "step": 263600 }, { "epoch": 85.229476405947, "grad_norm": 2.5067646503448486, "learning_rate": 0.001, "loss": 1.5361, "step": 263700 }, { "epoch": 85.2617970265029, "grad_norm": 1.4953469038009644, "learning_rate": 0.001, "loss": 1.549, "step": 263800 }, { "epoch": 85.29411764705883, "grad_norm": 1.273050308227539, "learning_rate": 0.001, "loss": 1.5448, "step": 263900 }, { "epoch": 85.32643826761473, "grad_norm": 2.0424065589904785, "learning_rate": 0.001, "loss": 1.5592, "step": 264000 }, { "epoch": 85.35875888817066, "grad_norm": 3.479045867919922, "learning_rate": 0.001, "loss": 1.5732, "step": 264100 }, { "epoch": 85.39107950872656, "grad_norm": 1.223633050918579, "learning_rate": 0.001, "loss": 1.5574, "step": 264200 }, { "epoch": 85.42340012928248, "grad_norm": 1.2373672723770142, "learning_rate": 0.001, "loss": 1.5717, "step": 264300 }, { "epoch": 85.45572074983839, "grad_norm": 1.027571201324463, "learning_rate": 0.001, "loss": 1.5674, "step": 264400 }, { "epoch": 85.48804137039431, "grad_norm": 1.2528064250946045, "learning_rate": 0.001, "loss": 1.5803, "step": 264500 }, { "epoch": 85.52036199095022, "grad_norm": 1.0824642181396484, "learning_rate": 0.001, "loss": 1.5687, "step": 264600 }, { "epoch": 85.55268261150614, "grad_norm": 1.1377485990524292, "learning_rate": 0.001, "loss": 1.5951, "step": 264700 }, { "epoch": 85.58500323206205, "grad_norm": 1.507084608078003, "learning_rate": 0.001, "loss": 1.5845, "step": 264800 }, { "epoch": 85.61732385261797, "grad_norm": 1.3310047388076782, "learning_rate": 0.001, "loss": 1.5956, "step": 264900 }, { "epoch": 85.64964447317388, "grad_norm": 1.16881263256073, "learning_rate": 0.001, "loss": 1.6, "step": 265000 }, { "epoch": 85.6819650937298, "grad_norm": 1.2248928546905518, "learning_rate": 0.001, "loss": 1.5866, "step": 265100 }, { "epoch": 85.71428571428571, "grad_norm": 1.2413365840911865, "learning_rate": 0.001, "loss": 1.6074, "step": 265200 }, { "epoch": 85.74660633484163, "grad_norm": 1.9377590417861938, "learning_rate": 0.001, "loss": 1.5962, "step": 265300 }, { "epoch": 85.77892695539754, "grad_norm": 1.4636646509170532, "learning_rate": 0.001, "loss": 1.5984, "step": 265400 }, { "epoch": 85.81124757595346, "grad_norm": 1.6694648265838623, "learning_rate": 0.001, "loss": 1.6261, "step": 265500 }, { "epoch": 85.84356819650937, "grad_norm": 1.4691592454910278, "learning_rate": 0.001, "loss": 1.5949, "step": 265600 }, { "epoch": 85.87588881706529, "grad_norm": 1.880424976348877, "learning_rate": 0.001, "loss": 1.6072, "step": 265700 }, { "epoch": 85.9082094376212, "grad_norm": 1.4752488136291504, "learning_rate": 0.001, "loss": 1.6148, "step": 265800 }, { "epoch": 85.94053005817712, "grad_norm": 1.400773286819458, "learning_rate": 0.001, "loss": 1.6246, "step": 265900 }, { "epoch": 85.97285067873302, "grad_norm": 1.2714358568191528, "learning_rate": 0.001, "loss": 1.6158, "step": 266000 }, { "epoch": 86.00517129928895, "grad_norm": 1.2563971281051636, "learning_rate": 0.001, "loss": 1.6169, "step": 266100 }, { "epoch": 86.03749191984487, "grad_norm": 1.088991641998291, "learning_rate": 0.001, "loss": 1.5164, "step": 266200 }, { "epoch": 86.06981254040078, "grad_norm": 1.339421033859253, "learning_rate": 0.001, "loss": 1.5305, "step": 266300 }, { "epoch": 86.1021331609567, "grad_norm": 3.483553647994995, "learning_rate": 0.001, "loss": 1.5259, "step": 266400 }, { "epoch": 86.1344537815126, "grad_norm": 1.8737198114395142, "learning_rate": 0.001, "loss": 1.5405, "step": 266500 }, { "epoch": 86.16677440206853, "grad_norm": 1.6179026365280151, "learning_rate": 0.001, "loss": 1.5359, "step": 266600 }, { "epoch": 86.19909502262443, "grad_norm": 1.313471794128418, "learning_rate": 0.001, "loss": 1.532, "step": 266700 }, { "epoch": 86.23141564318036, "grad_norm": 1.0954697132110596, "learning_rate": 0.001, "loss": 1.5525, "step": 266800 }, { "epoch": 86.26373626373626, "grad_norm": 1.0163735151290894, "learning_rate": 0.001, "loss": 1.5575, "step": 266900 }, { "epoch": 86.29605688429218, "grad_norm": 1.2303900718688965, "learning_rate": 0.001, "loss": 1.5569, "step": 267000 }, { "epoch": 86.32837750484809, "grad_norm": 1.2684378623962402, "learning_rate": 0.001, "loss": 1.5513, "step": 267100 }, { "epoch": 86.36069812540401, "grad_norm": 1.2811442613601685, "learning_rate": 0.001, "loss": 1.5526, "step": 267200 }, { "epoch": 86.39301874595992, "grad_norm": 1.667109727859497, "learning_rate": 0.001, "loss": 1.5724, "step": 267300 }, { "epoch": 86.42533936651584, "grad_norm": 1.1560689210891724, "learning_rate": 0.001, "loss": 1.5681, "step": 267400 }, { "epoch": 86.45765998707175, "grad_norm": 1.6124846935272217, "learning_rate": 0.001, "loss": 1.559, "step": 267500 }, { "epoch": 86.48998060762767, "grad_norm": 14.792082786560059, "learning_rate": 0.001, "loss": 1.5805, "step": 267600 }, { "epoch": 86.52230122818358, "grad_norm": 1.4856150150299072, "learning_rate": 0.001, "loss": 1.581, "step": 267700 }, { "epoch": 86.5546218487395, "grad_norm": 2.700788736343384, "learning_rate": 0.001, "loss": 1.5878, "step": 267800 }, { "epoch": 86.58694246929541, "grad_norm": 2.2360267639160156, "learning_rate": 0.001, "loss": 1.5869, "step": 267900 }, { "epoch": 86.61926308985133, "grad_norm": 1.5913814306259155, "learning_rate": 0.001, "loss": 1.5805, "step": 268000 }, { "epoch": 86.65158371040724, "grad_norm": 1.1521620750427246, "learning_rate": 0.001, "loss": 1.5979, "step": 268100 }, { "epoch": 86.68390433096316, "grad_norm": 1.7014782428741455, "learning_rate": 0.001, "loss": 1.599, "step": 268200 }, { "epoch": 86.71622495151907, "grad_norm": 1.042919635772705, "learning_rate": 0.001, "loss": 1.6032, "step": 268300 }, { "epoch": 86.74854557207499, "grad_norm": 1.1052900552749634, "learning_rate": 0.001, "loss": 1.5957, "step": 268400 }, { "epoch": 86.7808661926309, "grad_norm": 1.3181344270706177, "learning_rate": 0.001, "loss": 1.5895, "step": 268500 }, { "epoch": 86.81318681318682, "grad_norm": 4.746265411376953, "learning_rate": 0.001, "loss": 1.6074, "step": 268600 }, { "epoch": 86.84550743374272, "grad_norm": 1.1402766704559326, "learning_rate": 0.001, "loss": 1.5906, "step": 268700 }, { "epoch": 86.87782805429865, "grad_norm": 1.5800384283065796, "learning_rate": 0.001, "loss": 1.6008, "step": 268800 }, { "epoch": 86.91014867485455, "grad_norm": 1.712095022201538, "learning_rate": 0.001, "loss": 1.6107, "step": 268900 }, { "epoch": 86.94246929541048, "grad_norm": 1.3141100406646729, "learning_rate": 0.001, "loss": 1.6075, "step": 269000 }, { "epoch": 86.97478991596638, "grad_norm": 1.104447841644287, "learning_rate": 0.001, "loss": 1.6219, "step": 269100 }, { "epoch": 87.0071105365223, "grad_norm": 6.549962997436523, "learning_rate": 0.001, "loss": 1.5949, "step": 269200 }, { "epoch": 87.03943115707821, "grad_norm": 1.4453755617141724, "learning_rate": 0.001, "loss": 1.4987, "step": 269300 }, { "epoch": 87.07175177763413, "grad_norm": 1.2086541652679443, "learning_rate": 0.001, "loss": 1.5164, "step": 269400 }, { "epoch": 87.10407239819004, "grad_norm": 1.1379637718200684, "learning_rate": 0.001, "loss": 1.5278, "step": 269500 }, { "epoch": 87.13639301874596, "grad_norm": 1.0296906232833862, "learning_rate": 0.001, "loss": 1.5306, "step": 269600 }, { "epoch": 87.16871363930187, "grad_norm": 1.0902446508407593, "learning_rate": 0.001, "loss": 1.5089, "step": 269700 }, { "epoch": 87.20103425985779, "grad_norm": 1.2932292222976685, "learning_rate": 0.001, "loss": 1.5348, "step": 269800 }, { "epoch": 87.2333548804137, "grad_norm": 1.1966147422790527, "learning_rate": 0.001, "loss": 1.5299, "step": 269900 }, { "epoch": 87.26567550096962, "grad_norm": 3.919849157333374, "learning_rate": 0.001, "loss": 1.5363, "step": 270000 }, { "epoch": 87.29799612152553, "grad_norm": 1.6433371305465698, "learning_rate": 0.001, "loss": 1.5507, "step": 270100 }, { "epoch": 87.33031674208145, "grad_norm": 1.2449594736099243, "learning_rate": 0.001, "loss": 1.5427, "step": 270200 }, { "epoch": 87.36263736263736, "grad_norm": 1.1835765838623047, "learning_rate": 0.001, "loss": 1.5392, "step": 270300 }, { "epoch": 87.39495798319328, "grad_norm": 1.4599188566207886, "learning_rate": 0.001, "loss": 1.5497, "step": 270400 }, { "epoch": 87.42727860374919, "grad_norm": 1.7066702842712402, "learning_rate": 0.001, "loss": 1.5513, "step": 270500 }, { "epoch": 87.45959922430511, "grad_norm": 1.1744284629821777, "learning_rate": 0.001, "loss": 1.5618, "step": 270600 }, { "epoch": 87.49191984486102, "grad_norm": 1.4466147422790527, "learning_rate": 0.001, "loss": 1.5807, "step": 270700 }, { "epoch": 87.52424046541694, "grad_norm": 1.4953688383102417, "learning_rate": 0.001, "loss": 1.5471, "step": 270800 }, { "epoch": 87.55656108597285, "grad_norm": 1.0553315877914429, "learning_rate": 0.001, "loss": 1.5662, "step": 270900 }, { "epoch": 87.58888170652877, "grad_norm": 1.8200896978378296, "learning_rate": 0.001, "loss": 1.5672, "step": 271000 }, { "epoch": 87.62120232708467, "grad_norm": 1.230963110923767, "learning_rate": 0.001, "loss": 1.5764, "step": 271100 }, { "epoch": 87.6535229476406, "grad_norm": 1.8962764739990234, "learning_rate": 0.001, "loss": 1.5653, "step": 271200 }, { "epoch": 87.6858435681965, "grad_norm": 1.1913377046585083, "learning_rate": 0.001, "loss": 1.5857, "step": 271300 }, { "epoch": 87.71816418875243, "grad_norm": 1.2903592586517334, "learning_rate": 0.001, "loss": 1.6013, "step": 271400 }, { "epoch": 87.75048480930833, "grad_norm": 1.0618078708648682, "learning_rate": 0.001, "loss": 1.583, "step": 271500 }, { "epoch": 87.78280542986425, "grad_norm": 6.36294412612915, "learning_rate": 0.001, "loss": 1.5876, "step": 271600 }, { "epoch": 87.81512605042016, "grad_norm": 1.5404661893844604, "learning_rate": 0.001, "loss": 1.5969, "step": 271700 }, { "epoch": 87.84744667097608, "grad_norm": 0.9629808068275452, "learning_rate": 0.001, "loss": 1.6025, "step": 271800 }, { "epoch": 87.87976729153199, "grad_norm": 1.139944314956665, "learning_rate": 0.001, "loss": 1.6166, "step": 271900 }, { "epoch": 87.91208791208791, "grad_norm": 1.3761736154556274, "learning_rate": 0.001, "loss": 1.615, "step": 272000 }, { "epoch": 87.94440853264382, "grad_norm": 1.5524358749389648, "learning_rate": 0.001, "loss": 1.6104, "step": 272100 }, { "epoch": 87.97672915319974, "grad_norm": 1.1004067659378052, "learning_rate": 0.001, "loss": 1.6239, "step": 272200 }, { "epoch": 88.00904977375566, "grad_norm": 1.237992286682129, "learning_rate": 0.001, "loss": 1.58, "step": 272300 }, { "epoch": 88.04137039431157, "grad_norm": 1.20441472530365, "learning_rate": 0.001, "loss": 1.4957, "step": 272400 }, { "epoch": 88.07369101486749, "grad_norm": 1.3444381952285767, "learning_rate": 0.001, "loss": 1.5008, "step": 272500 }, { "epoch": 88.1060116354234, "grad_norm": 2.7051949501037598, "learning_rate": 0.001, "loss": 1.5029, "step": 272600 }, { "epoch": 88.13833225597932, "grad_norm": 1.0657232999801636, "learning_rate": 0.001, "loss": 1.5011, "step": 272700 }, { "epoch": 88.17065287653523, "grad_norm": 1.1717073917388916, "learning_rate": 0.001, "loss": 1.5187, "step": 272800 }, { "epoch": 88.20297349709115, "grad_norm": 1.227364182472229, "learning_rate": 0.001, "loss": 1.5118, "step": 272900 }, { "epoch": 88.23529411764706, "grad_norm": 1.5910488367080688, "learning_rate": 0.001, "loss": 1.5209, "step": 273000 }, { "epoch": 88.26761473820298, "grad_norm": 1.3864065408706665, "learning_rate": 0.001, "loss": 1.5312, "step": 273100 }, { "epoch": 88.29993535875889, "grad_norm": 1.0840407609939575, "learning_rate": 0.001, "loss": 1.5516, "step": 273200 }, { "epoch": 88.33225597931481, "grad_norm": 1.3705962896347046, "learning_rate": 0.001, "loss": 1.549, "step": 273300 }, { "epoch": 88.36457659987072, "grad_norm": 1.1575437784194946, "learning_rate": 0.001, "loss": 1.5433, "step": 273400 }, { "epoch": 88.39689722042664, "grad_norm": 2.5851569175720215, "learning_rate": 0.001, "loss": 1.5568, "step": 273500 }, { "epoch": 88.42921784098255, "grad_norm": 1.192538857460022, "learning_rate": 0.001, "loss": 1.5558, "step": 273600 }, { "epoch": 88.46153846153847, "grad_norm": 1.5158814191818237, "learning_rate": 0.001, "loss": 1.552, "step": 273700 }, { "epoch": 88.49385908209437, "grad_norm": 1.4009217023849487, "learning_rate": 0.001, "loss": 1.5611, "step": 273800 }, { "epoch": 88.5261797026503, "grad_norm": 1.4784737825393677, "learning_rate": 0.001, "loss": 1.5697, "step": 273900 }, { "epoch": 88.5585003232062, "grad_norm": 1.5612084865570068, "learning_rate": 0.001, "loss": 1.5587, "step": 274000 }, { "epoch": 88.59082094376213, "grad_norm": 1.4232614040374756, "learning_rate": 0.001, "loss": 1.5665, "step": 274100 }, { "epoch": 88.62314156431803, "grad_norm": 1.0883997678756714, "learning_rate": 0.001, "loss": 1.5793, "step": 274200 }, { "epoch": 88.65546218487395, "grad_norm": 1.4076792001724243, "learning_rate": 0.001, "loss": 1.5776, "step": 274300 }, { "epoch": 88.68778280542986, "grad_norm": 3.784848690032959, "learning_rate": 0.001, "loss": 1.5771, "step": 274400 }, { "epoch": 88.72010342598578, "grad_norm": 1.2785910367965698, "learning_rate": 0.001, "loss": 1.5889, "step": 274500 }, { "epoch": 88.75242404654169, "grad_norm": 2.665822982788086, "learning_rate": 0.001, "loss": 1.5733, "step": 274600 }, { "epoch": 88.78474466709761, "grad_norm": 1.8011435270309448, "learning_rate": 0.001, "loss": 1.5905, "step": 274700 }, { "epoch": 88.81706528765352, "grad_norm": 1.3323118686676025, "learning_rate": 0.001, "loss": 1.594, "step": 274800 }, { "epoch": 88.84938590820944, "grad_norm": 1.4857550859451294, "learning_rate": 0.001, "loss": 1.6001, "step": 274900 }, { "epoch": 88.88170652876535, "grad_norm": 1.7187902927398682, "learning_rate": 0.001, "loss": 1.6046, "step": 275000 }, { "epoch": 88.91402714932127, "grad_norm": 1.232437014579773, "learning_rate": 0.001, "loss": 1.6025, "step": 275100 }, { "epoch": 88.94634776987718, "grad_norm": 1.3023126125335693, "learning_rate": 0.001, "loss": 1.5877, "step": 275200 }, { "epoch": 88.9786683904331, "grad_norm": 1.6707267761230469, "learning_rate": 0.001, "loss": 1.6081, "step": 275300 }, { "epoch": 89.01098901098901, "grad_norm": 2.031981945037842, "learning_rate": 0.001, "loss": 1.5731, "step": 275400 }, { "epoch": 89.04330963154493, "grad_norm": 1.5525537729263306, "learning_rate": 0.001, "loss": 1.5093, "step": 275500 }, { "epoch": 89.07563025210084, "grad_norm": 1.2762835025787354, "learning_rate": 0.001, "loss": 1.4992, "step": 275600 }, { "epoch": 89.10795087265676, "grad_norm": 1.9132212400436401, "learning_rate": 0.001, "loss": 1.525, "step": 275700 }, { "epoch": 89.14027149321267, "grad_norm": 1.0615112781524658, "learning_rate": 0.001, "loss": 1.5166, "step": 275800 }, { "epoch": 89.17259211376859, "grad_norm": 1.597930908203125, "learning_rate": 0.001, "loss": 1.5277, "step": 275900 }, { "epoch": 89.2049127343245, "grad_norm": 1.8162572383880615, "learning_rate": 0.001, "loss": 1.5265, "step": 276000 }, { "epoch": 89.23723335488042, "grad_norm": 1.2844468355178833, "learning_rate": 0.001, "loss": 1.5307, "step": 276100 }, { "epoch": 89.26955397543632, "grad_norm": 1.8334487676620483, "learning_rate": 0.001, "loss": 1.5283, "step": 276200 }, { "epoch": 89.30187459599225, "grad_norm": 1.2169145345687866, "learning_rate": 0.001, "loss": 1.5155, "step": 276300 }, { "epoch": 89.33419521654815, "grad_norm": 1.4725152254104614, "learning_rate": 0.001, "loss": 1.5388, "step": 276400 }, { "epoch": 89.36651583710407, "grad_norm": 217.5257568359375, "learning_rate": 0.001, "loss": 1.5603, "step": 276500 }, { "epoch": 89.39883645765998, "grad_norm": 1.5311933755874634, "learning_rate": 0.001, "loss": 1.5507, "step": 276600 }, { "epoch": 89.4311570782159, "grad_norm": 1.5628539323806763, "learning_rate": 0.001, "loss": 1.5459, "step": 276700 }, { "epoch": 89.46347769877181, "grad_norm": 1.5481292009353638, "learning_rate": 0.001, "loss": 1.5556, "step": 276800 }, { "epoch": 89.49579831932773, "grad_norm": 2.8963723182678223, "learning_rate": 0.001, "loss": 1.5657, "step": 276900 }, { "epoch": 89.52811893988364, "grad_norm": 1.3551946878433228, "learning_rate": 0.001, "loss": 1.5638, "step": 277000 }, { "epoch": 89.56043956043956, "grad_norm": 1.8870315551757812, "learning_rate": 0.001, "loss": 1.5575, "step": 277100 }, { "epoch": 89.59276018099547, "grad_norm": 1.4216389656066895, "learning_rate": 0.001, "loss": 1.5628, "step": 277200 }, { "epoch": 89.62508080155139, "grad_norm": 2.0780081748962402, "learning_rate": 0.001, "loss": 1.5739, "step": 277300 }, { "epoch": 89.6574014221073, "grad_norm": 2.3340582847595215, "learning_rate": 0.001, "loss": 1.5726, "step": 277400 }, { "epoch": 89.68972204266322, "grad_norm": 1.559397578239441, "learning_rate": 0.001, "loss": 1.556, "step": 277500 }, { "epoch": 89.72204266321913, "grad_norm": 1.2108732461929321, "learning_rate": 0.001, "loss": 1.5819, "step": 277600 }, { "epoch": 89.75436328377505, "grad_norm": 1.3424098491668701, "learning_rate": 0.001, "loss": 1.564, "step": 277700 }, { "epoch": 89.78668390433096, "grad_norm": 1.5999326705932617, "learning_rate": 0.001, "loss": 1.5639, "step": 277800 }, { "epoch": 89.81900452488688, "grad_norm": 1.7643208503723145, "learning_rate": 0.001, "loss": 1.6032, "step": 277900 }, { "epoch": 89.85132514544279, "grad_norm": 3.424647569656372, "learning_rate": 0.001, "loss": 1.5695, "step": 278000 }, { "epoch": 89.88364576599871, "grad_norm": 1.208179235458374, "learning_rate": 0.001, "loss": 1.592, "step": 278100 }, { "epoch": 89.91596638655462, "grad_norm": 1.9032607078552246, "learning_rate": 0.001, "loss": 1.58, "step": 278200 }, { "epoch": 89.94828700711054, "grad_norm": 1.8172322511672974, "learning_rate": 0.001, "loss": 1.5997, "step": 278300 }, { "epoch": 89.98060762766644, "grad_norm": 1.4448333978652954, "learning_rate": 0.001, "loss": 1.5819, "step": 278400 }, { "epoch": 90.01292824822237, "grad_norm": 1.3944969177246094, "learning_rate": 0.001, "loss": 1.5363, "step": 278500 }, { "epoch": 90.04524886877829, "grad_norm": 1.9101866483688354, "learning_rate": 0.001, "loss": 1.4939, "step": 278600 }, { "epoch": 90.0775694893342, "grad_norm": 23.48341941833496, "learning_rate": 0.001, "loss": 1.495, "step": 278700 }, { "epoch": 90.10989010989012, "grad_norm": 1.7094855308532715, "learning_rate": 0.001, "loss": 1.4952, "step": 278800 }, { "epoch": 90.14221073044602, "grad_norm": 1.7329328060150146, "learning_rate": 0.001, "loss": 1.5116, "step": 278900 }, { "epoch": 90.17453135100195, "grad_norm": 1.2925130128860474, "learning_rate": 0.001, "loss": 1.5056, "step": 279000 }, { "epoch": 90.20685197155785, "grad_norm": 1.5081841945648193, "learning_rate": 0.001, "loss": 1.4986, "step": 279100 }, { "epoch": 90.23917259211377, "grad_norm": 1.3987611532211304, "learning_rate": 0.001, "loss": 1.5258, "step": 279200 }, { "epoch": 90.27149321266968, "grad_norm": 3.8875410556793213, "learning_rate": 0.001, "loss": 1.5219, "step": 279300 }, { "epoch": 90.3038138332256, "grad_norm": 1.2745881080627441, "learning_rate": 0.001, "loss": 1.5096, "step": 279400 }, { "epoch": 90.33613445378151, "grad_norm": 1.8634778261184692, "learning_rate": 0.001, "loss": 1.5247, "step": 279500 }, { "epoch": 90.36845507433743, "grad_norm": 1.3581221103668213, "learning_rate": 0.001, "loss": 1.5284, "step": 279600 }, { "epoch": 90.40077569489334, "grad_norm": 1.7779121398925781, "learning_rate": 0.001, "loss": 1.5341, "step": 279700 }, { "epoch": 90.43309631544926, "grad_norm": 1.7008558511734009, "learning_rate": 0.001, "loss": 1.5236, "step": 279800 }, { "epoch": 90.46541693600517, "grad_norm": 1.343550443649292, "learning_rate": 0.001, "loss": 1.5519, "step": 279900 }, { "epoch": 90.49773755656109, "grad_norm": 1.9962193965911865, "learning_rate": 0.001, "loss": 1.5581, "step": 280000 }, { "epoch": 90.530058177117, "grad_norm": 1.8611301183700562, "learning_rate": 0.001, "loss": 1.5488, "step": 280100 }, { "epoch": 90.56237879767292, "grad_norm": 1.986572504043579, "learning_rate": 0.001, "loss": 1.5519, "step": 280200 }, { "epoch": 90.59469941822883, "grad_norm": 1.2757246494293213, "learning_rate": 0.001, "loss": 1.5503, "step": 280300 }, { "epoch": 90.62702003878475, "grad_norm": 1.2695149183273315, "learning_rate": 0.001, "loss": 1.5592, "step": 280400 }, { "epoch": 90.65934065934066, "grad_norm": 1.465623140335083, "learning_rate": 0.001, "loss": 1.5447, "step": 280500 }, { "epoch": 90.69166127989658, "grad_norm": 2.0182888507843018, "learning_rate": 0.001, "loss": 1.5671, "step": 280600 }, { "epoch": 90.72398190045249, "grad_norm": 1.159236192703247, "learning_rate": 0.001, "loss": 1.5669, "step": 280700 }, { "epoch": 90.75630252100841, "grad_norm": 1.2519042491912842, "learning_rate": 0.001, "loss": 1.5649, "step": 280800 }, { "epoch": 90.78862314156432, "grad_norm": 5.749959468841553, "learning_rate": 0.001, "loss": 1.5607, "step": 280900 }, { "epoch": 90.82094376212024, "grad_norm": 1.618669033050537, "learning_rate": 0.001, "loss": 1.5847, "step": 281000 }, { "epoch": 90.85326438267614, "grad_norm": 1.5149770975112915, "learning_rate": 0.001, "loss": 1.6015, "step": 281100 }, { "epoch": 90.88558500323207, "grad_norm": 1.1263158321380615, "learning_rate": 0.001, "loss": 1.5942, "step": 281200 }, { "epoch": 90.91790562378797, "grad_norm": 1.3343770503997803, "learning_rate": 0.001, "loss": 1.5994, "step": 281300 }, { "epoch": 90.9502262443439, "grad_norm": 1.6666364669799805, "learning_rate": 0.001, "loss": 1.5839, "step": 281400 }, { "epoch": 90.9825468648998, "grad_norm": 1.3916020393371582, "learning_rate": 0.001, "loss": 1.5899, "step": 281500 }, { "epoch": 91.01486748545572, "grad_norm": 1.511621117591858, "learning_rate": 0.001, "loss": 1.5231, "step": 281600 }, { "epoch": 91.04718810601163, "grad_norm": 1.4699344635009766, "learning_rate": 0.001, "loss": 1.4796, "step": 281700 }, { "epoch": 91.07950872656755, "grad_norm": 1.820284128189087, "learning_rate": 0.001, "loss": 1.4839, "step": 281800 }, { "epoch": 91.11182934712346, "grad_norm": 1.72976815700531, "learning_rate": 0.001, "loss": 1.5033, "step": 281900 }, { "epoch": 91.14414996767938, "grad_norm": 3.27559232711792, "learning_rate": 0.001, "loss": 1.5076, "step": 282000 }, { "epoch": 91.17647058823529, "grad_norm": 1.3507790565490723, "learning_rate": 0.001, "loss": 1.5092, "step": 282100 }, { "epoch": 91.20879120879121, "grad_norm": 2.2038185596466064, "learning_rate": 0.001, "loss": 1.5022, "step": 282200 }, { "epoch": 91.24111182934712, "grad_norm": 4.079566478729248, "learning_rate": 0.001, "loss": 1.5253, "step": 282300 }, { "epoch": 91.27343244990304, "grad_norm": 1.7138832807540894, "learning_rate": 0.001, "loss": 1.5193, "step": 282400 }, { "epoch": 91.30575307045895, "grad_norm": 3.061126947402954, "learning_rate": 0.001, "loss": 1.5281, "step": 282500 }, { "epoch": 91.33807369101487, "grad_norm": 1.6333290338516235, "learning_rate": 0.001, "loss": 1.5239, "step": 282600 }, { "epoch": 91.37039431157078, "grad_norm": 1.461121916770935, "learning_rate": 0.001, "loss": 1.528, "step": 282700 }, { "epoch": 91.4027149321267, "grad_norm": 1.7948272228240967, "learning_rate": 0.001, "loss": 1.5344, "step": 282800 }, { "epoch": 91.4350355526826, "grad_norm": 1.5370502471923828, "learning_rate": 0.001, "loss": 1.5255, "step": 282900 }, { "epoch": 91.46735617323853, "grad_norm": 2.596872329711914, "learning_rate": 0.001, "loss": 1.5314, "step": 283000 }, { "epoch": 91.49967679379444, "grad_norm": 1.4660226106643677, "learning_rate": 0.001, "loss": 1.5493, "step": 283100 }, { "epoch": 91.53199741435036, "grad_norm": 1.8447295427322388, "learning_rate": 0.001, "loss": 1.5397, "step": 283200 }, { "epoch": 91.56431803490626, "grad_norm": 1.8207217454910278, "learning_rate": 0.001, "loss": 1.5347, "step": 283300 }, { "epoch": 91.59663865546219, "grad_norm": 1.2521451711654663, "learning_rate": 0.001, "loss": 1.5552, "step": 283400 }, { "epoch": 91.6289592760181, "grad_norm": 2.3209328651428223, "learning_rate": 0.001, "loss": 1.5508, "step": 283500 }, { "epoch": 91.66127989657402, "grad_norm": 2.414494037628174, "learning_rate": 0.001, "loss": 1.5667, "step": 283600 }, { "epoch": 91.69360051712992, "grad_norm": 1.2644875049591064, "learning_rate": 0.001, "loss": 1.565, "step": 283700 }, { "epoch": 91.72592113768584, "grad_norm": 1.420069932937622, "learning_rate": 0.001, "loss": 1.5703, "step": 283800 }, { "epoch": 91.75824175824175, "grad_norm": 1.4925800561904907, "learning_rate": 0.001, "loss": 1.5635, "step": 283900 }, { "epoch": 91.79056237879767, "grad_norm": 1.7937685251235962, "learning_rate": 0.001, "loss": 1.5845, "step": 284000 }, { "epoch": 91.82288299935358, "grad_norm": 1.8882492780685425, "learning_rate": 0.001, "loss": 1.588, "step": 284100 }, { "epoch": 91.8552036199095, "grad_norm": 1.6433968544006348, "learning_rate": 0.001, "loss": 1.6096, "step": 284200 }, { "epoch": 91.88752424046541, "grad_norm": 2.372130870819092, "learning_rate": 0.001, "loss": 1.5935, "step": 284300 }, { "epoch": 91.91984486102133, "grad_norm": 1.65177321434021, "learning_rate": 0.001, "loss": 1.5818, "step": 284400 }, { "epoch": 91.95216548157724, "grad_norm": 1.5147981643676758, "learning_rate": 0.001, "loss": 1.586, "step": 284500 }, { "epoch": 91.98448610213316, "grad_norm": 1.6249573230743408, "learning_rate": 0.001, "loss": 1.5689, "step": 284600 }, { "epoch": 92.01680672268908, "grad_norm": 1.7770642042160034, "learning_rate": 0.001, "loss": 1.4999, "step": 284700 }, { "epoch": 92.04912734324499, "grad_norm": 2.947965383529663, "learning_rate": 0.001, "loss": 1.4805, "step": 284800 }, { "epoch": 92.08144796380091, "grad_norm": 1.8464159965515137, "learning_rate": 0.001, "loss": 1.4837, "step": 284900 }, { "epoch": 92.11376858435682, "grad_norm": 2.2017343044281006, "learning_rate": 0.001, "loss": 1.4908, "step": 285000 }, { "epoch": 92.14608920491274, "grad_norm": 1.3652074337005615, "learning_rate": 0.001, "loss": 1.4991, "step": 285100 }, { "epoch": 92.17840982546865, "grad_norm": 2.181473731994629, "learning_rate": 0.001, "loss": 1.5039, "step": 285200 }, { "epoch": 92.21073044602457, "grad_norm": 2.8671841621398926, "learning_rate": 0.001, "loss": 1.4953, "step": 285300 }, { "epoch": 92.24305106658048, "grad_norm": 2.080866575241089, "learning_rate": 0.001, "loss": 1.5041, "step": 285400 }, { "epoch": 92.2753716871364, "grad_norm": 3.580744743347168, "learning_rate": 0.001, "loss": 1.5197, "step": 285500 }, { "epoch": 92.3076923076923, "grad_norm": 3.968400001525879, "learning_rate": 0.001, "loss": 1.5154, "step": 285600 }, { "epoch": 92.34001292824823, "grad_norm": 2.9830777645111084, "learning_rate": 0.001, "loss": 1.5146, "step": 285700 }, { "epoch": 92.37233354880414, "grad_norm": 2.238168954849243, "learning_rate": 0.001, "loss": 1.5187, "step": 285800 }, { "epoch": 92.40465416936006, "grad_norm": 2.62433123588562, "learning_rate": 0.001, "loss": 1.5227, "step": 285900 }, { "epoch": 92.43697478991596, "grad_norm": 2.033294200897217, "learning_rate": 0.001, "loss": 1.5281, "step": 286000 }, { "epoch": 92.46929541047189, "grad_norm": 1.712694764137268, "learning_rate": 0.001, "loss": 1.5316, "step": 286100 }, { "epoch": 92.5016160310278, "grad_norm": 2.31351637840271, "learning_rate": 0.001, "loss": 1.5252, "step": 286200 }, { "epoch": 92.53393665158372, "grad_norm": 1.2480380535125732, "learning_rate": 0.001, "loss": 1.5477, "step": 286300 }, { "epoch": 92.56625727213962, "grad_norm": 3.5549066066741943, "learning_rate": 0.001, "loss": 1.5364, "step": 286400 }, { "epoch": 92.59857789269554, "grad_norm": 1.6658905744552612, "learning_rate": 0.001, "loss": 1.5427, "step": 286500 }, { "epoch": 92.63089851325145, "grad_norm": 10.299586296081543, "learning_rate": 0.001, "loss": 1.5405, "step": 286600 }, { "epoch": 92.66321913380737, "grad_norm": 1.520553469657898, "learning_rate": 0.001, "loss": 1.5576, "step": 286700 }, { "epoch": 92.69553975436328, "grad_norm": 1.7324973344802856, "learning_rate": 0.001, "loss": 1.5528, "step": 286800 }, { "epoch": 92.7278603749192, "grad_norm": 17.62340545654297, "learning_rate": 0.001, "loss": 1.5422, "step": 286900 }, { "epoch": 92.76018099547511, "grad_norm": 4.262084007263184, "learning_rate": 0.001, "loss": 1.5672, "step": 287000 }, { "epoch": 92.79250161603103, "grad_norm": 1.6276400089263916, "learning_rate": 0.001, "loss": 1.5525, "step": 287100 }, { "epoch": 92.82482223658694, "grad_norm": 1.894785761833191, "learning_rate": 0.001, "loss": 1.5727, "step": 287200 }, { "epoch": 92.85714285714286, "grad_norm": 4.065331935882568, "learning_rate": 0.001, "loss": 1.5622, "step": 287300 }, { "epoch": 92.88946347769877, "grad_norm": 3.7710416316986084, "learning_rate": 0.001, "loss": 1.5618, "step": 287400 }, { "epoch": 92.92178409825469, "grad_norm": 5.200353622436523, "learning_rate": 0.001, "loss": 1.5603, "step": 287500 }, { "epoch": 92.9541047188106, "grad_norm": 1.8001126050949097, "learning_rate": 0.001, "loss": 1.5734, "step": 287600 }, { "epoch": 92.98642533936652, "grad_norm": 2.6428303718566895, "learning_rate": 0.001, "loss": 1.5901, "step": 287700 }, { "epoch": 93.01874595992243, "grad_norm": 1.0616384744644165, "learning_rate": 0.001, "loss": 1.516, "step": 287800 }, { "epoch": 93.05106658047835, "grad_norm": 1.8911588191986084, "learning_rate": 0.001, "loss": 1.4607, "step": 287900 }, { "epoch": 93.08338720103426, "grad_norm": 1.6450949907302856, "learning_rate": 0.001, "loss": 1.4712, "step": 288000 }, { "epoch": 93.11570782159018, "grad_norm": 2.2752041816711426, "learning_rate": 0.001, "loss": 1.4882, "step": 288100 }, { "epoch": 93.14802844214609, "grad_norm": 1.4457811117172241, "learning_rate": 0.001, "loss": 1.4799, "step": 288200 }, { "epoch": 93.180349062702, "grad_norm": 1.8913570642471313, "learning_rate": 0.001, "loss": 1.4768, "step": 288300 }, { "epoch": 93.21266968325791, "grad_norm": 1.287611484527588, "learning_rate": 0.001, "loss": 1.4844, "step": 288400 }, { "epoch": 93.24499030381384, "grad_norm": 14.385737419128418, "learning_rate": 0.001, "loss": 1.489, "step": 288500 }, { "epoch": 93.27731092436974, "grad_norm": 1.964044213294983, "learning_rate": 0.001, "loss": 1.5109, "step": 288600 }, { "epoch": 93.30963154492567, "grad_norm": 1.5661853551864624, "learning_rate": 0.001, "loss": 1.4959, "step": 288700 }, { "epoch": 93.34195216548157, "grad_norm": 1.3720322847366333, "learning_rate": 0.001, "loss": 1.5097, "step": 288800 }, { "epoch": 93.3742727860375, "grad_norm": 1.8044131994247437, "learning_rate": 0.001, "loss": 1.5086, "step": 288900 }, { "epoch": 93.4065934065934, "grad_norm": 1.7699639797210693, "learning_rate": 0.001, "loss": 1.5119, "step": 289000 }, { "epoch": 93.43891402714932, "grad_norm": 1.7888864278793335, "learning_rate": 0.001, "loss": 1.5277, "step": 289100 }, { "epoch": 93.47123464770523, "grad_norm": 3.153106451034546, "learning_rate": 0.001, "loss": 1.5231, "step": 289200 }, { "epoch": 93.50355526826115, "grad_norm": 1.2186968326568604, "learning_rate": 0.001, "loss": 1.5291, "step": 289300 }, { "epoch": 93.53587588881706, "grad_norm": 1.7386680841445923, "learning_rate": 0.001, "loss": 1.5315, "step": 289400 }, { "epoch": 93.56819650937298, "grad_norm": 1.2839548587799072, "learning_rate": 0.001, "loss": 1.5515, "step": 289500 }, { "epoch": 93.60051712992889, "grad_norm": 1.1249257326126099, "learning_rate": 0.001, "loss": 1.5347, "step": 289600 }, { "epoch": 93.63283775048481, "grad_norm": 1.6158490180969238, "learning_rate": 0.001, "loss": 1.5282, "step": 289700 }, { "epoch": 93.66515837104072, "grad_norm": 1.560380458831787, "learning_rate": 0.001, "loss": 1.5457, "step": 289800 }, { "epoch": 93.69747899159664, "grad_norm": 1.6123809814453125, "learning_rate": 0.001, "loss": 1.5609, "step": 289900 }, { "epoch": 93.72979961215255, "grad_norm": 2.866273880004883, "learning_rate": 0.001, "loss": 1.5528, "step": 290000 }, { "epoch": 93.76212023270847, "grad_norm": 1.1382124423980713, "learning_rate": 0.001, "loss": 1.5645, "step": 290100 }, { "epoch": 93.79444085326438, "grad_norm": 1.6119617223739624, "learning_rate": 0.001, "loss": 1.5801, "step": 290200 }, { "epoch": 93.8267614738203, "grad_norm": 1.4229826927185059, "learning_rate": 0.001, "loss": 1.5839, "step": 290300 }, { "epoch": 93.8590820943762, "grad_norm": 1.4128005504608154, "learning_rate": 0.001, "loss": 1.5589, "step": 290400 }, { "epoch": 93.89140271493213, "grad_norm": 1.1807141304016113, "learning_rate": 0.001, "loss": 1.5723, "step": 290500 }, { "epoch": 93.92372333548803, "grad_norm": 1.508898377418518, "learning_rate": 0.001, "loss": 1.5575, "step": 290600 }, { "epoch": 93.95604395604396, "grad_norm": 3.8235645294189453, "learning_rate": 0.001, "loss": 1.6003, "step": 290700 }, { "epoch": 93.98836457659988, "grad_norm": 1.2994366884231567, "learning_rate": 0.001, "loss": 1.5701, "step": 290800 }, { "epoch": 94.02068519715579, "grad_norm": 1.1258060932159424, "learning_rate": 0.001, "loss": 1.5219, "step": 290900 }, { "epoch": 94.0530058177117, "grad_norm": 1.4557178020477295, "learning_rate": 0.001, "loss": 1.4606, "step": 291000 }, { "epoch": 94.08532643826761, "grad_norm": 1.7534699440002441, "learning_rate": 0.001, "loss": 1.4757, "step": 291100 }, { "epoch": 94.11764705882354, "grad_norm": 1.1879892349243164, "learning_rate": 0.001, "loss": 1.4728, "step": 291200 }, { "epoch": 94.14996767937944, "grad_norm": 2.654309034347534, "learning_rate": 0.001, "loss": 1.4921, "step": 291300 }, { "epoch": 94.18228829993537, "grad_norm": 1.359208583831787, "learning_rate": 0.001, "loss": 1.4774, "step": 291400 }, { "epoch": 94.21460892049127, "grad_norm": 1.804179072380066, "learning_rate": 0.001, "loss": 1.4992, "step": 291500 }, { "epoch": 94.2469295410472, "grad_norm": 1.1498416662216187, "learning_rate": 0.001, "loss": 1.488, "step": 291600 }, { "epoch": 94.2792501616031, "grad_norm": 1.0733956098556519, "learning_rate": 0.001, "loss": 1.4976, "step": 291700 }, { "epoch": 94.31157078215902, "grad_norm": 1.9312669038772583, "learning_rate": 0.001, "loss": 1.5023, "step": 291800 }, { "epoch": 94.34389140271493, "grad_norm": 1.4573562145233154, "learning_rate": 0.001, "loss": 1.509, "step": 291900 }, { "epoch": 94.37621202327085, "grad_norm": 2.203289270401001, "learning_rate": 0.001, "loss": 1.5101, "step": 292000 }, { "epoch": 94.40853264382676, "grad_norm": 1.3194336891174316, "learning_rate": 0.001, "loss": 1.5105, "step": 292100 }, { "epoch": 94.44085326438268, "grad_norm": 1.8146227598190308, "learning_rate": 0.001, "loss": 1.5255, "step": 292200 }, { "epoch": 94.47317388493859, "grad_norm": 1.2981555461883545, "learning_rate": 0.001, "loss": 1.5267, "step": 292300 }, { "epoch": 94.50549450549451, "grad_norm": 1.9002580642700195, "learning_rate": 0.001, "loss": 1.5303, "step": 292400 }, { "epoch": 94.53781512605042, "grad_norm": 1.2495265007019043, "learning_rate": 0.001, "loss": 1.531, "step": 292500 }, { "epoch": 94.57013574660634, "grad_norm": 1.7268157005310059, "learning_rate": 0.001, "loss": 1.5137, "step": 292600 }, { "epoch": 94.60245636716225, "grad_norm": 1.19248628616333, "learning_rate": 0.001, "loss": 1.5324, "step": 292700 }, { "epoch": 94.63477698771817, "grad_norm": 1.4950668811798096, "learning_rate": 0.001, "loss": 1.534, "step": 292800 }, { "epoch": 94.66709760827408, "grad_norm": 1.1229387521743774, "learning_rate": 0.001, "loss": 1.5452, "step": 292900 }, { "epoch": 94.69941822883, "grad_norm": 1.457218885421753, "learning_rate": 0.001, "loss": 1.5479, "step": 293000 }, { "epoch": 94.7317388493859, "grad_norm": 1.3838504552841187, "learning_rate": 0.001, "loss": 1.5495, "step": 293100 }, { "epoch": 94.76405946994183, "grad_norm": 1.253285527229309, "learning_rate": 0.001, "loss": 1.5585, "step": 293200 }, { "epoch": 94.79638009049773, "grad_norm": 3.510382890701294, "learning_rate": 0.001, "loss": 1.5611, "step": 293300 }, { "epoch": 94.82870071105366, "grad_norm": 1.5860843658447266, "learning_rate": 0.001, "loss": 1.5411, "step": 293400 }, { "epoch": 94.86102133160956, "grad_norm": 1.9746053218841553, "learning_rate": 0.001, "loss": 1.5383, "step": 293500 }, { "epoch": 94.89334195216549, "grad_norm": 1.5255277156829834, "learning_rate": 0.001, "loss": 1.5507, "step": 293600 }, { "epoch": 94.9256625727214, "grad_norm": 1.4601233005523682, "learning_rate": 0.001, "loss": 1.5676, "step": 293700 }, { "epoch": 94.95798319327731, "grad_norm": 1.1030539274215698, "learning_rate": 0.001, "loss": 1.5648, "step": 293800 }, { "epoch": 94.99030381383322, "grad_norm": 1.2187821865081787, "learning_rate": 0.001, "loss": 1.5638, "step": 293900 }, { "epoch": 95.02262443438914, "grad_norm": 1.2317875623703003, "learning_rate": 0.001, "loss": 1.5085, "step": 294000 }, { "epoch": 95.05494505494505, "grad_norm": 1.274288535118103, "learning_rate": 0.001, "loss": 1.467, "step": 294100 }, { "epoch": 95.08726567550097, "grad_norm": 1.1086032390594482, "learning_rate": 0.001, "loss": 1.4645, "step": 294200 }, { "epoch": 95.11958629605688, "grad_norm": 1.248243808746338, "learning_rate": 0.001, "loss": 1.4612, "step": 294300 }, { "epoch": 95.1519069166128, "grad_norm": 1.2834889888763428, "learning_rate": 0.001, "loss": 1.4953, "step": 294400 }, { "epoch": 95.18422753716871, "grad_norm": 1.0837520360946655, "learning_rate": 0.001, "loss": 1.481, "step": 294500 }, { "epoch": 95.21654815772463, "grad_norm": 2.000903606414795, "learning_rate": 0.001, "loss": 1.4878, "step": 294600 }, { "epoch": 95.24886877828054, "grad_norm": 1.5144456624984741, "learning_rate": 0.001, "loss": 1.4832, "step": 294700 }, { "epoch": 95.28118939883646, "grad_norm": 1.162752389907837, "learning_rate": 0.001, "loss": 1.4945, "step": 294800 }, { "epoch": 95.31351001939237, "grad_norm": 1.051884651184082, "learning_rate": 0.001, "loss": 1.4976, "step": 294900 }, { "epoch": 95.34583063994829, "grad_norm": 1.5698506832122803, "learning_rate": 0.001, "loss": 1.5025, "step": 295000 }, { "epoch": 95.3781512605042, "grad_norm": 1.6360477209091187, "learning_rate": 0.001, "loss": 1.5177, "step": 295100 }, { "epoch": 95.41047188106012, "grad_norm": 2.0160601139068604, "learning_rate": 0.001, "loss": 1.499, "step": 295200 }, { "epoch": 95.44279250161603, "grad_norm": 1.1620807647705078, "learning_rate": 0.001, "loss": 1.5127, "step": 295300 }, { "epoch": 95.47511312217195, "grad_norm": 4.118252277374268, "learning_rate": 0.001, "loss": 1.5229, "step": 295400 }, { "epoch": 95.50743374272786, "grad_norm": 1.8773397207260132, "learning_rate": 0.001, "loss": 1.5148, "step": 295500 }, { "epoch": 95.53975436328378, "grad_norm": 1.1756994724273682, "learning_rate": 0.001, "loss": 1.5321, "step": 295600 }, { "epoch": 95.57207498383968, "grad_norm": 2.7820475101470947, "learning_rate": 0.001, "loss": 1.5148, "step": 295700 }, { "epoch": 95.6043956043956, "grad_norm": 2.846459150314331, "learning_rate": 0.001, "loss": 1.5176, "step": 295800 }, { "epoch": 95.63671622495151, "grad_norm": 1.0948617458343506, "learning_rate": 0.001, "loss": 1.5338, "step": 295900 }, { "epoch": 95.66903684550743, "grad_norm": 1.1277252435684204, "learning_rate": 0.001, "loss": 1.5221, "step": 296000 }, { "epoch": 95.70135746606334, "grad_norm": 1.5752462148666382, "learning_rate": 0.001, "loss": 1.5305, "step": 296100 }, { "epoch": 95.73367808661926, "grad_norm": 1.4704670906066895, "learning_rate": 0.001, "loss": 1.5523, "step": 296200 }, { "epoch": 95.76599870717517, "grad_norm": 1.4931970834732056, "learning_rate": 0.001, "loss": 1.5167, "step": 296300 }, { "epoch": 95.7983193277311, "grad_norm": 1.249324917793274, "learning_rate": 0.001, "loss": 1.5508, "step": 296400 }, { "epoch": 95.830639948287, "grad_norm": 1.6873565912246704, "learning_rate": 0.001, "loss": 1.5408, "step": 296500 }, { "epoch": 95.86296056884292, "grad_norm": 3.573498010635376, "learning_rate": 0.001, "loss": 1.5481, "step": 296600 }, { "epoch": 95.89528118939883, "grad_norm": 1.6778451204299927, "learning_rate": 0.001, "loss": 1.5519, "step": 296700 }, { "epoch": 95.92760180995475, "grad_norm": 1.3831006288528442, "learning_rate": 0.001, "loss": 1.5566, "step": 296800 }, { "epoch": 95.95992243051066, "grad_norm": 1.2231361865997314, "learning_rate": 0.001, "loss": 1.548, "step": 296900 }, { "epoch": 95.99224305106658, "grad_norm": 1.9604476690292358, "learning_rate": 0.001, "loss": 1.5526, "step": 297000 }, { "epoch": 96.0245636716225, "grad_norm": 1.22735595703125, "learning_rate": 0.001, "loss": 1.497, "step": 297100 }, { "epoch": 96.05688429217841, "grad_norm": 1.4512308835983276, "learning_rate": 0.001, "loss": 1.4481, "step": 297200 }, { "epoch": 96.08920491273433, "grad_norm": 1.299473524093628, "learning_rate": 0.001, "loss": 1.4675, "step": 297300 }, { "epoch": 96.12152553329024, "grad_norm": 1.5478742122650146, "learning_rate": 0.001, "loss": 1.4539, "step": 297400 }, { "epoch": 96.15384615384616, "grad_norm": 1.1718240976333618, "learning_rate": 0.001, "loss": 1.4662, "step": 297500 }, { "epoch": 96.18616677440207, "grad_norm": 1.4514905214309692, "learning_rate": 0.001, "loss": 1.4692, "step": 297600 }, { "epoch": 96.21848739495799, "grad_norm": 1.2682327032089233, "learning_rate": 0.001, "loss": 1.4845, "step": 297700 }, { "epoch": 96.2508080155139, "grad_norm": 1.6107807159423828, "learning_rate": 0.001, "loss": 1.4666, "step": 297800 }, { "epoch": 96.28312863606982, "grad_norm": 1.1162203550338745, "learning_rate": 0.001, "loss": 1.4586, "step": 297900 }, { "epoch": 96.31544925662573, "grad_norm": 1.250571608543396, "learning_rate": 0.001, "loss": 1.4944, "step": 298000 }, { "epoch": 96.34776987718165, "grad_norm": 1.0143648386001587, "learning_rate": 0.001, "loss": 1.5023, "step": 298100 }, { "epoch": 96.38009049773756, "grad_norm": 5.6002726554870605, "learning_rate": 0.001, "loss": 1.5165, "step": 298200 }, { "epoch": 96.41241111829348, "grad_norm": 1.7196699380874634, "learning_rate": 0.001, "loss": 1.4961, "step": 298300 }, { "epoch": 96.44473173884938, "grad_norm": 1.0650442838668823, "learning_rate": 0.001, "loss": 1.5027, "step": 298400 }, { "epoch": 96.4770523594053, "grad_norm": 1.1044974327087402, "learning_rate": 0.001, "loss": 1.5034, "step": 298500 }, { "epoch": 96.50937297996121, "grad_norm": 1.2249104976654053, "learning_rate": 0.001, "loss": 1.5094, "step": 298600 }, { "epoch": 96.54169360051714, "grad_norm": 1.6643072366714478, "learning_rate": 0.001, "loss": 1.5139, "step": 298700 }, { "epoch": 96.57401422107304, "grad_norm": 2.548504590988159, "learning_rate": 0.001, "loss": 1.518, "step": 298800 }, { "epoch": 96.60633484162896, "grad_norm": 19.77634048461914, "learning_rate": 0.001, "loss": 1.5228, "step": 298900 }, { "epoch": 96.63865546218487, "grad_norm": 1.6740444898605347, "learning_rate": 0.001, "loss": 1.5006, "step": 299000 }, { "epoch": 96.6709760827408, "grad_norm": 1.2908892631530762, "learning_rate": 0.001, "loss": 1.5274, "step": 299100 }, { "epoch": 96.7032967032967, "grad_norm": 1.1333355903625488, "learning_rate": 0.001, "loss": 1.5378, "step": 299200 }, { "epoch": 96.73561732385262, "grad_norm": 1.468159794807434, "learning_rate": 0.001, "loss": 1.53, "step": 299300 }, { "epoch": 96.76793794440853, "grad_norm": 1.3962353467941284, "learning_rate": 0.001, "loss": 1.5491, "step": 299400 }, { "epoch": 96.80025856496445, "grad_norm": 1.4682116508483887, "learning_rate": 0.001, "loss": 1.5329, "step": 299500 }, { "epoch": 96.83257918552036, "grad_norm": 1.6541014909744263, "learning_rate": 0.001, "loss": 1.5488, "step": 299600 }, { "epoch": 96.86489980607628, "grad_norm": 1.6824743747711182, "learning_rate": 0.001, "loss": 1.5417, "step": 299700 }, { "epoch": 96.89722042663219, "grad_norm": 1.380151391029358, "learning_rate": 0.001, "loss": 1.5467, "step": 299800 }, { "epoch": 96.92954104718811, "grad_norm": 1.4443453550338745, "learning_rate": 0.001, "loss": 1.5333, "step": 299900 }, { "epoch": 96.96186166774402, "grad_norm": 1.9009063243865967, "learning_rate": 0.001, "loss": 1.5504, "step": 300000 }, { "epoch": 96.99418228829994, "grad_norm": 1.912052035331726, "learning_rate": 0.001, "loss": 1.5451, "step": 300100 }, { "epoch": 97.02650290885585, "grad_norm": 1.2144694328308105, "learning_rate": 0.001, "loss": 1.4753, "step": 300200 }, { "epoch": 97.05882352941177, "grad_norm": 1.595928430557251, "learning_rate": 0.001, "loss": 1.4629, "step": 300300 }, { "epoch": 97.09114414996768, "grad_norm": 11.068997383117676, "learning_rate": 0.001, "loss": 1.4617, "step": 300400 }, { "epoch": 97.1234647705236, "grad_norm": 1.9974979162216187, "learning_rate": 0.001, "loss": 1.4549, "step": 300500 }, { "epoch": 97.1557853910795, "grad_norm": 0.9468052387237549, "learning_rate": 0.001, "loss": 1.4782, "step": 300600 }, { "epoch": 97.18810601163543, "grad_norm": 2.352173089981079, "learning_rate": 0.001, "loss": 1.4667, "step": 300700 }, { "epoch": 97.22042663219133, "grad_norm": 1.6588690280914307, "learning_rate": 0.001, "loss": 1.4719, "step": 300800 }, { "epoch": 97.25274725274726, "grad_norm": 1.9931819438934326, "learning_rate": 0.001, "loss": 1.4831, "step": 300900 }, { "epoch": 97.28506787330316, "grad_norm": 1.5477044582366943, "learning_rate": 0.001, "loss": 1.4857, "step": 301000 }, { "epoch": 97.31738849385908, "grad_norm": 1.6596499681472778, "learning_rate": 0.001, "loss": 1.4959, "step": 301100 }, { "epoch": 97.34970911441499, "grad_norm": 1.1618753671646118, "learning_rate": 0.001, "loss": 1.4943, "step": 301200 }, { "epoch": 97.38202973497091, "grad_norm": 2.6695642471313477, "learning_rate": 0.001, "loss": 1.5055, "step": 301300 }, { "epoch": 97.41435035552682, "grad_norm": 1.0368062257766724, "learning_rate": 0.001, "loss": 1.4933, "step": 301400 }, { "epoch": 97.44667097608274, "grad_norm": 1.2256377935409546, "learning_rate": 0.001, "loss": 1.517, "step": 301500 }, { "epoch": 97.47899159663865, "grad_norm": 1.6616265773773193, "learning_rate": 0.001, "loss": 1.5051, "step": 301600 }, { "epoch": 97.51131221719457, "grad_norm": 1.5869675874710083, "learning_rate": 0.001, "loss": 1.5293, "step": 301700 }, { "epoch": 97.54363283775048, "grad_norm": 4.01141357421875, "learning_rate": 0.001, "loss": 1.5166, "step": 301800 }, { "epoch": 97.5759534583064, "grad_norm": 1.0902081727981567, "learning_rate": 0.001, "loss": 1.5092, "step": 301900 }, { "epoch": 97.60827407886231, "grad_norm": 1.3080886602401733, "learning_rate": 0.001, "loss": 1.5211, "step": 302000 }, { "epoch": 97.64059469941823, "grad_norm": 1.4427684545516968, "learning_rate": 0.001, "loss": 1.5193, "step": 302100 }, { "epoch": 97.67291531997414, "grad_norm": 2.3125712871551514, "learning_rate": 0.001, "loss": 1.5075, "step": 302200 }, { "epoch": 97.70523594053006, "grad_norm": 1.625456690788269, "learning_rate": 0.001, "loss": 1.5157, "step": 302300 }, { "epoch": 97.73755656108597, "grad_norm": 1.6599856615066528, "learning_rate": 0.001, "loss": 1.5175, "step": 302400 }, { "epoch": 97.76987718164189, "grad_norm": 1.2360403537750244, "learning_rate": 0.001, "loss": 1.5325, "step": 302500 }, { "epoch": 97.8021978021978, "grad_norm": 1.0689626932144165, "learning_rate": 0.001, "loss": 1.5394, "step": 302600 }, { "epoch": 97.83451842275372, "grad_norm": 5.014745712280273, "learning_rate": 0.001, "loss": 1.5362, "step": 302700 }, { "epoch": 97.86683904330962, "grad_norm": 1.2347824573516846, "learning_rate": 0.001, "loss": 1.54, "step": 302800 }, { "epoch": 97.89915966386555, "grad_norm": 1.6215710639953613, "learning_rate": 0.001, "loss": 1.5375, "step": 302900 }, { "epoch": 97.93148028442145, "grad_norm": 1.7682123184204102, "learning_rate": 0.001, "loss": 1.5566, "step": 303000 }, { "epoch": 97.96380090497738, "grad_norm": 3.7246806621551514, "learning_rate": 0.001, "loss": 1.5549, "step": 303100 }, { "epoch": 97.99612152553328, "grad_norm": 1.6651161909103394, "learning_rate": 0.001, "loss": 1.5453, "step": 303200 }, { "epoch": 98.0284421460892, "grad_norm": 1.456674337387085, "learning_rate": 0.001, "loss": 1.444, "step": 303300 }, { "epoch": 98.06076276664513, "grad_norm": 1.295353651046753, "learning_rate": 0.001, "loss": 1.4472, "step": 303400 }, { "epoch": 98.09308338720103, "grad_norm": 2.2868764400482178, "learning_rate": 0.001, "loss": 1.4587, "step": 303500 }, { "epoch": 98.12540400775696, "grad_norm": 1.641650915145874, "learning_rate": 0.001, "loss": 1.4617, "step": 303600 }, { "epoch": 98.15772462831286, "grad_norm": 1.4647761583328247, "learning_rate": 0.001, "loss": 1.4633, "step": 303700 }, { "epoch": 98.19004524886878, "grad_norm": 16.74369239807129, "learning_rate": 0.001, "loss": 1.4791, "step": 303800 }, { "epoch": 98.22236586942469, "grad_norm": 2.228388786315918, "learning_rate": 0.001, "loss": 1.4782, "step": 303900 }, { "epoch": 98.25468648998061, "grad_norm": 1.9309481382369995, "learning_rate": 0.001, "loss": 1.4816, "step": 304000 }, { "epoch": 98.28700711053652, "grad_norm": 1.4247316122055054, "learning_rate": 0.001, "loss": 1.4828, "step": 304100 }, { "epoch": 98.31932773109244, "grad_norm": 3.0181186199188232, "learning_rate": 0.001, "loss": 1.4918, "step": 304200 }, { "epoch": 98.35164835164835, "grad_norm": 1.4976110458374023, "learning_rate": 0.001, "loss": 1.4856, "step": 304300 }, { "epoch": 98.38396897220427, "grad_norm": 1.6173464059829712, "learning_rate": 0.001, "loss": 1.5006, "step": 304400 }, { "epoch": 98.41628959276018, "grad_norm": 1.695127248764038, "learning_rate": 0.001, "loss": 1.5002, "step": 304500 }, { "epoch": 98.4486102133161, "grad_norm": 1.7989153861999512, "learning_rate": 0.001, "loss": 1.5038, "step": 304600 }, { "epoch": 98.48093083387201, "grad_norm": 1.3832381963729858, "learning_rate": 0.001, "loss": 1.4964, "step": 304700 }, { "epoch": 98.51325145442793, "grad_norm": 1.3681704998016357, "learning_rate": 0.001, "loss": 1.4928, "step": 304800 }, { "epoch": 98.54557207498384, "grad_norm": 1.1660397052764893, "learning_rate": 0.001, "loss": 1.5218, "step": 304900 }, { "epoch": 98.57789269553976, "grad_norm": 1.1860438585281372, "learning_rate": 0.001, "loss": 1.5182, "step": 305000 }, { "epoch": 98.61021331609567, "grad_norm": 1.3886991739273071, "learning_rate": 0.001, "loss": 1.5075, "step": 305100 }, { "epoch": 98.64253393665159, "grad_norm": 2.4036288261413574, "learning_rate": 0.001, "loss": 1.5138, "step": 305200 }, { "epoch": 98.6748545572075, "grad_norm": 1.3081300258636475, "learning_rate": 0.001, "loss": 1.5195, "step": 305300 }, { "epoch": 98.70717517776342, "grad_norm": 1.1856963634490967, "learning_rate": 0.001, "loss": 1.5123, "step": 305400 }, { "epoch": 98.73949579831933, "grad_norm": 1.581463098526001, "learning_rate": 0.001, "loss": 1.5242, "step": 305500 }, { "epoch": 98.77181641887525, "grad_norm": 1.230708360671997, "learning_rate": 0.001, "loss": 1.5321, "step": 305600 }, { "epoch": 98.80413703943115, "grad_norm": 1.2596337795257568, "learning_rate": 0.001, "loss": 1.5291, "step": 305700 }, { "epoch": 98.83645765998708, "grad_norm": 1.3812332153320312, "learning_rate": 0.001, "loss": 1.5228, "step": 305800 }, { "epoch": 98.86877828054298, "grad_norm": 2.0288922786712646, "learning_rate": 0.001, "loss": 1.5349, "step": 305900 }, { "epoch": 98.9010989010989, "grad_norm": 1.2749881744384766, "learning_rate": 0.001, "loss": 1.5383, "step": 306000 }, { "epoch": 98.93341952165481, "grad_norm": 1.3553789854049683, "learning_rate": 0.001, "loss": 1.5486, "step": 306100 }, { "epoch": 98.96574014221073, "grad_norm": 1.373578667640686, "learning_rate": 0.001, "loss": 1.5479, "step": 306200 }, { "epoch": 98.99806076276664, "grad_norm": 2.032135486602783, "learning_rate": 0.001, "loss": 1.5466, "step": 306300 }, { "epoch": 99.03038138332256, "grad_norm": 1.7534527778625488, "learning_rate": 0.001, "loss": 1.4647, "step": 306400 }, { "epoch": 99.06270200387847, "grad_norm": 1.6375123262405396, "learning_rate": 0.001, "loss": 1.4484, "step": 306500 }, { "epoch": 99.09502262443439, "grad_norm": 1.5149050951004028, "learning_rate": 0.001, "loss": 1.4632, "step": 306600 }, { "epoch": 99.1273432449903, "grad_norm": 1.5042215585708618, "learning_rate": 0.001, "loss": 1.4559, "step": 306700 }, { "epoch": 99.15966386554622, "grad_norm": 1.7225611209869385, "learning_rate": 0.001, "loss": 1.4449, "step": 306800 }, { "epoch": 99.19198448610213, "grad_norm": 1.719565987586975, "learning_rate": 0.001, "loss": 1.4625, "step": 306900 }, { "epoch": 99.22430510665805, "grad_norm": 2.224355697631836, "learning_rate": 0.001, "loss": 1.4678, "step": 307000 }, { "epoch": 99.25662572721396, "grad_norm": 1.4405461549758911, "learning_rate": 0.001, "loss": 1.4782, "step": 307100 }, { "epoch": 99.28894634776988, "grad_norm": 1.723349690437317, "learning_rate": 0.001, "loss": 1.4934, "step": 307200 }, { "epoch": 99.32126696832579, "grad_norm": 1.8138247728347778, "learning_rate": 0.001, "loss": 1.4876, "step": 307300 }, { "epoch": 99.35358758888171, "grad_norm": 5.676575660705566, "learning_rate": 0.001, "loss": 1.4791, "step": 307400 }, { "epoch": 99.38590820943762, "grad_norm": 16.33941078186035, "learning_rate": 0.001, "loss": 1.5016, "step": 307500 }, { "epoch": 99.41822882999354, "grad_norm": 15.466409683227539, "learning_rate": 0.001, "loss": 1.4953, "step": 307600 }, { "epoch": 99.45054945054945, "grad_norm": 1.7408664226531982, "learning_rate": 0.001, "loss": 1.4892, "step": 307700 }, { "epoch": 99.48287007110537, "grad_norm": 2.4915411472320557, "learning_rate": 0.001, "loss": 1.4957, "step": 307800 }, { "epoch": 99.51519069166127, "grad_norm": 1.3973917961120605, "learning_rate": 0.001, "loss": 1.5124, "step": 307900 }, { "epoch": 99.5475113122172, "grad_norm": 1.6027195453643799, "learning_rate": 0.001, "loss": 1.5004, "step": 308000 }, { "epoch": 99.5798319327731, "grad_norm": 1.42005455493927, "learning_rate": 0.001, "loss": 1.5197, "step": 308100 }, { "epoch": 99.61215255332903, "grad_norm": 1.9192101955413818, "learning_rate": 0.001, "loss": 1.518, "step": 308200 }, { "epoch": 99.64447317388493, "grad_norm": 1.6204633712768555, "learning_rate": 0.001, "loss": 1.5091, "step": 308300 }, { "epoch": 99.67679379444085, "grad_norm": 1.4960740804672241, "learning_rate": 0.001, "loss": 1.5188, "step": 308400 }, { "epoch": 99.70911441499676, "grad_norm": 1.8098118305206299, "learning_rate": 0.001, "loss": 1.5222, "step": 308500 }, { "epoch": 99.74143503555268, "grad_norm": 3.732240915298462, "learning_rate": 0.001, "loss": 1.5251, "step": 308600 }, { "epoch": 99.77375565610859, "grad_norm": 2.231865406036377, "learning_rate": 0.001, "loss": 1.5284, "step": 308700 }, { "epoch": 99.80607627666451, "grad_norm": 2.645705223083496, "learning_rate": 0.001, "loss": 1.5143, "step": 308800 }, { "epoch": 99.83839689722042, "grad_norm": 1.2635338306427002, "learning_rate": 0.001, "loss": 1.5285, "step": 308900 }, { "epoch": 99.87071751777634, "grad_norm": 1.598740816116333, "learning_rate": 0.001, "loss": 1.5382, "step": 309000 }, { "epoch": 99.90303813833225, "grad_norm": 1.4834953546524048, "learning_rate": 0.001, "loss": 1.5417, "step": 309100 }, { "epoch": 99.93535875888817, "grad_norm": 1.325324535369873, "learning_rate": 0.001, "loss": 1.5365, "step": 309200 }, { "epoch": 99.96767937944408, "grad_norm": 3.42002534866333, "learning_rate": 0.001, "loss": 1.5447, "step": 309300 }, { "epoch": 100.0, "grad_norm": 2.38211989402771, "learning_rate": 0.001, "loss": 1.4991, "step": 309400 }, { "epoch": 100.0, "step": 309400, "total_flos": 6.968427992064e+17, "train_loss": 2.023799631258096, "train_runtime": 29940.4391, "train_samples_per_second": 330.656, "train_steps_per_second": 10.334 } ], "logging_steps": 100, "max_steps": 309400, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.968427992064e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }