{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 2783, "global_step": 579600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004830917874396135, "grad_norm": 2.5537285804748535, "learning_rate": 3.9539899352983465e-05, "loss": 5.5989, "step": 56 }, { "epoch": 0.00966183574879227, "grad_norm": 0.9791821837425232, "learning_rate": 7.979870596693027e-05, "loss": 4.2859, "step": 112 }, { "epoch": 0.014492753623188406, "grad_norm": 1.1985901594161987, "learning_rate": 0.00012005751258087708, "loss": 4.0112, "step": 168 }, { "epoch": 0.01932367149758454, "grad_norm": 1.6471028327941895, "learning_rate": 0.00016031631919482387, "loss": 3.8553, "step": 224 }, { "epoch": 0.024154589371980676, "grad_norm": 2.3778419494628906, "learning_rate": 0.0002005751258087707, "loss": 3.7549, "step": 280 }, { "epoch": 0.028985507246376812, "grad_norm": 2.404374122619629, "learning_rate": 0.0002408339324227175, "loss": 3.6717, "step": 336 }, { "epoch": 0.033816425120772944, "grad_norm": 2.839930295944214, "learning_rate": 0.00028109273903666426, "loss": 3.6109, "step": 392 }, { "epoch": 0.03864734299516908, "grad_norm": 10.898863792419434, "learning_rate": 0.0003213515456506111, "loss": 3.5459, "step": 448 }, { "epoch": 0.043478260869565216, "grad_norm": 1.387969732284546, "learning_rate": 0.0003616103522645579, "loss": 3.4739, "step": 504 }, { "epoch": 0.04830917874396135, "grad_norm": 4.0545196533203125, "learning_rate": 0.00040186915887850466, "loss": 3.419, "step": 560 }, { "epoch": 0.05314009661835749, "grad_norm": 2.1500046253204346, "learning_rate": 0.0004421279654924515, "loss": 3.3747, "step": 616 }, { "epoch": 0.057971014492753624, "grad_norm": 2.3752946853637695, "learning_rate": 0.0004823867721063983, "loss": 3.3339, "step": 672 }, { "epoch": 0.06280193236714976, "grad_norm": 2.856977701187134, "learning_rate": 0.0005226455787203452, "loss": 3.3246, "step": 728 }, { "epoch": 0.06763285024154589, "grad_norm": 1.4701484441757202, "learning_rate": 0.0005629043853342918, "loss": 3.2797, "step": 784 }, { "epoch": 0.07246376811594203, "grad_norm": 1.2803332805633545, "learning_rate": 0.0006031631919482386, "loss": 3.2239, "step": 840 }, { "epoch": 0.07729468599033816, "grad_norm": 1.0485491752624512, "learning_rate": 0.0006434219985621856, "loss": 3.172, "step": 896 }, { "epoch": 0.0821256038647343, "grad_norm": 1.3146588802337646, "learning_rate": 0.0006836808051761322, "loss": 3.168, "step": 952 }, { "epoch": 0.08695652173913043, "grad_norm": 1.1967456340789795, "learning_rate": 0.000723939611790079, "loss": 3.164, "step": 1008 }, { "epoch": 0.09178743961352658, "grad_norm": 4.6934051513671875, "learning_rate": 0.000764198418404026, "loss": 3.1539, "step": 1064 }, { "epoch": 0.0966183574879227, "grad_norm": 1.1069180965423584, "learning_rate": 0.0008044572250179727, "loss": 3.151, "step": 1120 }, { "epoch": 0.10144927536231885, "grad_norm": 1.1855157613754272, "learning_rate": 0.0008447160316319194, "loss": 3.1367, "step": 1176 }, { "epoch": 0.10628019323671498, "grad_norm": 2.2205240726470947, "learning_rate": 0.0008849748382458663, "loss": 3.1277, "step": 1232 }, { "epoch": 0.1111111111111111, "grad_norm": 16.359506607055664, "learning_rate": 0.0009252336448598131, "loss": 3.1434, "step": 1288 }, { "epoch": 0.11594202898550725, "grad_norm": 1.0570050477981567, "learning_rate": 0.0009654924514737598, "loss": 3.1172, "step": 1344 }, { "epoch": 0.12077294685990338, "grad_norm": 1.5385373830795288, "learning_rate": 0.001, "loss": 3.0791, "step": 1400 }, { "epoch": 0.12560386473429952, "grad_norm": 2.051922082901001, "learning_rate": 0.001, "loss": 3.0802, "step": 1456 }, { "epoch": 0.13043478260869565, "grad_norm": 0.982364296913147, "learning_rate": 0.001, "loss": 3.0863, "step": 1512 }, { "epoch": 0.13526570048309178, "grad_norm": 0.7527421116828918, "learning_rate": 0.001, "loss": 3.0592, "step": 1568 }, { "epoch": 0.14009661835748793, "grad_norm": 0.9949536919593811, "learning_rate": 0.001, "loss": 3.0478, "step": 1624 }, { "epoch": 0.14492753623188406, "grad_norm": 0.9814801216125488, "learning_rate": 0.001, "loss": 3.0359, "step": 1680 }, { "epoch": 0.1497584541062802, "grad_norm": 0.5726284980773926, "learning_rate": 0.001, "loss": 3.0048, "step": 1736 }, { "epoch": 0.15458937198067632, "grad_norm": 0.730490505695343, "learning_rate": 0.001, "loss": 2.9867, "step": 1792 }, { "epoch": 0.15942028985507245, "grad_norm": 0.7469838261604309, "learning_rate": 0.001, "loss": 2.9971, "step": 1848 }, { "epoch": 0.1642512077294686, "grad_norm": 2.1876564025878906, "learning_rate": 0.001, "loss": 2.9858, "step": 1904 }, { "epoch": 0.16908212560386474, "grad_norm": 0.5412757396697998, "learning_rate": 0.001, "loss": 2.9758, "step": 1960 }, { "epoch": 0.17391304347826086, "grad_norm": 0.6346341371536255, "learning_rate": 0.001, "loss": 2.9901, "step": 2016 }, { "epoch": 0.178743961352657, "grad_norm": 1.2752057313919067, "learning_rate": 0.001, "loss": 2.9721, "step": 2072 }, { "epoch": 0.18357487922705315, "grad_norm": 0.5239083170890808, "learning_rate": 0.001, "loss": 2.9763, "step": 2128 }, { "epoch": 0.18840579710144928, "grad_norm": 0.7067678570747375, "learning_rate": 0.001, "loss": 2.9666, "step": 2184 }, { "epoch": 0.1932367149758454, "grad_norm": 0.9121713638305664, "learning_rate": 0.001, "loss": 2.9591, "step": 2240 }, { "epoch": 0.19806763285024154, "grad_norm": 0.545002818107605, "learning_rate": 0.001, "loss": 2.946, "step": 2296 }, { "epoch": 0.2028985507246377, "grad_norm": 1.5359148979187012, "learning_rate": 0.001, "loss": 2.9436, "step": 2352 }, { "epoch": 0.20772946859903382, "grad_norm": 1.2984882593154907, "learning_rate": 0.001, "loss": 2.9719, "step": 2408 }, { "epoch": 0.21256038647342995, "grad_norm": 0.5250646471977234, "learning_rate": 0.001, "loss": 2.957, "step": 2464 }, { "epoch": 0.21739130434782608, "grad_norm": 0.6154928803443909, "learning_rate": 0.001, "loss": 2.9321, "step": 2520 }, { "epoch": 0.2222222222222222, "grad_norm": 0.8087754249572754, "learning_rate": 0.001, "loss": 2.9349, "step": 2576 }, { "epoch": 0.22705314009661837, "grad_norm": 0.6237892508506775, "learning_rate": 0.001, "loss": 2.9337, "step": 2632 }, { "epoch": 0.2318840579710145, "grad_norm": 0.4661741852760315, "learning_rate": 0.001, "loss": 2.9089, "step": 2688 }, { "epoch": 0.23671497584541062, "grad_norm": 0.5616157054901123, "learning_rate": 0.001, "loss": 2.897, "step": 2744 }, { "epoch": 0.24154589371980675, "grad_norm": 14.491456031799316, "learning_rate": 0.001, "loss": 2.8879, "step": 2800 }, { "epoch": 0.2463768115942029, "grad_norm": 1.5201140642166138, "learning_rate": 0.001, "loss": 2.891, "step": 2856 }, { "epoch": 0.25120772946859904, "grad_norm": 1.0428102016448975, "learning_rate": 0.001, "loss": 2.9029, "step": 2912 }, { "epoch": 0.2560386473429952, "grad_norm": 0.6364895701408386, "learning_rate": 0.001, "loss": 2.8891, "step": 2968 }, { "epoch": 0.2608695652173913, "grad_norm": 0.5871291756629944, "learning_rate": 0.001, "loss": 2.8822, "step": 3024 }, { "epoch": 0.26570048309178745, "grad_norm": 1.1912660598754883, "learning_rate": 0.001, "loss": 2.8918, "step": 3080 }, { "epoch": 0.27053140096618356, "grad_norm": 0.6245777606964111, "learning_rate": 0.001, "loss": 2.8923, "step": 3136 }, { "epoch": 0.2753623188405797, "grad_norm": 0.5338204503059387, "learning_rate": 0.001, "loss": 2.8884, "step": 3192 }, { "epoch": 0.28019323671497587, "grad_norm": 1.2675994634628296, "learning_rate": 0.001, "loss": 2.8719, "step": 3248 }, { "epoch": 0.28502415458937197, "grad_norm": 0.5750728845596313, "learning_rate": 0.001, "loss": 2.8696, "step": 3304 }, { "epoch": 0.2898550724637681, "grad_norm": 0.5912678837776184, "learning_rate": 0.001, "loss": 2.865, "step": 3360 }, { "epoch": 0.2946859903381642, "grad_norm": 1.8540053367614746, "learning_rate": 0.001, "loss": 2.8636, "step": 3416 }, { "epoch": 0.2995169082125604, "grad_norm": 0.8271320462226868, "learning_rate": 0.001, "loss": 2.865, "step": 3472 }, { "epoch": 0.30434782608695654, "grad_norm": 0.6457290649414062, "learning_rate": 0.001, "loss": 2.8557, "step": 3528 }, { "epoch": 0.30917874396135264, "grad_norm": 0.5149587988853455, "learning_rate": 0.001, "loss": 2.8526, "step": 3584 }, { "epoch": 0.3140096618357488, "grad_norm": 0.39953938126564026, "learning_rate": 0.001, "loss": 2.8266, "step": 3640 }, { "epoch": 0.3188405797101449, "grad_norm": 0.5023137927055359, "learning_rate": 0.001, "loss": 2.8189, "step": 3696 }, { "epoch": 0.32367149758454106, "grad_norm": 0.4729611575603485, "learning_rate": 0.001, "loss": 2.8405, "step": 3752 }, { "epoch": 0.3285024154589372, "grad_norm": 0.5361127853393555, "learning_rate": 0.001, "loss": 2.822, "step": 3808 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5213225483894348, "learning_rate": 0.001, "loss": 2.8117, "step": 3864 }, { "epoch": 0.33816425120772947, "grad_norm": 0.4088309705257416, "learning_rate": 0.001, "loss": 2.8038, "step": 3920 }, { "epoch": 0.34299516908212563, "grad_norm": 0.5374923348426819, "learning_rate": 0.001, "loss": 2.817, "step": 3976 }, { "epoch": 0.34782608695652173, "grad_norm": 1.1506937742233276, "learning_rate": 0.001, "loss": 2.8348, "step": 4032 }, { "epoch": 0.3526570048309179, "grad_norm": 26.974454879760742, "learning_rate": 0.001, "loss": 2.8107, "step": 4088 }, { "epoch": 0.357487922705314, "grad_norm": 0.4402843713760376, "learning_rate": 0.001, "loss": 2.7993, "step": 4144 }, { "epoch": 0.36231884057971014, "grad_norm": 0.6160932183265686, "learning_rate": 0.001, "loss": 2.7813, "step": 4200 }, { "epoch": 0.3671497584541063, "grad_norm": 0.4454444646835327, "learning_rate": 0.001, "loss": 2.8068, "step": 4256 }, { "epoch": 0.3719806763285024, "grad_norm": 0.5421523451805115, "learning_rate": 0.001, "loss": 2.8237, "step": 4312 }, { "epoch": 0.37681159420289856, "grad_norm": 2.7624282836914062, "learning_rate": 0.001, "loss": 2.8097, "step": 4368 }, { "epoch": 0.38164251207729466, "grad_norm": 1.5045050382614136, "learning_rate": 0.001, "loss": 2.79, "step": 4424 }, { "epoch": 0.3864734299516908, "grad_norm": 0.4124404191970825, "learning_rate": 0.001, "loss": 2.782, "step": 4480 }, { "epoch": 0.391304347826087, "grad_norm": 0.4538789987564087, "learning_rate": 0.001, "loss": 2.7813, "step": 4536 }, { "epoch": 0.3961352657004831, "grad_norm": 0.6743329167366028, "learning_rate": 0.001, "loss": 2.7994, "step": 4592 }, { "epoch": 0.40096618357487923, "grad_norm": 1.868725061416626, "learning_rate": 0.001, "loss": 2.7789, "step": 4648 }, { "epoch": 0.4057971014492754, "grad_norm": 0.7265204191207886, "learning_rate": 0.001, "loss": 2.8014, "step": 4704 }, { "epoch": 0.4106280193236715, "grad_norm": 0.8547886610031128, "learning_rate": 0.001, "loss": 2.7869, "step": 4760 }, { "epoch": 0.41545893719806765, "grad_norm": 2.316642999649048, "learning_rate": 0.001, "loss": 2.7914, "step": 4816 }, { "epoch": 0.42028985507246375, "grad_norm": 0.5473759770393372, "learning_rate": 0.001, "loss": 2.7652, "step": 4872 }, { "epoch": 0.4251207729468599, "grad_norm": 0.42073145508766174, "learning_rate": 0.001, "loss": 2.7829, "step": 4928 }, { "epoch": 0.42995169082125606, "grad_norm": 0.40427178144454956, "learning_rate": 0.001, "loss": 2.7786, "step": 4984 }, { "epoch": 0.43478260869565216, "grad_norm": 2.3570165634155273, "learning_rate": 0.001, "loss": 2.7673, "step": 5040 }, { "epoch": 0.4396135265700483, "grad_norm": 0.5806084275245667, "learning_rate": 0.001, "loss": 2.7583, "step": 5096 }, { "epoch": 0.4444444444444444, "grad_norm": 0.4433354139328003, "learning_rate": 0.001, "loss": 2.7771, "step": 5152 }, { "epoch": 0.4492753623188406, "grad_norm": 0.5527592897415161, "learning_rate": 0.001, "loss": 2.7567, "step": 5208 }, { "epoch": 0.45410628019323673, "grad_norm": 0.5190341472625732, "learning_rate": 0.001, "loss": 2.7683, "step": 5264 }, { "epoch": 0.45893719806763283, "grad_norm": 0.7538208365440369, "learning_rate": 0.001, "loss": 2.7646, "step": 5320 }, { "epoch": 0.463768115942029, "grad_norm": 0.5732608437538147, "learning_rate": 0.001, "loss": 2.7684, "step": 5376 }, { "epoch": 0.46859903381642515, "grad_norm": 0.9430089592933655, "learning_rate": 0.001, "loss": 2.7726, "step": 5432 }, { "epoch": 0.47342995169082125, "grad_norm": 0.4525170922279358, "learning_rate": 0.001, "loss": 2.7672, "step": 5488 }, { "epoch": 0.4782608695652174, "grad_norm": 1.7289822101593018, "learning_rate": 0.001, "loss": 2.7696, "step": 5544 }, { "epoch": 0.4830917874396135, "grad_norm": 0.7026426792144775, "learning_rate": 0.001, "loss": 2.7558, "step": 5600 }, { "epoch": 0.48792270531400966, "grad_norm": 1.3366003036499023, "learning_rate": 0.001, "loss": 2.7427, "step": 5656 }, { "epoch": 0.4927536231884058, "grad_norm": 0.5988692045211792, "learning_rate": 0.001, "loss": 2.7447, "step": 5712 }, { "epoch": 0.4975845410628019, "grad_norm": 0.421079158782959, "learning_rate": 0.001, "loss": 2.7457, "step": 5768 }, { "epoch": 0.5024154589371981, "grad_norm": 0.6967017650604248, "learning_rate": 0.001, "loss": 2.752, "step": 5824 }, { "epoch": 0.5072463768115942, "grad_norm": 0.520272433757782, "learning_rate": 0.001, "loss": 2.7445, "step": 5880 }, { "epoch": 0.5120772946859904, "grad_norm": 0.5526834726333618, "learning_rate": 0.001, "loss": 2.7273, "step": 5936 }, { "epoch": 0.5169082125603864, "grad_norm": 0.44426271319389343, "learning_rate": 0.001, "loss": 2.7303, "step": 5992 }, { "epoch": 0.5217391304347826, "grad_norm": 0.600886881351471, "learning_rate": 0.001, "loss": 2.7161, "step": 6048 }, { "epoch": 0.5265700483091788, "grad_norm": 0.4419015049934387, "learning_rate": 0.001, "loss": 2.7262, "step": 6104 }, { "epoch": 0.5314009661835749, "grad_norm": 0.6358337998390198, "learning_rate": 0.001, "loss": 2.7241, "step": 6160 }, { "epoch": 0.5362318840579711, "grad_norm": 0.7351999282836914, "learning_rate": 0.001, "loss": 2.7136, "step": 6216 }, { "epoch": 0.5410628019323671, "grad_norm": 0.3952183127403259, "learning_rate": 0.001, "loss": 2.7197, "step": 6272 }, { "epoch": 0.5458937198067633, "grad_norm": 0.7855304479598999, "learning_rate": 0.001, "loss": 2.7155, "step": 6328 }, { "epoch": 0.5507246376811594, "grad_norm": 0.4529874920845032, "learning_rate": 0.001, "loss": 2.7097, "step": 6384 }, { "epoch": 0.5555555555555556, "grad_norm": 0.43028897047042847, "learning_rate": 0.001, "loss": 2.7161, "step": 6440 }, { "epoch": 0.5603864734299517, "grad_norm": 0.7543787360191345, "learning_rate": 0.001, "loss": 2.7073, "step": 6496 }, { "epoch": 0.5652173913043478, "grad_norm": 0.4770491421222687, "learning_rate": 0.001, "loss": 2.705, "step": 6552 }, { "epoch": 0.5700483091787439, "grad_norm": 0.5735542178153992, "learning_rate": 0.001, "loss": 2.698, "step": 6608 }, { "epoch": 0.5748792270531401, "grad_norm": 0.4636959731578827, "learning_rate": 0.001, "loss": 2.7053, "step": 6664 }, { "epoch": 0.5797101449275363, "grad_norm": 0.4329095184803009, "learning_rate": 0.001, "loss": 2.7017, "step": 6720 }, { "epoch": 0.5845410628019324, "grad_norm": 15.111337661743164, "learning_rate": 0.001, "loss": 2.6841, "step": 6776 }, { "epoch": 0.5893719806763285, "grad_norm": 1.0087028741836548, "learning_rate": 0.001, "loss": 2.6914, "step": 6832 }, { "epoch": 0.5942028985507246, "grad_norm": 0.38676732778549194, "learning_rate": 0.001, "loss": 2.6915, "step": 6888 }, { "epoch": 0.5990338164251208, "grad_norm": 0.5741737484931946, "learning_rate": 0.001, "loss": 2.6856, "step": 6944 }, { "epoch": 0.6038647342995169, "grad_norm": 0.5002315640449524, "learning_rate": 0.001, "loss": 2.6959, "step": 7000 }, { "epoch": 0.6086956521739131, "grad_norm": 0.5718513131141663, "learning_rate": 0.001, "loss": 2.7031, "step": 7056 }, { "epoch": 0.6135265700483091, "grad_norm": 0.5187315344810486, "learning_rate": 0.001, "loss": 2.7019, "step": 7112 }, { "epoch": 0.6183574879227053, "grad_norm": 0.7257050275802612, "learning_rate": 0.001, "loss": 2.6985, "step": 7168 }, { "epoch": 0.6231884057971014, "grad_norm": 0.6796103715896606, "learning_rate": 0.001, "loss": 2.7002, "step": 7224 }, { "epoch": 0.6280193236714976, "grad_norm": 0.6530753970146179, "learning_rate": 0.001, "loss": 2.6943, "step": 7280 }, { "epoch": 0.6328502415458938, "grad_norm": 0.6346811652183533, "learning_rate": 0.001, "loss": 2.7002, "step": 7336 }, { "epoch": 0.6376811594202898, "grad_norm": 2.5241458415985107, "learning_rate": 0.001, "loss": 2.6976, "step": 7392 }, { "epoch": 0.642512077294686, "grad_norm": 0.6433071494102478, "learning_rate": 0.001, "loss": 2.7005, "step": 7448 }, { "epoch": 0.6473429951690821, "grad_norm": 0.44189754128456116, "learning_rate": 0.001, "loss": 2.6813, "step": 7504 }, { "epoch": 0.6521739130434783, "grad_norm": 0.5451778769493103, "learning_rate": 0.001, "loss": 2.6796, "step": 7560 }, { "epoch": 0.6570048309178744, "grad_norm": 0.4468247890472412, "learning_rate": 0.001, "loss": 2.6873, "step": 7616 }, { "epoch": 0.6618357487922706, "grad_norm": 0.5533420443534851, "learning_rate": 0.001, "loss": 2.6867, "step": 7672 }, { "epoch": 0.6666666666666666, "grad_norm": 0.40803027153015137, "learning_rate": 0.001, "loss": 2.7191, "step": 7728 }, { "epoch": 0.6714975845410628, "grad_norm": 0.47552070021629333, "learning_rate": 0.001, "loss": 2.6886, "step": 7784 }, { "epoch": 0.6763285024154589, "grad_norm": 0.621493399143219, "learning_rate": 0.001, "loss": 2.6887, "step": 7840 }, { "epoch": 0.6811594202898551, "grad_norm": 0.8410442471504211, "learning_rate": 0.001, "loss": 2.6666, "step": 7896 }, { "epoch": 0.6859903381642513, "grad_norm": 0.8546284437179565, "learning_rate": 0.001, "loss": 2.667, "step": 7952 }, { "epoch": 0.6908212560386473, "grad_norm": 0.42941534519195557, "learning_rate": 0.001, "loss": 2.6665, "step": 8008 }, { "epoch": 0.6956521739130435, "grad_norm": 0.603959321975708, "learning_rate": 0.001, "loss": 2.6733, "step": 8064 }, { "epoch": 0.7004830917874396, "grad_norm": 0.6979280710220337, "learning_rate": 0.001, "loss": 2.665, "step": 8120 }, { "epoch": 0.7053140096618358, "grad_norm": 0.5473920702934265, "learning_rate": 0.001, "loss": 2.6592, "step": 8176 }, { "epoch": 0.7101449275362319, "grad_norm": 3.5187981128692627, "learning_rate": 0.001, "loss": 2.653, "step": 8232 }, { "epoch": 0.714975845410628, "grad_norm": 0.6898009181022644, "learning_rate": 0.001, "loss": 2.6634, "step": 8288 }, { "epoch": 0.7198067632850241, "grad_norm": 0.6146760582923889, "learning_rate": 0.001, "loss": 2.6719, "step": 8344 }, { "epoch": 0.7246376811594203, "grad_norm": 0.723409116268158, "learning_rate": 0.001, "loss": 2.6557, "step": 8400 }, { "epoch": 0.7294685990338164, "grad_norm": 0.7172557711601257, "learning_rate": 0.001, "loss": 2.6575, "step": 8456 }, { "epoch": 0.7342995169082126, "grad_norm": 0.5060122013092041, "learning_rate": 0.001, "loss": 2.6628, "step": 8512 }, { "epoch": 0.7391304347826086, "grad_norm": 1.128843069076538, "learning_rate": 0.001, "loss": 2.6571, "step": 8568 }, { "epoch": 0.7439613526570048, "grad_norm": 0.44131237268447876, "learning_rate": 0.001, "loss": 2.6568, "step": 8624 }, { "epoch": 0.748792270531401, "grad_norm": 1.0537309646606445, "learning_rate": 0.001, "loss": 2.6586, "step": 8680 }, { "epoch": 0.7536231884057971, "grad_norm": 0.5386660099029541, "learning_rate": 0.001, "loss": 2.6512, "step": 8736 }, { "epoch": 0.7584541062801933, "grad_norm": 0.43016335368156433, "learning_rate": 0.001, "loss": 2.6554, "step": 8792 }, { "epoch": 0.7632850241545893, "grad_norm": 2.5915980339050293, "learning_rate": 0.001, "loss": 2.6363, "step": 8848 }, { "epoch": 0.7681159420289855, "grad_norm": 0.533907949924469, "learning_rate": 0.001, "loss": 2.6539, "step": 8904 }, { "epoch": 0.7729468599033816, "grad_norm": 0.4950580894947052, "learning_rate": 0.001, "loss": 2.6495, "step": 8960 }, { "epoch": 0.7777777777777778, "grad_norm": 1.6670414209365845, "learning_rate": 0.001, "loss": 2.6701, "step": 9016 }, { "epoch": 0.782608695652174, "grad_norm": 0.44445034861564636, "learning_rate": 0.001, "loss": 2.6641, "step": 9072 }, { "epoch": 0.7874396135265701, "grad_norm": 0.5860577821731567, "learning_rate": 0.001, "loss": 2.6442, "step": 9128 }, { "epoch": 0.7922705314009661, "grad_norm": 0.3840193450450897, "learning_rate": 0.001, "loss": 2.6559, "step": 9184 }, { "epoch": 0.7971014492753623, "grad_norm": 0.9412776827812195, "learning_rate": 0.001, "loss": 2.6315, "step": 9240 }, { "epoch": 0.8019323671497585, "grad_norm": 0.4733305275440216, "learning_rate": 0.001, "loss": 2.6414, "step": 9296 }, { "epoch": 0.8067632850241546, "grad_norm": 0.7020483613014221, "learning_rate": 0.001, "loss": 2.6484, "step": 9352 }, { "epoch": 0.8115942028985508, "grad_norm": 0.6107551455497742, "learning_rate": 0.001, "loss": 2.6384, "step": 9408 }, { "epoch": 0.8164251207729468, "grad_norm": 0.49444472789764404, "learning_rate": 0.001, "loss": 2.6365, "step": 9464 }, { "epoch": 0.821256038647343, "grad_norm": 0.4061006009578705, "learning_rate": 0.001, "loss": 2.6277, "step": 9520 }, { "epoch": 0.8260869565217391, "grad_norm": 0.38213226199150085, "learning_rate": 0.001, "loss": 2.6328, "step": 9576 }, { "epoch": 0.8309178743961353, "grad_norm": 0.4887073040008545, "learning_rate": 0.001, "loss": 2.6279, "step": 9632 }, { "epoch": 0.8357487922705314, "grad_norm": 0.5472325682640076, "learning_rate": 0.001, "loss": 2.6209, "step": 9688 }, { "epoch": 0.8405797101449275, "grad_norm": 0.47439444065093994, "learning_rate": 0.001, "loss": 2.6298, "step": 9744 }, { "epoch": 0.8454106280193237, "grad_norm": 1.0408443212509155, "learning_rate": 0.001, "loss": 2.6428, "step": 9800 }, { "epoch": 0.8502415458937198, "grad_norm": 0.37197762727737427, "learning_rate": 0.001, "loss": 2.6288, "step": 9856 }, { "epoch": 0.855072463768116, "grad_norm": 0.6389931440353394, "learning_rate": 0.001, "loss": 2.6346, "step": 9912 }, { "epoch": 0.8599033816425121, "grad_norm": 0.42257222533226013, "learning_rate": 0.001, "loss": 2.6203, "step": 9968 }, { "epoch": 0.8647342995169082, "grad_norm": 0.5119916796684265, "learning_rate": 0.001, "loss": 2.6045, "step": 10024 }, { "epoch": 0.8695652173913043, "grad_norm": 0.40754446387290955, "learning_rate": 0.001, "loss": 2.6078, "step": 10080 }, { "epoch": 0.8743961352657005, "grad_norm": 1.4318445920944214, "learning_rate": 0.001, "loss": 2.6103, "step": 10136 }, { "epoch": 0.8792270531400966, "grad_norm": 0.47217634320259094, "learning_rate": 0.001, "loss": 2.6194, "step": 10192 }, { "epoch": 0.8840579710144928, "grad_norm": 0.44421687722206116, "learning_rate": 0.001, "loss": 2.614, "step": 10248 }, { "epoch": 0.8888888888888888, "grad_norm": 0.49661970138549805, "learning_rate": 0.001, "loss": 2.6052, "step": 10304 }, { "epoch": 0.893719806763285, "grad_norm": 30.351938247680664, "learning_rate": 0.001, "loss": 2.6135, "step": 10360 }, { "epoch": 0.8985507246376812, "grad_norm": 0.5131446123123169, "learning_rate": 0.001, "loss": 2.6159, "step": 10416 }, { "epoch": 0.9033816425120773, "grad_norm": 0.5762516856193542, "learning_rate": 0.001, "loss": 2.6145, "step": 10472 }, { "epoch": 0.9082125603864735, "grad_norm": 0.4128069579601288, "learning_rate": 0.001, "loss": 2.6181, "step": 10528 }, { "epoch": 0.9130434782608695, "grad_norm": 0.4651366174221039, "learning_rate": 0.001, "loss": 2.6083, "step": 10584 }, { "epoch": 0.9178743961352657, "grad_norm": 0.42533278465270996, "learning_rate": 0.001, "loss": 2.6271, "step": 10640 }, { "epoch": 0.9227053140096618, "grad_norm": 0.4388916492462158, "learning_rate": 0.001, "loss": 2.5972, "step": 10696 }, { "epoch": 0.927536231884058, "grad_norm": 0.5819514989852905, "learning_rate": 0.001, "loss": 2.5942, "step": 10752 }, { "epoch": 0.9323671497584541, "grad_norm": 0.5410124659538269, "learning_rate": 0.001, "loss": 2.5966, "step": 10808 }, { "epoch": 0.9371980676328503, "grad_norm": 2.474395751953125, "learning_rate": 0.001, "loss": 2.6074, "step": 10864 }, { "epoch": 0.9420289855072463, "grad_norm": 1.7280431985855103, "learning_rate": 0.001, "loss": 2.5975, "step": 10920 }, { "epoch": 0.9468599033816425, "grad_norm": 0.5472970008850098, "learning_rate": 0.001, "loss": 2.6011, "step": 10976 }, { "epoch": 0.9516908212560387, "grad_norm": 0.6419417858123779, "learning_rate": 0.001, "loss": 2.5942, "step": 11032 }, { "epoch": 0.9565217391304348, "grad_norm": 0.49544742703437805, "learning_rate": 0.001, "loss": 2.5985, "step": 11088 }, { "epoch": 0.961352657004831, "grad_norm": 0.41243302822113037, "learning_rate": 0.001, "loss": 2.5896, "step": 11144 }, { "epoch": 0.966183574879227, "grad_norm": 0.437237411737442, "learning_rate": 0.001, "loss": 2.5921, "step": 11200 }, { "epoch": 0.9710144927536232, "grad_norm": 0.4309455454349518, "learning_rate": 0.001, "loss": 2.5913, "step": 11256 }, { "epoch": 0.9758454106280193, "grad_norm": 0.49835360050201416, "learning_rate": 0.001, "loss": 2.5867, "step": 11312 }, { "epoch": 0.9806763285024155, "grad_norm": 0.7193912267684937, "learning_rate": 0.001, "loss": 2.5837, "step": 11368 }, { "epoch": 0.9855072463768116, "grad_norm": 1.3050037622451782, "learning_rate": 0.001, "loss": 2.5919, "step": 11424 }, { "epoch": 0.9903381642512077, "grad_norm": 0.41976985335350037, "learning_rate": 0.001, "loss": 2.5937, "step": 11480 }, { "epoch": 0.9951690821256038, "grad_norm": 0.41240671277046204, "learning_rate": 0.001, "loss": 2.5878, "step": 11536 }, { "epoch": 1.0, "grad_norm": 0.7706860899925232, "learning_rate": 0.001, "loss": 2.5806, "step": 11592 }, { "epoch": 1.0048309178743962, "grad_norm": 3.327430248260498, "learning_rate": 0.001, "loss": 2.5611, "step": 11648 }, { "epoch": 1.0096618357487923, "grad_norm": 0.4381028115749359, "learning_rate": 0.001, "loss": 2.5579, "step": 11704 }, { "epoch": 1.0144927536231885, "grad_norm": 0.36722248792648315, "learning_rate": 0.001, "loss": 2.5613, "step": 11760 }, { "epoch": 1.0193236714975846, "grad_norm": 0.5505610108375549, "learning_rate": 0.001, "loss": 2.5644, "step": 11816 }, { "epoch": 1.0241545893719808, "grad_norm": 0.3663302958011627, "learning_rate": 0.001, "loss": 2.5538, "step": 11872 }, { "epoch": 1.0289855072463767, "grad_norm": 0.7702949047088623, "learning_rate": 0.001, "loss": 2.5446, "step": 11928 }, { "epoch": 1.0338164251207729, "grad_norm": 0.5923094153404236, "learning_rate": 0.001, "loss": 2.5511, "step": 11984 }, { "epoch": 1.038647342995169, "grad_norm": 0.8538995981216431, "learning_rate": 0.001, "loss": 2.5559, "step": 12040 }, { "epoch": 1.0434782608695652, "grad_norm": 0.46077409386634827, "learning_rate": 0.001, "loss": 2.5492, "step": 12096 }, { "epoch": 1.0483091787439613, "grad_norm": 0.4534064829349518, "learning_rate": 0.001, "loss": 2.5517, "step": 12152 }, { "epoch": 1.0531400966183575, "grad_norm": 19.5181941986084, "learning_rate": 0.001, "loss": 2.5423, "step": 12208 }, { "epoch": 1.0579710144927537, "grad_norm": 0.5569064617156982, "learning_rate": 0.001, "loss": 2.5709, "step": 12264 }, { "epoch": 1.0628019323671498, "grad_norm": 1.4424077272415161, "learning_rate": 0.001, "loss": 2.5801, "step": 12320 }, { "epoch": 1.067632850241546, "grad_norm": 0.5305166244506836, "learning_rate": 0.001, "loss": 2.5554, "step": 12376 }, { "epoch": 1.0724637681159421, "grad_norm": 1.1172679662704468, "learning_rate": 0.001, "loss": 2.5635, "step": 12432 }, { "epoch": 1.077294685990338, "grad_norm": 0.6793813109397888, "learning_rate": 0.001, "loss": 2.5673, "step": 12488 }, { "epoch": 1.0821256038647342, "grad_norm": 1.1641849279403687, "learning_rate": 0.001, "loss": 2.5855, "step": 12544 }, { "epoch": 1.0869565217391304, "grad_norm": 0.5341097116470337, "learning_rate": 0.001, "loss": 2.5788, "step": 12600 }, { "epoch": 1.0917874396135265, "grad_norm": 0.49252578616142273, "learning_rate": 0.001, "loss": 2.5746, "step": 12656 }, { "epoch": 1.0966183574879227, "grad_norm": 0.7578367590904236, "learning_rate": 0.001, "loss": 2.5494, "step": 12712 }, { "epoch": 1.1014492753623188, "grad_norm": 0.7495577335357666, "learning_rate": 0.001, "loss": 2.5565, "step": 12768 }, { "epoch": 1.106280193236715, "grad_norm": 0.8345962762832642, "learning_rate": 0.001, "loss": 2.5549, "step": 12824 }, { "epoch": 1.1111111111111112, "grad_norm": 0.8163776993751526, "learning_rate": 0.001, "loss": 2.5496, "step": 12880 }, { "epoch": 1.1159420289855073, "grad_norm": 2.7103028297424316, "learning_rate": 0.001, "loss": 2.555, "step": 12936 }, { "epoch": 1.1207729468599035, "grad_norm": 1.8082388639450073, "learning_rate": 0.001, "loss": 2.5497, "step": 12992 }, { "epoch": 1.1256038647342996, "grad_norm": 0.5198656320571899, "learning_rate": 0.001, "loss": 2.5321, "step": 13048 }, { "epoch": 1.1304347826086956, "grad_norm": 0.7601255178451538, "learning_rate": 0.001, "loss": 2.5489, "step": 13104 }, { "epoch": 1.1352657004830917, "grad_norm": 0.5740835070610046, "learning_rate": 0.001, "loss": 2.5454, "step": 13160 }, { "epoch": 1.1400966183574879, "grad_norm": 0.6274489760398865, "learning_rate": 0.001, "loss": 2.532, "step": 13216 }, { "epoch": 1.144927536231884, "grad_norm": 0.5133732557296753, "learning_rate": 0.001, "loss": 2.5303, "step": 13272 }, { "epoch": 1.1497584541062802, "grad_norm": 0.6891068816184998, "learning_rate": 0.001, "loss": 2.5397, "step": 13328 }, { "epoch": 1.1545893719806763, "grad_norm": 0.6237878203392029, "learning_rate": 0.001, "loss": 2.5461, "step": 13384 }, { "epoch": 1.1594202898550725, "grad_norm": 0.9357394576072693, "learning_rate": 0.001, "loss": 2.5487, "step": 13440 }, { "epoch": 1.1642512077294687, "grad_norm": 0.7408100962638855, "learning_rate": 0.001, "loss": 2.542, "step": 13496 }, { "epoch": 1.1690821256038648, "grad_norm": 0.42847955226898193, "learning_rate": 0.001, "loss": 2.5479, "step": 13552 }, { "epoch": 1.1739130434782608, "grad_norm": 0.6769561767578125, "learning_rate": 0.001, "loss": 2.5479, "step": 13608 }, { "epoch": 1.178743961352657, "grad_norm": 1.5430186986923218, "learning_rate": 0.001, "loss": 2.5502, "step": 13664 }, { "epoch": 1.183574879227053, "grad_norm": 0.7028371691703796, "learning_rate": 0.001, "loss": 2.5502, "step": 13720 }, { "epoch": 1.1884057971014492, "grad_norm": 0.43584156036376953, "learning_rate": 0.001, "loss": 2.5536, "step": 13776 }, { "epoch": 1.1932367149758454, "grad_norm": 0.5009216666221619, "learning_rate": 0.001, "loss": 2.5383, "step": 13832 }, { "epoch": 1.1980676328502415, "grad_norm": 0.4713088274002075, "learning_rate": 0.001, "loss": 2.5316, "step": 13888 }, { "epoch": 1.2028985507246377, "grad_norm": 4.079718112945557, "learning_rate": 0.001, "loss": 2.5395, "step": 13944 }, { "epoch": 1.2077294685990339, "grad_norm": 0.5456583499908447, "learning_rate": 0.001, "loss": 2.5382, "step": 14000 }, { "epoch": 1.21256038647343, "grad_norm": 0.512478768825531, "learning_rate": 0.001, "loss": 2.5208, "step": 14056 }, { "epoch": 1.2173913043478262, "grad_norm": 0.3990437388420105, "learning_rate": 0.001, "loss": 2.5248, "step": 14112 }, { "epoch": 1.2222222222222223, "grad_norm": 0.6852943897247314, "learning_rate": 0.001, "loss": 2.5159, "step": 14168 }, { "epoch": 1.2270531400966185, "grad_norm": 0.6256563067436218, "learning_rate": 0.001, "loss": 2.5295, "step": 14224 }, { "epoch": 1.2318840579710144, "grad_norm": 0.6702936887741089, "learning_rate": 0.001, "loss": 2.5288, "step": 14280 }, { "epoch": 1.2367149758454106, "grad_norm": 0.5786619782447815, "learning_rate": 0.001, "loss": 2.537, "step": 14336 }, { "epoch": 1.2415458937198067, "grad_norm": 0.6425799131393433, "learning_rate": 0.001, "loss": 2.5313, "step": 14392 }, { "epoch": 1.2463768115942029, "grad_norm": 0.46116113662719727, "learning_rate": 0.001, "loss": 2.5306, "step": 14448 }, { "epoch": 1.251207729468599, "grad_norm": 0.52046799659729, "learning_rate": 0.001, "loss": 2.5407, "step": 14504 }, { "epoch": 1.2560386473429952, "grad_norm": 0.48209285736083984, "learning_rate": 0.001, "loss": 2.5422, "step": 14560 }, { "epoch": 1.2608695652173914, "grad_norm": 0.8265407681465149, "learning_rate": 0.001, "loss": 2.5347, "step": 14616 }, { "epoch": 1.2657004830917875, "grad_norm": 0.4520986080169678, "learning_rate": 0.001, "loss": 2.5347, "step": 14672 }, { "epoch": 1.2705314009661834, "grad_norm": 0.6868043541908264, "learning_rate": 0.001, "loss": 2.5379, "step": 14728 }, { "epoch": 1.2753623188405796, "grad_norm": 0.5073258280754089, "learning_rate": 0.001, "loss": 2.5232, "step": 14784 }, { "epoch": 1.2801932367149758, "grad_norm": 4.345489025115967, "learning_rate": 0.001, "loss": 2.5282, "step": 14840 }, { "epoch": 1.285024154589372, "grad_norm": 0.6298401951789856, "learning_rate": 0.001, "loss": 2.5255, "step": 14896 }, { "epoch": 1.289855072463768, "grad_norm": 8.941032409667969, "learning_rate": 0.001, "loss": 2.5527, "step": 14952 }, { "epoch": 1.2946859903381642, "grad_norm": 1.0845667123794556, "learning_rate": 0.001, "loss": 2.5406, "step": 15008 }, { "epoch": 1.2995169082125604, "grad_norm": 0.5030841827392578, "learning_rate": 0.001, "loss": 2.5283, "step": 15064 }, { "epoch": 1.3043478260869565, "grad_norm": 0.533234179019928, "learning_rate": 0.001, "loss": 2.5361, "step": 15120 }, { "epoch": 1.3091787439613527, "grad_norm": 1.023293375968933, "learning_rate": 0.001, "loss": 2.5345, "step": 15176 }, { "epoch": 1.3140096618357489, "grad_norm": 5.5791826248168945, "learning_rate": 0.001, "loss": 2.5309, "step": 15232 }, { "epoch": 1.318840579710145, "grad_norm": 4.967889308929443, "learning_rate": 0.001, "loss": 2.5395, "step": 15288 }, { "epoch": 1.3236714975845412, "grad_norm": 5.1368889808654785, "learning_rate": 0.001, "loss": 2.5617, "step": 15344 }, { "epoch": 1.3285024154589373, "grad_norm": 1.534320592880249, "learning_rate": 0.001, "loss": 2.5659, "step": 15400 }, { "epoch": 1.3333333333333333, "grad_norm": 0.9548292756080627, "learning_rate": 0.001, "loss": 2.5445, "step": 15456 }, { "epoch": 1.3381642512077294, "grad_norm": 0.7324444055557251, "learning_rate": 0.001, "loss": 2.5397, "step": 15512 }, { "epoch": 1.3429951690821256, "grad_norm": 3.2579472064971924, "learning_rate": 0.001, "loss": 2.5363, "step": 15568 }, { "epoch": 1.3478260869565217, "grad_norm": 0.6363402605056763, "learning_rate": 0.001, "loss": 2.5334, "step": 15624 }, { "epoch": 1.3526570048309179, "grad_norm": 0.5935930013656616, "learning_rate": 0.001, "loss": 2.532, "step": 15680 }, { "epoch": 1.357487922705314, "grad_norm": 0.8492759466171265, "learning_rate": 0.001, "loss": 2.5298, "step": 15736 }, { "epoch": 1.3623188405797102, "grad_norm": 0.5029017925262451, "learning_rate": 0.001, "loss": 2.5355, "step": 15792 }, { "epoch": 1.3671497584541064, "grad_norm": 0.5794525742530823, "learning_rate": 0.001, "loss": 2.5371, "step": 15848 }, { "epoch": 1.3719806763285023, "grad_norm": 1.3159703016281128, "learning_rate": 0.001, "loss": 2.524, "step": 15904 }, { "epoch": 1.3768115942028984, "grad_norm": 0.4209741950035095, "learning_rate": 0.001, "loss": 2.5255, "step": 15960 }, { "epoch": 1.3816425120772946, "grad_norm": 0.5418590307235718, "learning_rate": 0.001, "loss": 2.5166, "step": 16016 }, { "epoch": 1.3864734299516908, "grad_norm": 0.7169878482818604, "learning_rate": 0.001, "loss": 2.5188, "step": 16072 }, { "epoch": 1.391304347826087, "grad_norm": 0.5207206010818481, "learning_rate": 0.001, "loss": 2.5142, "step": 16128 }, { "epoch": 1.396135265700483, "grad_norm": 0.6924039721488953, "learning_rate": 0.001, "loss": 2.5015, "step": 16184 }, { "epoch": 1.4009661835748792, "grad_norm": 1.1841403245925903, "learning_rate": 0.001, "loss": 2.5206, "step": 16240 }, { "epoch": 1.4057971014492754, "grad_norm": 0.43086743354797363, "learning_rate": 0.001, "loss": 2.511, "step": 16296 }, { "epoch": 1.4106280193236715, "grad_norm": 0.47768867015838623, "learning_rate": 0.001, "loss": 2.5183, "step": 16352 }, { "epoch": 1.4154589371980677, "grad_norm": 1.4415768384933472, "learning_rate": 0.001, "loss": 2.51, "step": 16408 }, { "epoch": 1.4202898550724639, "grad_norm": 0.9194180965423584, "learning_rate": 0.001, "loss": 2.5097, "step": 16464 }, { "epoch": 1.42512077294686, "grad_norm": 0.4032771587371826, "learning_rate": 0.001, "loss": 2.5214, "step": 16520 }, { "epoch": 1.4299516908212562, "grad_norm": 0.3488192558288574, "learning_rate": 0.001, "loss": 2.5076, "step": 16576 }, { "epoch": 1.434782608695652, "grad_norm": 0.4139269292354584, "learning_rate": 0.001, "loss": 2.5203, "step": 16632 }, { "epoch": 1.4396135265700483, "grad_norm": 0.4293974041938782, "learning_rate": 0.001, "loss": 2.513, "step": 16688 }, { "epoch": 1.4444444444444444, "grad_norm": 0.5713045001029968, "learning_rate": 0.001, "loss": 2.5055, "step": 16744 }, { "epoch": 1.4492753623188406, "grad_norm": 0.6148470640182495, "learning_rate": 0.001, "loss": 2.5064, "step": 16800 }, { "epoch": 1.4541062801932367, "grad_norm": 0.6875100135803223, "learning_rate": 0.001, "loss": 2.5174, "step": 16856 }, { "epoch": 1.458937198067633, "grad_norm": 0.959662914276123, "learning_rate": 0.001, "loss": 2.5159, "step": 16912 }, { "epoch": 1.463768115942029, "grad_norm": 0.7817173600196838, "learning_rate": 0.001, "loss": 2.5173, "step": 16968 }, { "epoch": 1.4685990338164252, "grad_norm": 0.9040431976318359, "learning_rate": 0.001, "loss": 2.5087, "step": 17024 }, { "epoch": 1.4734299516908211, "grad_norm": 0.4779967665672302, "learning_rate": 0.001, "loss": 2.5099, "step": 17080 }, { "epoch": 1.4782608695652173, "grad_norm": 2.2653839588165283, "learning_rate": 0.001, "loss": 2.5015, "step": 17136 }, { "epoch": 1.4830917874396135, "grad_norm": 0.6779986023902893, "learning_rate": 0.001, "loss": 2.517, "step": 17192 }, { "epoch": 1.4879227053140096, "grad_norm": 0.8404067754745483, "learning_rate": 0.001, "loss": 2.5209, "step": 17248 }, { "epoch": 1.4927536231884058, "grad_norm": 2.1037940979003906, "learning_rate": 0.001, "loss": 2.5054, "step": 17304 }, { "epoch": 1.497584541062802, "grad_norm": 0.5338457226753235, "learning_rate": 0.001, "loss": 2.5078, "step": 17360 }, { "epoch": 1.502415458937198, "grad_norm": 0.6839510798454285, "learning_rate": 0.001, "loss": 2.5158, "step": 17416 }, { "epoch": 1.5072463768115942, "grad_norm": 0.6963435411453247, "learning_rate": 0.001, "loss": 2.5223, "step": 17472 }, { "epoch": 1.5120772946859904, "grad_norm": 0.7761881351470947, "learning_rate": 0.001, "loss": 2.5113, "step": 17528 }, { "epoch": 1.5169082125603865, "grad_norm": 0.4439939856529236, "learning_rate": 0.001, "loss": 2.5035, "step": 17584 }, { "epoch": 1.5217391304347827, "grad_norm": 8.835694313049316, "learning_rate": 0.001, "loss": 2.5081, "step": 17640 }, { "epoch": 1.5265700483091789, "grad_norm": 0.654516339302063, "learning_rate": 0.001, "loss": 2.5211, "step": 17696 }, { "epoch": 1.531400966183575, "grad_norm": 0.7336230874061584, "learning_rate": 0.001, "loss": 2.4982, "step": 17752 }, { "epoch": 1.5362318840579712, "grad_norm": 1.263433575630188, "learning_rate": 0.001, "loss": 2.4997, "step": 17808 }, { "epoch": 1.541062801932367, "grad_norm": 0.46169614791870117, "learning_rate": 0.001, "loss": 2.5066, "step": 17864 }, { "epoch": 1.5458937198067633, "grad_norm": 1.5880149602890015, "learning_rate": 0.001, "loss": 2.4869, "step": 17920 }, { "epoch": 1.5507246376811594, "grad_norm": 0.6592299342155457, "learning_rate": 0.001, "loss": 2.4857, "step": 17976 }, { "epoch": 1.5555555555555556, "grad_norm": 0.42128682136535645, "learning_rate": 0.001, "loss": 2.493, "step": 18032 }, { "epoch": 1.5603864734299517, "grad_norm": 0.7555729746818542, "learning_rate": 0.001, "loss": 2.4786, "step": 18088 }, { "epoch": 1.5652173913043477, "grad_norm": 0.6741869449615479, "learning_rate": 0.001, "loss": 2.4889, "step": 18144 }, { "epoch": 1.5700483091787438, "grad_norm": 0.6820804476737976, "learning_rate": 0.001, "loss": 2.5124, "step": 18200 }, { "epoch": 1.57487922705314, "grad_norm": 0.7633949518203735, "learning_rate": 0.001, "loss": 2.4979, "step": 18256 }, { "epoch": 1.5797101449275361, "grad_norm": 0.4473211169242859, "learning_rate": 0.001, "loss": 2.5009, "step": 18312 }, { "epoch": 1.5845410628019323, "grad_norm": 0.6472347378730774, "learning_rate": 0.001, "loss": 2.4978, "step": 18368 }, { "epoch": 1.5893719806763285, "grad_norm": 0.46679043769836426, "learning_rate": 0.001, "loss": 2.4903, "step": 18424 }, { "epoch": 1.5942028985507246, "grad_norm": 1.9245352745056152, "learning_rate": 0.001, "loss": 2.494, "step": 18480 }, { "epoch": 1.5990338164251208, "grad_norm": 2.9718055725097656, "learning_rate": 0.001, "loss": 2.4925, "step": 18536 }, { "epoch": 1.603864734299517, "grad_norm": 0.7821285724639893, "learning_rate": 0.001, "loss": 2.4897, "step": 18592 }, { "epoch": 1.608695652173913, "grad_norm": 0.39512935280799866, "learning_rate": 0.001, "loss": 2.4837, "step": 18648 }, { "epoch": 1.6135265700483092, "grad_norm": 201.1911163330078, "learning_rate": 0.001, "loss": 2.4799, "step": 18704 }, { "epoch": 1.6183574879227054, "grad_norm": 0.4764461815357208, "learning_rate": 0.001, "loss": 2.4948, "step": 18760 }, { "epoch": 1.6231884057971016, "grad_norm": 0.422262042760849, "learning_rate": 0.001, "loss": 2.4756, "step": 18816 }, { "epoch": 1.6280193236714977, "grad_norm": 0.7426127791404724, "learning_rate": 0.001, "loss": 2.4776, "step": 18872 }, { "epoch": 1.6328502415458939, "grad_norm": 0.6004542708396912, "learning_rate": 0.001, "loss": 2.4736, "step": 18928 }, { "epoch": 1.6376811594202898, "grad_norm": 0.511694073677063, "learning_rate": 0.001, "loss": 2.4928, "step": 18984 }, { "epoch": 1.642512077294686, "grad_norm": 0.9940207600593567, "learning_rate": 0.001, "loss": 2.5022, "step": 19040 }, { "epoch": 1.6473429951690821, "grad_norm": 0.5756489634513855, "learning_rate": 0.001, "loss": 2.4926, "step": 19096 }, { "epoch": 1.6521739130434783, "grad_norm": 0.7159668207168579, "learning_rate": 0.001, "loss": 2.5008, "step": 19152 }, { "epoch": 1.6570048309178744, "grad_norm": 0.6493143439292908, "learning_rate": 0.001, "loss": 2.5039, "step": 19208 }, { "epoch": 1.6618357487922706, "grad_norm": 0.5644376277923584, "learning_rate": 0.001, "loss": 2.4634, "step": 19264 }, { "epoch": 1.6666666666666665, "grad_norm": 1.4159189462661743, "learning_rate": 0.001, "loss": 2.4742, "step": 19320 }, { "epoch": 1.6714975845410627, "grad_norm": 1.2761787176132202, "learning_rate": 0.001, "loss": 2.5077, "step": 19376 }, { "epoch": 1.6763285024154588, "grad_norm": 0.5674095153808594, "learning_rate": 0.001, "loss": 2.4865, "step": 19432 }, { "epoch": 1.681159420289855, "grad_norm": 0.4240173399448395, "learning_rate": 0.001, "loss": 2.4782, "step": 19488 }, { "epoch": 1.6859903381642511, "grad_norm": 0.951404869556427, "learning_rate": 0.001, "loss": 2.4718, "step": 19544 }, { "epoch": 1.6908212560386473, "grad_norm": 1.0875020027160645, "learning_rate": 0.001, "loss": 2.4819, "step": 19600 }, { "epoch": 1.6956521739130435, "grad_norm": 0.904477059841156, "learning_rate": 0.001, "loss": 2.4951, "step": 19656 }, { "epoch": 1.7004830917874396, "grad_norm": 0.6886747479438782, "learning_rate": 0.001, "loss": 2.4934, "step": 19712 }, { "epoch": 1.7053140096618358, "grad_norm": 0.7169251441955566, "learning_rate": 0.001, "loss": 2.4836, "step": 19768 }, { "epoch": 1.710144927536232, "grad_norm": 0.47756174206733704, "learning_rate": 0.001, "loss": 2.4878, "step": 19824 }, { "epoch": 1.714975845410628, "grad_norm": 0.4913206100463867, "learning_rate": 0.001, "loss": 2.476, "step": 19880 }, { "epoch": 1.7198067632850242, "grad_norm": 1.1071933507919312, "learning_rate": 0.001, "loss": 2.4667, "step": 19936 }, { "epoch": 1.7246376811594204, "grad_norm": 0.5026872158050537, "learning_rate": 0.001, "loss": 2.4782, "step": 19992 }, { "epoch": 1.7294685990338166, "grad_norm": 0.6514714360237122, "learning_rate": 0.001, "loss": 2.4647, "step": 20048 }, { "epoch": 1.7342995169082127, "grad_norm": 0.5138649344444275, "learning_rate": 0.001, "loss": 2.467, "step": 20104 }, { "epoch": 1.7391304347826086, "grad_norm": 0.5914953947067261, "learning_rate": 0.001, "loss": 2.4651, "step": 20160 }, { "epoch": 1.7439613526570048, "grad_norm": 0.5537645816802979, "learning_rate": 0.001, "loss": 2.4685, "step": 20216 }, { "epoch": 1.748792270531401, "grad_norm": 1.2123079299926758, "learning_rate": 0.001, "loss": 2.462, "step": 20272 }, { "epoch": 1.7536231884057971, "grad_norm": 0.7601057291030884, "learning_rate": 0.001, "loss": 2.4921, "step": 20328 }, { "epoch": 1.7584541062801933, "grad_norm": 2.7119219303131104, "learning_rate": 0.001, "loss": 2.5011, "step": 20384 }, { "epoch": 1.7632850241545892, "grad_norm": 0.4809827208518982, "learning_rate": 0.001, "loss": 2.4822, "step": 20440 }, { "epoch": 1.7681159420289854, "grad_norm": 0.5912278294563293, "learning_rate": 0.001, "loss": 2.5019, "step": 20496 }, { "epoch": 1.7729468599033815, "grad_norm": 1.2461860179901123, "learning_rate": 0.001, "loss": 2.4881, "step": 20552 }, { "epoch": 1.7777777777777777, "grad_norm": 0.6390478014945984, "learning_rate": 0.001, "loss": 2.4783, "step": 20608 }, { "epoch": 1.7826086956521738, "grad_norm": 0.5740552544593811, "learning_rate": 0.001, "loss": 2.5142, "step": 20664 }, { "epoch": 1.78743961352657, "grad_norm": 1.578294277191162, "learning_rate": 0.001, "loss": 2.4988, "step": 20720 }, { "epoch": 1.7922705314009661, "grad_norm": 0.613778829574585, "learning_rate": 0.001, "loss": 2.4975, "step": 20776 }, { "epoch": 1.7971014492753623, "grad_norm": 0.6002821326255798, "learning_rate": 0.001, "loss": 2.4776, "step": 20832 }, { "epoch": 1.8019323671497585, "grad_norm": 0.7093578577041626, "learning_rate": 0.001, "loss": 2.4774, "step": 20888 }, { "epoch": 1.8067632850241546, "grad_norm": 0.5581182241439819, "learning_rate": 0.001, "loss": 2.4622, "step": 20944 }, { "epoch": 1.8115942028985508, "grad_norm": 0.7352020144462585, "learning_rate": 0.001, "loss": 2.4627, "step": 21000 }, { "epoch": 1.816425120772947, "grad_norm": 2.4271843433380127, "learning_rate": 0.001, "loss": 2.4801, "step": 21056 }, { "epoch": 1.821256038647343, "grad_norm": 0.5340695977210999, "learning_rate": 0.001, "loss": 2.4902, "step": 21112 }, { "epoch": 1.8260869565217392, "grad_norm": 0.46885180473327637, "learning_rate": 0.001, "loss": 2.4881, "step": 21168 }, { "epoch": 1.8309178743961354, "grad_norm": 1.0845648050308228, "learning_rate": 0.001, "loss": 2.4685, "step": 21224 }, { "epoch": 1.8357487922705316, "grad_norm": 1.0396238565444946, "learning_rate": 0.001, "loss": 2.465, "step": 21280 }, { "epoch": 1.8405797101449275, "grad_norm": 0.8950973153114319, "learning_rate": 0.001, "loss": 2.4743, "step": 21336 }, { "epoch": 1.8454106280193237, "grad_norm": 0.9389447569847107, "learning_rate": 0.001, "loss": 2.4888, "step": 21392 }, { "epoch": 1.8502415458937198, "grad_norm": 0.5466434955596924, "learning_rate": 0.001, "loss": 2.4757, "step": 21448 }, { "epoch": 1.855072463768116, "grad_norm": 2.5765113830566406, "learning_rate": 0.001, "loss": 2.4701, "step": 21504 }, { "epoch": 1.8599033816425121, "grad_norm": 0.6471238732337952, "learning_rate": 0.001, "loss": 2.4813, "step": 21560 }, { "epoch": 1.864734299516908, "grad_norm": 3.128875970840454, "learning_rate": 0.001, "loss": 2.4798, "step": 21616 }, { "epoch": 1.8695652173913042, "grad_norm": 1.321737289428711, "learning_rate": 0.001, "loss": 2.4852, "step": 21672 }, { "epoch": 1.8743961352657004, "grad_norm": 3.308558702468872, "learning_rate": 0.001, "loss": 2.4896, "step": 21728 }, { "epoch": 1.8792270531400965, "grad_norm": 0.8100047707557678, "learning_rate": 0.001, "loss": 2.4785, "step": 21784 }, { "epoch": 1.8840579710144927, "grad_norm": 2.5754692554473877, "learning_rate": 0.001, "loss": 2.4809, "step": 21840 }, { "epoch": 1.8888888888888888, "grad_norm": 5.025903224945068, "learning_rate": 0.001, "loss": 2.4905, "step": 21896 }, { "epoch": 1.893719806763285, "grad_norm": 0.5873627066612244, "learning_rate": 0.001, "loss": 2.48, "step": 21952 }, { "epoch": 1.8985507246376812, "grad_norm": 0.41663891077041626, "learning_rate": 0.001, "loss": 2.4703, "step": 22008 }, { "epoch": 1.9033816425120773, "grad_norm": 2.2055370807647705, "learning_rate": 0.001, "loss": 2.4637, "step": 22064 }, { "epoch": 1.9082125603864735, "grad_norm": 0.5622344017028809, "learning_rate": 0.001, "loss": 2.4748, "step": 22120 }, { "epoch": 1.9130434782608696, "grad_norm": 0.4542618691921234, "learning_rate": 0.001, "loss": 2.4666, "step": 22176 }, { "epoch": 1.9178743961352658, "grad_norm": 0.974029541015625, "learning_rate": 0.001, "loss": 2.4585, "step": 22232 }, { "epoch": 1.922705314009662, "grad_norm": 0.6217877268791199, "learning_rate": 0.001, "loss": 2.4649, "step": 22288 }, { "epoch": 1.927536231884058, "grad_norm": 0.5779778361320496, "learning_rate": 0.001, "loss": 2.4563, "step": 22344 }, { "epoch": 1.9323671497584543, "grad_norm": 0.48157015442848206, "learning_rate": 0.001, "loss": 2.4602, "step": 22400 }, { "epoch": 1.9371980676328504, "grad_norm": 0.6020475625991821, "learning_rate": 0.001, "loss": 2.4505, "step": 22456 }, { "epoch": 1.9420289855072463, "grad_norm": 0.534919261932373, "learning_rate": 0.001, "loss": 2.4587, "step": 22512 }, { "epoch": 1.9468599033816425, "grad_norm": 0.5620009899139404, "learning_rate": 0.001, "loss": 2.4592, "step": 22568 }, { "epoch": 1.9516908212560387, "grad_norm": 0.9667491912841797, "learning_rate": 0.001, "loss": 2.4745, "step": 22624 }, { "epoch": 1.9565217391304348, "grad_norm": 0.9978834390640259, "learning_rate": 0.001, "loss": 2.4855, "step": 22680 }, { "epoch": 1.961352657004831, "grad_norm": 0.6425617933273315, "learning_rate": 0.001, "loss": 2.4812, "step": 22736 }, { "epoch": 1.966183574879227, "grad_norm": 1.0051997900009155, "learning_rate": 0.001, "loss": 2.4851, "step": 22792 }, { "epoch": 1.971014492753623, "grad_norm": 0.9055406451225281, "learning_rate": 0.001, "loss": 2.4946, "step": 22848 }, { "epoch": 1.9758454106280192, "grad_norm": 0.9933205842971802, "learning_rate": 0.001, "loss": 2.5218, "step": 22904 }, { "epoch": 1.9806763285024154, "grad_norm": 4.04213809967041, "learning_rate": 0.001, "loss": 2.4898, "step": 22960 }, { "epoch": 1.9855072463768115, "grad_norm": 0.6505899429321289, "learning_rate": 0.001, "loss": 2.4845, "step": 23016 }, { "epoch": 1.9903381642512077, "grad_norm": 0.9572941660881042, "learning_rate": 0.001, "loss": 2.4716, "step": 23072 }, { "epoch": 1.9951690821256038, "grad_norm": 1.3944183588027954, "learning_rate": 0.001, "loss": 2.4616, "step": 23128 }, { "epoch": 2.0, "grad_norm": 1.0966757535934448, "learning_rate": 0.001, "loss": 2.4681, "step": 23184 }, { "epoch": 2.004830917874396, "grad_norm": 1.0105741024017334, "learning_rate": 0.001, "loss": 2.4401, "step": 23240 }, { "epoch": 2.0096618357487923, "grad_norm": 0.920160174369812, "learning_rate": 0.001, "loss": 2.4322, "step": 23296 }, { "epoch": 2.0144927536231885, "grad_norm": 0.7583364844322205, "learning_rate": 0.001, "loss": 2.4339, "step": 23352 }, { "epoch": 2.0193236714975846, "grad_norm": 0.4173266887664795, "learning_rate": 0.001, "loss": 2.4171, "step": 23408 }, { "epoch": 2.024154589371981, "grad_norm": 1.54490327835083, "learning_rate": 0.001, "loss": 2.4264, "step": 23464 }, { "epoch": 2.028985507246377, "grad_norm": 52.289432525634766, "learning_rate": 0.001, "loss": 2.4346, "step": 23520 }, { "epoch": 2.033816425120773, "grad_norm": 5.7289719581604, "learning_rate": 0.001, "loss": 2.4591, "step": 23576 }, { "epoch": 2.0386473429951693, "grad_norm": 1.1728147268295288, "learning_rate": 0.001, "loss": 2.4574, "step": 23632 }, { "epoch": 2.0434782608695654, "grad_norm": 0.7576716542243958, "learning_rate": 0.001, "loss": 2.4416, "step": 23688 }, { "epoch": 2.0483091787439616, "grad_norm": 1.6649419069290161, "learning_rate": 0.001, "loss": 2.4562, "step": 23744 }, { "epoch": 2.0531400966183573, "grad_norm": 1.3794751167297363, "learning_rate": 0.001, "loss": 2.4485, "step": 23800 }, { "epoch": 2.0579710144927534, "grad_norm": 0.5330275297164917, "learning_rate": 0.001, "loss": 2.4663, "step": 23856 }, { "epoch": 2.0628019323671496, "grad_norm": 0.6258566975593567, "learning_rate": 0.001, "loss": 2.4456, "step": 23912 }, { "epoch": 2.0676328502415457, "grad_norm": 0.7583393454551697, "learning_rate": 0.001, "loss": 2.4413, "step": 23968 }, { "epoch": 2.072463768115942, "grad_norm": 1.1774613857269287, "learning_rate": 0.001, "loss": 2.4271, "step": 24024 }, { "epoch": 2.077294685990338, "grad_norm": 0.7557084560394287, "learning_rate": 0.001, "loss": 2.4344, "step": 24080 }, { "epoch": 2.082125603864734, "grad_norm": 1.0778863430023193, "learning_rate": 0.001, "loss": 2.4356, "step": 24136 }, { "epoch": 2.0869565217391304, "grad_norm": 1.0254830121994019, "learning_rate": 0.001, "loss": 2.4305, "step": 24192 }, { "epoch": 2.0917874396135265, "grad_norm": 1.5008212327957153, "learning_rate": 0.001, "loss": 2.4283, "step": 24248 }, { "epoch": 2.0966183574879227, "grad_norm": 1.0089281797409058, "learning_rate": 0.001, "loss": 2.4314, "step": 24304 }, { "epoch": 2.101449275362319, "grad_norm": 1.9808462858200073, "learning_rate": 0.001, "loss": 2.4301, "step": 24360 }, { "epoch": 2.106280193236715, "grad_norm": 0.7853475213050842, "learning_rate": 0.001, "loss": 2.4551, "step": 24416 }, { "epoch": 2.111111111111111, "grad_norm": 2.9563910961151123, "learning_rate": 0.001, "loss": 2.4392, "step": 24472 }, { "epoch": 2.1159420289855073, "grad_norm": 5.112704277038574, "learning_rate": 0.001, "loss": 2.436, "step": 24528 }, { "epoch": 2.1207729468599035, "grad_norm": 2.355679512023926, "learning_rate": 0.001, "loss": 2.4323, "step": 24584 }, { "epoch": 2.1256038647342996, "grad_norm": 1.002687692642212, "learning_rate": 0.001, "loss": 2.4337, "step": 24640 }, { "epoch": 2.130434782608696, "grad_norm": 0.48910364508628845, "learning_rate": 0.001, "loss": 2.4386, "step": 24696 }, { "epoch": 2.135265700483092, "grad_norm": 0.6243553161621094, "learning_rate": 0.001, "loss": 2.4367, "step": 24752 }, { "epoch": 2.140096618357488, "grad_norm": 0.6357074975967407, "learning_rate": 0.001, "loss": 2.4165, "step": 24808 }, { "epoch": 2.1449275362318843, "grad_norm": 0.6496347784996033, "learning_rate": 0.001, "loss": 2.4182, "step": 24864 }, { "epoch": 2.14975845410628, "grad_norm": 1.040175437927246, "learning_rate": 0.001, "loss": 2.4306, "step": 24920 }, { "epoch": 2.154589371980676, "grad_norm": 0.988701343536377, "learning_rate": 0.001, "loss": 2.4526, "step": 24976 }, { "epoch": 2.1594202898550723, "grad_norm": 0.997657060623169, "learning_rate": 0.001, "loss": 2.4393, "step": 25032 }, { "epoch": 2.1642512077294684, "grad_norm": 0.5957661271095276, "learning_rate": 0.001, "loss": 2.4464, "step": 25088 }, { "epoch": 2.1690821256038646, "grad_norm": 4.283563137054443, "learning_rate": 0.001, "loss": 2.452, "step": 25144 }, { "epoch": 2.1739130434782608, "grad_norm": 0.6921948194503784, "learning_rate": 0.001, "loss": 2.4507, "step": 25200 }, { "epoch": 2.178743961352657, "grad_norm": 3.557818651199341, "learning_rate": 0.001, "loss": 2.4363, "step": 25256 }, { "epoch": 2.183574879227053, "grad_norm": 0.8437843918800354, "learning_rate": 0.001, "loss": 2.425, "step": 25312 }, { "epoch": 2.1884057971014492, "grad_norm": 0.5580781102180481, "learning_rate": 0.001, "loss": 2.4246, "step": 25368 }, { "epoch": 2.1932367149758454, "grad_norm": 0.7081442475318909, "learning_rate": 0.001, "loss": 2.4275, "step": 25424 }, { "epoch": 2.1980676328502415, "grad_norm": 0.7513576149940491, "learning_rate": 0.001, "loss": 2.4184, "step": 25480 }, { "epoch": 2.2028985507246377, "grad_norm": 0.8365774750709534, "learning_rate": 0.001, "loss": 2.4172, "step": 25536 }, { "epoch": 2.207729468599034, "grad_norm": 0.9274559020996094, "learning_rate": 0.001, "loss": 2.4368, "step": 25592 }, { "epoch": 2.21256038647343, "grad_norm": 3.1649367809295654, "learning_rate": 0.001, "loss": 2.4443, "step": 25648 }, { "epoch": 2.217391304347826, "grad_norm": 4.661854267120361, "learning_rate": 0.001, "loss": 2.4351, "step": 25704 }, { "epoch": 2.2222222222222223, "grad_norm": 0.6997871398925781, "learning_rate": 0.001, "loss": 2.418, "step": 25760 }, { "epoch": 2.2270531400966185, "grad_norm": 0.6769877076148987, "learning_rate": 0.001, "loss": 2.4135, "step": 25816 }, { "epoch": 2.2318840579710146, "grad_norm": 0.5421584248542786, "learning_rate": 0.001, "loss": 2.4134, "step": 25872 }, { "epoch": 2.236714975845411, "grad_norm": 5.050682544708252, "learning_rate": 0.001, "loss": 2.4485, "step": 25928 }, { "epoch": 2.241545893719807, "grad_norm": 2.9767906665802, "learning_rate": 0.001, "loss": 2.4348, "step": 25984 }, { "epoch": 2.246376811594203, "grad_norm": 0.7618628144264221, "learning_rate": 0.001, "loss": 2.4202, "step": 26040 }, { "epoch": 2.2512077294685993, "grad_norm": 1.7416142225265503, "learning_rate": 0.001, "loss": 2.4208, "step": 26096 }, { "epoch": 2.2560386473429954, "grad_norm": 1.0441137552261353, "learning_rate": 0.001, "loss": 2.4213, "step": 26152 }, { "epoch": 2.260869565217391, "grad_norm": 0.8699880242347717, "learning_rate": 0.001, "loss": 2.4057, "step": 26208 }, { "epoch": 2.2657004830917873, "grad_norm": 0.8893274068832397, "learning_rate": 0.001, "loss": 2.4261, "step": 26264 }, { "epoch": 2.2705314009661834, "grad_norm": 1.335638403892517, "learning_rate": 0.001, "loss": 2.4237, "step": 26320 }, { "epoch": 2.2753623188405796, "grad_norm": 0.8254622220993042, "learning_rate": 0.001, "loss": 2.4434, "step": 26376 }, { "epoch": 2.2801932367149758, "grad_norm": 0.9731737375259399, "learning_rate": 0.001, "loss": 2.4238, "step": 26432 }, { "epoch": 2.285024154589372, "grad_norm": 0.9786226153373718, "learning_rate": 0.001, "loss": 2.4306, "step": 26488 }, { "epoch": 2.289855072463768, "grad_norm": 0.9375965595245361, "learning_rate": 0.001, "loss": 2.4184, "step": 26544 }, { "epoch": 2.2946859903381642, "grad_norm": 0.7358525395393372, "learning_rate": 0.001, "loss": 2.427, "step": 26600 }, { "epoch": 2.2995169082125604, "grad_norm": 1.1536989212036133, "learning_rate": 0.001, "loss": 2.4207, "step": 26656 }, { "epoch": 2.3043478260869565, "grad_norm": 1.206375002861023, "learning_rate": 0.001, "loss": 2.4209, "step": 26712 }, { "epoch": 2.3091787439613527, "grad_norm": 2.6680002212524414, "learning_rate": 0.001, "loss": 2.4268, "step": 26768 }, { "epoch": 2.314009661835749, "grad_norm": 2.0401179790496826, "learning_rate": 0.001, "loss": 2.4262, "step": 26824 }, { "epoch": 2.318840579710145, "grad_norm": 1.0442848205566406, "learning_rate": 0.001, "loss": 2.4438, "step": 26880 }, { "epoch": 2.323671497584541, "grad_norm": 1.3826639652252197, "learning_rate": 0.001, "loss": 2.443, "step": 26936 }, { "epoch": 2.3285024154589373, "grad_norm": 1.0294597148895264, "learning_rate": 0.001, "loss": 2.4465, "step": 26992 }, { "epoch": 2.3333333333333335, "grad_norm": 0.6414815187454224, "learning_rate": 0.001, "loss": 2.4212, "step": 27048 }, { "epoch": 2.3381642512077296, "grad_norm": 0.5294144153594971, "learning_rate": 0.001, "loss": 2.4202, "step": 27104 }, { "epoch": 2.342995169082126, "grad_norm": 0.589288055896759, "learning_rate": 0.001, "loss": 2.4226, "step": 27160 }, { "epoch": 2.3478260869565215, "grad_norm": 0.5152631998062134, "learning_rate": 0.001, "loss": 2.4139, "step": 27216 }, { "epoch": 2.3526570048309177, "grad_norm": 0.8567907810211182, "learning_rate": 0.001, "loss": 2.4279, "step": 27272 }, { "epoch": 2.357487922705314, "grad_norm": 1.1003116369247437, "learning_rate": 0.001, "loss": 2.426, "step": 27328 }, { "epoch": 2.36231884057971, "grad_norm": 0.5347234606742859, "learning_rate": 0.001, "loss": 2.4184, "step": 27384 }, { "epoch": 2.367149758454106, "grad_norm": 2.898610830307007, "learning_rate": 0.001, "loss": 2.4114, "step": 27440 }, { "epoch": 2.3719806763285023, "grad_norm": 0.6177157759666443, "learning_rate": 0.001, "loss": 2.4163, "step": 27496 }, { "epoch": 2.3768115942028984, "grad_norm": 1.2459274530410767, "learning_rate": 0.001, "loss": 2.4205, "step": 27552 }, { "epoch": 2.3816425120772946, "grad_norm": 0.676740288734436, "learning_rate": 0.001, "loss": 2.4079, "step": 27608 }, { "epoch": 2.3864734299516908, "grad_norm": 4.750284194946289, "learning_rate": 0.001, "loss": 2.4174, "step": 27664 }, { "epoch": 2.391304347826087, "grad_norm": 0.7142317891120911, "learning_rate": 0.001, "loss": 2.4184, "step": 27720 }, { "epoch": 2.396135265700483, "grad_norm": 0.7198566794395447, "learning_rate": 0.001, "loss": 2.4104, "step": 27776 }, { "epoch": 2.4009661835748792, "grad_norm": 1.5782791376113892, "learning_rate": 0.001, "loss": 2.4156, "step": 27832 }, { "epoch": 2.4057971014492754, "grad_norm": 0.6034999489784241, "learning_rate": 0.001, "loss": 2.4137, "step": 27888 }, { "epoch": 2.4106280193236715, "grad_norm": 0.803419828414917, "learning_rate": 0.001, "loss": 2.4355, "step": 27944 }, { "epoch": 2.4154589371980677, "grad_norm": 0.4411523938179016, "learning_rate": 0.001, "loss": 2.4033, "step": 28000 }, { "epoch": 2.420289855072464, "grad_norm": 0.7060615420341492, "learning_rate": 0.001, "loss": 2.4017, "step": 28056 }, { "epoch": 2.42512077294686, "grad_norm": 1.1304291486740112, "learning_rate": 0.001, "loss": 2.4224, "step": 28112 }, { "epoch": 2.429951690821256, "grad_norm": 1.5663508176803589, "learning_rate": 0.001, "loss": 2.4447, "step": 28168 }, { "epoch": 2.4347826086956523, "grad_norm": 0.8626444339752197, "learning_rate": 0.001, "loss": 2.4373, "step": 28224 }, { "epoch": 2.4396135265700485, "grad_norm": 1.1484856605529785, "learning_rate": 0.001, "loss": 2.4197, "step": 28280 }, { "epoch": 2.4444444444444446, "grad_norm": 4.306599140167236, "learning_rate": 0.001, "loss": 2.4194, "step": 28336 }, { "epoch": 2.449275362318841, "grad_norm": 1.4168084859848022, "learning_rate": 0.001, "loss": 2.4192, "step": 28392 }, { "epoch": 2.454106280193237, "grad_norm": 0.7608858942985535, "learning_rate": 0.001, "loss": 2.4183, "step": 28448 }, { "epoch": 2.4589371980676327, "grad_norm": 0.8891749382019043, "learning_rate": 0.001, "loss": 2.4234, "step": 28504 }, { "epoch": 2.463768115942029, "grad_norm": 1.1063004732131958, "learning_rate": 0.001, "loss": 2.4095, "step": 28560 }, { "epoch": 2.468599033816425, "grad_norm": 1.788999319076538, "learning_rate": 0.001, "loss": 2.4042, "step": 28616 }, { "epoch": 2.473429951690821, "grad_norm": 10.093421936035156, "learning_rate": 0.001, "loss": 2.4096, "step": 28672 }, { "epoch": 2.4782608695652173, "grad_norm": 0.8079579472541809, "learning_rate": 0.001, "loss": 2.4206, "step": 28728 }, { "epoch": 2.4830917874396135, "grad_norm": 1.0279505252838135, "learning_rate": 0.001, "loss": 2.4228, "step": 28784 }, { "epoch": 2.4879227053140096, "grad_norm": 0.5833010077476501, "learning_rate": 0.001, "loss": 2.4188, "step": 28840 }, { "epoch": 2.4927536231884058, "grad_norm": 1.3775608539581299, "learning_rate": 0.001, "loss": 2.4335, "step": 28896 }, { "epoch": 2.497584541062802, "grad_norm": 1.3201850652694702, "learning_rate": 0.001, "loss": 2.4225, "step": 28952 }, { "epoch": 2.502415458937198, "grad_norm": 0.9545646905899048, "learning_rate": 0.001, "loss": 2.4268, "step": 29008 }, { "epoch": 2.5072463768115942, "grad_norm": 1.4709309339523315, "learning_rate": 0.001, "loss": 2.421, "step": 29064 }, { "epoch": 2.5120772946859904, "grad_norm": 1.4570496082305908, "learning_rate": 0.001, "loss": 2.4136, "step": 29120 }, { "epoch": 2.5169082125603865, "grad_norm": 0.827422022819519, "learning_rate": 0.001, "loss": 2.4117, "step": 29176 }, { "epoch": 2.5217391304347827, "grad_norm": 4.7756829261779785, "learning_rate": 0.001, "loss": 2.4091, "step": 29232 }, { "epoch": 2.526570048309179, "grad_norm": 0.78351891040802, "learning_rate": 0.001, "loss": 2.4321, "step": 29288 }, { "epoch": 2.531400966183575, "grad_norm": 1.6681420803070068, "learning_rate": 0.001, "loss": 2.4213, "step": 29344 }, { "epoch": 2.536231884057971, "grad_norm": 0.878593921661377, "learning_rate": 0.001, "loss": 2.4263, "step": 29400 }, { "epoch": 2.541062801932367, "grad_norm": 1.1612260341644287, "learning_rate": 0.001, "loss": 2.4086, "step": 29456 }, { "epoch": 2.545893719806763, "grad_norm": 0.6242434978485107, "learning_rate": 0.001, "loss": 2.4028, "step": 29512 }, { "epoch": 2.550724637681159, "grad_norm": 0.6720430254936218, "learning_rate": 0.001, "loss": 2.4017, "step": 29568 }, { "epoch": 2.5555555555555554, "grad_norm": 1.0026371479034424, "learning_rate": 0.001, "loss": 2.39, "step": 29624 }, { "epoch": 2.5603864734299515, "grad_norm": 1.4545741081237793, "learning_rate": 0.001, "loss": 2.3999, "step": 29680 }, { "epoch": 2.5652173913043477, "grad_norm": 1.666398286819458, "learning_rate": 0.001, "loss": 2.4321, "step": 29736 }, { "epoch": 2.570048309178744, "grad_norm": 1.4893743991851807, "learning_rate": 0.001, "loss": 2.4469, "step": 29792 }, { "epoch": 2.57487922705314, "grad_norm": 1.9381307363510132, "learning_rate": 0.001, "loss": 2.4638, "step": 29848 }, { "epoch": 2.579710144927536, "grad_norm": 3.441112518310547, "learning_rate": 0.001, "loss": 2.4324, "step": 29904 }, { "epoch": 2.5845410628019323, "grad_norm": 0.7186459302902222, "learning_rate": 0.001, "loss": 2.4248, "step": 29960 }, { "epoch": 2.5893719806763285, "grad_norm": 2.555569648742676, "learning_rate": 0.001, "loss": 2.4287, "step": 30016 }, { "epoch": 2.5942028985507246, "grad_norm": 3.84259033203125, "learning_rate": 0.001, "loss": 2.4291, "step": 30072 }, { "epoch": 2.5990338164251208, "grad_norm": 1.7482112646102905, "learning_rate": 0.001, "loss": 2.4289, "step": 30128 }, { "epoch": 2.603864734299517, "grad_norm": 0.4381943643093109, "learning_rate": 0.001, "loss": 2.4185, "step": 30184 }, { "epoch": 2.608695652173913, "grad_norm": 1.1325384378433228, "learning_rate": 0.001, "loss": 2.4051, "step": 30240 }, { "epoch": 2.6135265700483092, "grad_norm": 1.4725890159606934, "learning_rate": 0.001, "loss": 2.4256, "step": 30296 }, { "epoch": 2.6183574879227054, "grad_norm": 0.6949960589408875, "learning_rate": 0.001, "loss": 2.424, "step": 30352 }, { "epoch": 2.6231884057971016, "grad_norm": 1.008844017982483, "learning_rate": 0.001, "loss": 2.4252, "step": 30408 }, { "epoch": 2.6280193236714977, "grad_norm": 0.7304251790046692, "learning_rate": 0.001, "loss": 2.4212, "step": 30464 }, { "epoch": 2.632850241545894, "grad_norm": 1.120819330215454, "learning_rate": 0.001, "loss": 2.4291, "step": 30520 }, { "epoch": 2.63768115942029, "grad_norm": 1.4787769317626953, "learning_rate": 0.001, "loss": 2.4368, "step": 30576 }, { "epoch": 2.642512077294686, "grad_norm": 2.0001964569091797, "learning_rate": 0.001, "loss": 2.4326, "step": 30632 }, { "epoch": 2.6473429951690823, "grad_norm": 0.625577449798584, "learning_rate": 0.001, "loss": 2.4416, "step": 30688 }, { "epoch": 2.6521739130434785, "grad_norm": 0.8403634428977966, "learning_rate": 0.001, "loss": 2.4254, "step": 30744 }, { "epoch": 2.6570048309178746, "grad_norm": 0.8532978296279907, "learning_rate": 0.001, "loss": 2.4213, "step": 30800 }, { "epoch": 2.661835748792271, "grad_norm": 1.9018747806549072, "learning_rate": 0.001, "loss": 2.4101, "step": 30856 }, { "epoch": 2.6666666666666665, "grad_norm": 0.7936710715293884, "learning_rate": 0.001, "loss": 2.417, "step": 30912 }, { "epoch": 2.6714975845410627, "grad_norm": 1.145591139793396, "learning_rate": 0.001, "loss": 2.4236, "step": 30968 }, { "epoch": 2.676328502415459, "grad_norm": 1.7701199054718018, "learning_rate": 0.001, "loss": 2.426, "step": 31024 }, { "epoch": 2.681159420289855, "grad_norm": 2.0457608699798584, "learning_rate": 0.001, "loss": 2.4161, "step": 31080 }, { "epoch": 2.685990338164251, "grad_norm": 2.239499807357788, "learning_rate": 0.001, "loss": 2.4072, "step": 31136 }, { "epoch": 2.6908212560386473, "grad_norm": 1.0082460641860962, "learning_rate": 0.001, "loss": 2.418, "step": 31192 }, { "epoch": 2.6956521739130435, "grad_norm": 1.1277071237564087, "learning_rate": 0.001, "loss": 2.4139, "step": 31248 }, { "epoch": 2.7004830917874396, "grad_norm": 1.4032684564590454, "learning_rate": 0.001, "loss": 2.4174, "step": 31304 }, { "epoch": 2.7053140096618358, "grad_norm": 0.9336313605308533, "learning_rate": 0.001, "loss": 2.4108, "step": 31360 }, { "epoch": 2.710144927536232, "grad_norm": 1.1663310527801514, "learning_rate": 0.001, "loss": 2.4309, "step": 31416 }, { "epoch": 2.714975845410628, "grad_norm": 0.6759818196296692, "learning_rate": 0.001, "loss": 2.4193, "step": 31472 }, { "epoch": 2.7198067632850242, "grad_norm": 0.6269552111625671, "learning_rate": 0.001, "loss": 2.4199, "step": 31528 }, { "epoch": 2.7246376811594204, "grad_norm": 0.4199528098106384, "learning_rate": 0.001, "loss": 2.402, "step": 31584 }, { "epoch": 2.7294685990338166, "grad_norm": 0.5285377502441406, "learning_rate": 0.001, "loss": 2.4001, "step": 31640 }, { "epoch": 2.7342995169082127, "grad_norm": 9.366227149963379, "learning_rate": 0.001, "loss": 2.3955, "step": 31696 }, { "epoch": 2.7391304347826084, "grad_norm": 1.677487850189209, "learning_rate": 0.001, "loss": 2.4229, "step": 31752 }, { "epoch": 2.7439613526570046, "grad_norm": 1.4587961435317993, "learning_rate": 0.001, "loss": 2.4199, "step": 31808 }, { "epoch": 2.7487922705314007, "grad_norm": 2.9107236862182617, "learning_rate": 0.001, "loss": 2.4217, "step": 31864 }, { "epoch": 2.753623188405797, "grad_norm": 0.9053725600242615, "learning_rate": 0.001, "loss": 2.4047, "step": 31920 }, { "epoch": 2.758454106280193, "grad_norm": 2.644174098968506, "learning_rate": 0.001, "loss": 2.4136, "step": 31976 }, { "epoch": 2.763285024154589, "grad_norm": 6.017156600952148, "learning_rate": 0.001, "loss": 2.4273, "step": 32032 }, { "epoch": 2.7681159420289854, "grad_norm": 1.0742233991622925, "learning_rate": 0.001, "loss": 2.4398, "step": 32088 }, { "epoch": 2.7729468599033815, "grad_norm": 1.4463682174682617, "learning_rate": 0.001, "loss": 2.4396, "step": 32144 }, { "epoch": 2.7777777777777777, "grad_norm": 2.0621907711029053, "learning_rate": 0.001, "loss": 2.4205, "step": 32200 }, { "epoch": 2.782608695652174, "grad_norm": 1.5583444833755493, "learning_rate": 0.001, "loss": 2.4205, "step": 32256 }, { "epoch": 2.78743961352657, "grad_norm": 1.4091781377792358, "learning_rate": 0.001, "loss": 2.4293, "step": 32312 }, { "epoch": 2.792270531400966, "grad_norm": 2.341855049133301, "learning_rate": 0.001, "loss": 2.4186, "step": 32368 }, { "epoch": 2.7971014492753623, "grad_norm": 2.265354633331299, "learning_rate": 0.001, "loss": 2.4147, "step": 32424 }, { "epoch": 2.8019323671497585, "grad_norm": 0.8014535903930664, "learning_rate": 0.001, "loss": 2.4212, "step": 32480 }, { "epoch": 2.8067632850241546, "grad_norm": 1.4704686403274536, "learning_rate": 0.001, "loss": 2.408, "step": 32536 }, { "epoch": 2.8115942028985508, "grad_norm": 1.1337703466415405, "learning_rate": 0.001, "loss": 2.4283, "step": 32592 }, { "epoch": 2.816425120772947, "grad_norm": 1.2696563005447388, "learning_rate": 0.001, "loss": 2.4296, "step": 32648 }, { "epoch": 2.821256038647343, "grad_norm": 2.815958261489868, "learning_rate": 0.001, "loss": 2.4238, "step": 32704 }, { "epoch": 2.8260869565217392, "grad_norm": 1.2532445192337036, "learning_rate": 0.001, "loss": 2.4316, "step": 32760 }, { "epoch": 2.8309178743961354, "grad_norm": 1.9834295511245728, "learning_rate": 0.001, "loss": 2.4249, "step": 32816 }, { "epoch": 2.8357487922705316, "grad_norm": 1.0654997825622559, "learning_rate": 0.001, "loss": 2.4156, "step": 32872 }, { "epoch": 2.8405797101449277, "grad_norm": 2.375105619430542, "learning_rate": 0.001, "loss": 2.4218, "step": 32928 }, { "epoch": 2.845410628019324, "grad_norm": 0.7590191960334778, "learning_rate": 0.001, "loss": 2.4053, "step": 32984 }, { "epoch": 2.85024154589372, "grad_norm": 0.9580590128898621, "learning_rate": 0.001, "loss": 2.4126, "step": 33040 }, { "epoch": 2.855072463768116, "grad_norm": 1.3444570302963257, "learning_rate": 0.001, "loss": 2.4201, "step": 33096 }, { "epoch": 2.8599033816425123, "grad_norm": 1.0784677267074585, "learning_rate": 0.001, "loss": 2.4255, "step": 33152 }, { "epoch": 2.864734299516908, "grad_norm": 1.7011573314666748, "learning_rate": 0.001, "loss": 2.4174, "step": 33208 }, { "epoch": 2.869565217391304, "grad_norm": 0.7227048873901367, "learning_rate": 0.001, "loss": 2.4227, "step": 33264 }, { "epoch": 2.8743961352657004, "grad_norm": 1.003725290298462, "learning_rate": 0.001, "loss": 2.4237, "step": 33320 }, { "epoch": 2.8792270531400965, "grad_norm": 3.9613237380981445, "learning_rate": 0.001, "loss": 2.4162, "step": 33376 }, { "epoch": 2.8840579710144927, "grad_norm": 2.6184682846069336, "learning_rate": 0.001, "loss": 2.4105, "step": 33432 }, { "epoch": 2.888888888888889, "grad_norm": 1.3031649589538574, "learning_rate": 0.001, "loss": 2.4192, "step": 33488 }, { "epoch": 2.893719806763285, "grad_norm": 9.451916694641113, "learning_rate": 0.001, "loss": 2.4395, "step": 33544 }, { "epoch": 2.898550724637681, "grad_norm": 1.3001573085784912, "learning_rate": 0.001, "loss": 2.4331, "step": 33600 }, { "epoch": 2.9033816425120773, "grad_norm": 0.5848278999328613, "learning_rate": 0.001, "loss": 2.4337, "step": 33656 }, { "epoch": 2.9082125603864735, "grad_norm": 0.8421841263771057, "learning_rate": 0.001, "loss": 2.436, "step": 33712 }, { "epoch": 2.9130434782608696, "grad_norm": 1.9961779117584229, "learning_rate": 0.001, "loss": 2.4331, "step": 33768 }, { "epoch": 2.917874396135266, "grad_norm": 2.2637486457824707, "learning_rate": 0.001, "loss": 2.4234, "step": 33824 }, { "epoch": 2.922705314009662, "grad_norm": 0.6603698134422302, "learning_rate": 0.001, "loss": 2.4233, "step": 33880 }, { "epoch": 2.927536231884058, "grad_norm": 1.0390866994857788, "learning_rate": 0.001, "loss": 2.4183, "step": 33936 }, { "epoch": 2.9323671497584543, "grad_norm": 1.7142407894134521, "learning_rate": 0.001, "loss": 2.435, "step": 33992 }, { "epoch": 2.9371980676328504, "grad_norm": 0.8366157412528992, "learning_rate": 0.001, "loss": 2.4312, "step": 34048 }, { "epoch": 2.942028985507246, "grad_norm": 2.7395501136779785, "learning_rate": 0.001, "loss": 2.421, "step": 34104 }, { "epoch": 2.9468599033816423, "grad_norm": 6.661783218383789, "learning_rate": 0.001, "loss": 2.4213, "step": 34160 }, { "epoch": 2.9516908212560384, "grad_norm": 1.2035876512527466, "learning_rate": 0.001, "loss": 2.4559, "step": 34216 }, { "epoch": 2.9565217391304346, "grad_norm": 14.095723152160645, "learning_rate": 0.001, "loss": 2.4303, "step": 34272 }, { "epoch": 2.9613526570048307, "grad_norm": 1.9609922170639038, "learning_rate": 0.001, "loss": 2.4221, "step": 34328 }, { "epoch": 2.966183574879227, "grad_norm": 2.0201287269592285, "learning_rate": 0.001, "loss": 2.4337, "step": 34384 }, { "epoch": 2.971014492753623, "grad_norm": 0.8179659247398376, "learning_rate": 0.001, "loss": 2.4274, "step": 34440 }, { "epoch": 2.975845410628019, "grad_norm": 0.998927891254425, "learning_rate": 0.001, "loss": 2.4182, "step": 34496 }, { "epoch": 2.9806763285024154, "grad_norm": 1.5369782447814941, "learning_rate": 0.001, "loss": 2.404, "step": 34552 }, { "epoch": 2.9855072463768115, "grad_norm": 0.8678004145622253, "learning_rate": 0.001, "loss": 2.4057, "step": 34608 }, { "epoch": 2.9903381642512077, "grad_norm": 5.1712260246276855, "learning_rate": 0.001, "loss": 2.4016, "step": 34664 }, { "epoch": 2.995169082125604, "grad_norm": 3.1465790271759033, "learning_rate": 0.001, "loss": 2.4078, "step": 34720 }, { "epoch": 3.0, "grad_norm": 1.173697829246521, "learning_rate": 0.001, "loss": 2.4113, "step": 34776 }, { "epoch": 3.004830917874396, "grad_norm": 0.6932175159454346, "learning_rate": 0.001, "loss": 2.3669, "step": 34832 }, { "epoch": 3.0096618357487923, "grad_norm": 0.6690827012062073, "learning_rate": 0.001, "loss": 2.3564, "step": 34888 }, { "epoch": 3.0144927536231885, "grad_norm": 1.135644793510437, "learning_rate": 0.001, "loss": 2.3536, "step": 34944 }, { "epoch": 3.0193236714975846, "grad_norm": 0.9978145956993103, "learning_rate": 0.001, "loss": 2.3597, "step": 35000 }, { "epoch": 3.024154589371981, "grad_norm": 1.3222076892852783, "learning_rate": 0.001, "loss": 2.3672, "step": 35056 }, { "epoch": 3.028985507246377, "grad_norm": 1.1678580045700073, "learning_rate": 0.001, "loss": 2.3754, "step": 35112 }, { "epoch": 3.033816425120773, "grad_norm": 1.3769093751907349, "learning_rate": 0.001, "loss": 2.3743, "step": 35168 }, { "epoch": 3.0386473429951693, "grad_norm": 5.447660446166992, "learning_rate": 0.001, "loss": 2.391, "step": 35224 }, { "epoch": 3.0434782608695654, "grad_norm": 0.9091969728469849, "learning_rate": 0.001, "loss": 2.3728, "step": 35280 }, { "epoch": 3.0483091787439616, "grad_norm": 2.0814619064331055, "learning_rate": 0.001, "loss": 2.3758, "step": 35336 }, { "epoch": 3.0531400966183573, "grad_norm": 5.833859443664551, "learning_rate": 0.001, "loss": 2.3669, "step": 35392 }, { "epoch": 3.0579710144927534, "grad_norm": 1.9732599258422852, "learning_rate": 0.001, "loss": 2.3788, "step": 35448 }, { "epoch": 3.0628019323671496, "grad_norm": 2.450575828552246, "learning_rate": 0.001, "loss": 2.384, "step": 35504 }, { "epoch": 3.0676328502415457, "grad_norm": 25.521682739257812, "learning_rate": 0.001, "loss": 2.376, "step": 35560 }, { "epoch": 3.072463768115942, "grad_norm": 1.3444690704345703, "learning_rate": 0.001, "loss": 2.3685, "step": 35616 }, { "epoch": 3.077294685990338, "grad_norm": 1.0060046911239624, "learning_rate": 0.001, "loss": 2.3669, "step": 35672 }, { "epoch": 3.082125603864734, "grad_norm": 0.6015172600746155, "learning_rate": 0.001, "loss": 2.3713, "step": 35728 }, { "epoch": 3.0869565217391304, "grad_norm": 0.646270215511322, "learning_rate": 0.001, "loss": 2.3624, "step": 35784 }, { "epoch": 3.0917874396135265, "grad_norm": 0.7399847507476807, "learning_rate": 0.001, "loss": 2.3608, "step": 35840 }, { "epoch": 3.0966183574879227, "grad_norm": 2.9859659671783447, "learning_rate": 0.001, "loss": 2.3609, "step": 35896 }, { "epoch": 3.101449275362319, "grad_norm": 1.0483369827270508, "learning_rate": 0.001, "loss": 2.3751, "step": 35952 }, { "epoch": 3.106280193236715, "grad_norm": 2.3558247089385986, "learning_rate": 0.001, "loss": 2.3808, "step": 36008 }, { "epoch": 3.111111111111111, "grad_norm": 0.7647687792778015, "learning_rate": 0.001, "loss": 2.3737, "step": 36064 }, { "epoch": 3.1159420289855073, "grad_norm": 1.0547163486480713, "learning_rate": 0.001, "loss": 2.3622, "step": 36120 }, { "epoch": 3.1207729468599035, "grad_norm": 0.7350274920463562, "learning_rate": 0.001, "loss": 2.3519, "step": 36176 }, { "epoch": 3.1256038647342996, "grad_norm": 1.322343349456787, "learning_rate": 0.001, "loss": 2.3671, "step": 36232 }, { "epoch": 3.130434782608696, "grad_norm": 1.419541835784912, "learning_rate": 0.001, "loss": 2.3643, "step": 36288 }, { "epoch": 3.135265700483092, "grad_norm": 0.640250563621521, "learning_rate": 0.001, "loss": 2.356, "step": 36344 }, { "epoch": 3.140096618357488, "grad_norm": 1.2903010845184326, "learning_rate": 0.001, "loss": 2.3529, "step": 36400 }, { "epoch": 3.1449275362318843, "grad_norm": 0.8562397956848145, "learning_rate": 0.001, "loss": 2.3652, "step": 36456 }, { "epoch": 3.14975845410628, "grad_norm": 1.3351072072982788, "learning_rate": 0.001, "loss": 2.3707, "step": 36512 }, { "epoch": 3.154589371980676, "grad_norm": 1.0129743814468384, "learning_rate": 0.001, "loss": 2.3673, "step": 36568 }, { "epoch": 3.1594202898550723, "grad_norm": 1.540778636932373, "learning_rate": 0.001, "loss": 2.3822, "step": 36624 }, { "epoch": 3.1642512077294684, "grad_norm": 1.474141240119934, "learning_rate": 0.001, "loss": 2.394, "step": 36680 }, { "epoch": 3.1690821256038646, "grad_norm": 1.1241166591644287, "learning_rate": 0.001, "loss": 2.3829, "step": 36736 }, { "epoch": 3.1739130434782608, "grad_norm": 1.6276636123657227, "learning_rate": 0.001, "loss": 2.4077, "step": 36792 }, { "epoch": 3.178743961352657, "grad_norm": 3.3927533626556396, "learning_rate": 0.001, "loss": 2.3993, "step": 36848 }, { "epoch": 3.183574879227053, "grad_norm": 2.856873035430908, "learning_rate": 0.001, "loss": 2.3929, "step": 36904 }, { "epoch": 3.1884057971014492, "grad_norm": 0.8577069044113159, "learning_rate": 0.001, "loss": 2.3923, "step": 36960 }, { "epoch": 3.1932367149758454, "grad_norm": 1.7790100574493408, "learning_rate": 0.001, "loss": 2.3837, "step": 37016 }, { "epoch": 3.1980676328502415, "grad_norm": 1.3106088638305664, "learning_rate": 0.001, "loss": 2.3818, "step": 37072 }, { "epoch": 3.2028985507246377, "grad_norm": 3.6986899375915527, "learning_rate": 0.001, "loss": 2.3823, "step": 37128 }, { "epoch": 3.207729468599034, "grad_norm": 0.8908578157424927, "learning_rate": 0.001, "loss": 2.382, "step": 37184 }, { "epoch": 3.21256038647343, "grad_norm": 1.2004361152648926, "learning_rate": 0.001, "loss": 2.3904, "step": 37240 }, { "epoch": 3.217391304347826, "grad_norm": 1.119553565979004, "learning_rate": 0.001, "loss": 2.3771, "step": 37296 }, { "epoch": 3.2222222222222223, "grad_norm": 1.9667686223983765, "learning_rate": 0.001, "loss": 2.39, "step": 37352 }, { "epoch": 3.2270531400966185, "grad_norm": 6.152589797973633, "learning_rate": 0.001, "loss": 2.3972, "step": 37408 }, { "epoch": 3.2318840579710146, "grad_norm": 1.4891480207443237, "learning_rate": 0.001, "loss": 2.3827, "step": 37464 }, { "epoch": 3.236714975845411, "grad_norm": 0.7430106997489929, "learning_rate": 0.001, "loss": 2.3867, "step": 37520 }, { "epoch": 3.241545893719807, "grad_norm": 3.5744717121124268, "learning_rate": 0.001, "loss": 2.3889, "step": 37576 }, { "epoch": 3.246376811594203, "grad_norm": 0.8001721501350403, "learning_rate": 0.001, "loss": 2.3608, "step": 37632 }, { "epoch": 3.2512077294685993, "grad_norm": 5.070853233337402, "learning_rate": 0.001, "loss": 2.3563, "step": 37688 }, { "epoch": 3.2560386473429954, "grad_norm": 0.9352041482925415, "learning_rate": 0.001, "loss": 2.3497, "step": 37744 }, { "epoch": 3.260869565217391, "grad_norm": 0.6637979745864868, "learning_rate": 0.001, "loss": 2.3715, "step": 37800 }, { "epoch": 3.2657004830917873, "grad_norm": 1.2793562412261963, "learning_rate": 0.001, "loss": 2.3667, "step": 37856 }, { "epoch": 3.2705314009661834, "grad_norm": 2.063894271850586, "learning_rate": 0.001, "loss": 2.3751, "step": 37912 }, { "epoch": 3.2753623188405796, "grad_norm": 1.5859637260437012, "learning_rate": 0.001, "loss": 2.3676, "step": 37968 }, { "epoch": 3.2801932367149758, "grad_norm": 1.7910484075546265, "learning_rate": 0.001, "loss": 2.3684, "step": 38024 }, { "epoch": 3.285024154589372, "grad_norm": 0.799378514289856, "learning_rate": 0.001, "loss": 2.3761, "step": 38080 }, { "epoch": 3.289855072463768, "grad_norm": 4.270195484161377, "learning_rate": 0.001, "loss": 2.3861, "step": 38136 }, { "epoch": 3.2946859903381642, "grad_norm": 1.1123530864715576, "learning_rate": 0.001, "loss": 2.377, "step": 38192 }, { "epoch": 3.2995169082125604, "grad_norm": 12.888733863830566, "learning_rate": 0.001, "loss": 2.3744, "step": 38248 }, { "epoch": 3.3043478260869565, "grad_norm": 1.509628176689148, "learning_rate": 0.001, "loss": 2.3722, "step": 38304 }, { "epoch": 3.3091787439613527, "grad_norm": 3.0714049339294434, "learning_rate": 0.001, "loss": 2.3778, "step": 38360 }, { "epoch": 3.314009661835749, "grad_norm": 1.1967629194259644, "learning_rate": 0.001, "loss": 2.3735, "step": 38416 }, { "epoch": 3.318840579710145, "grad_norm": 19.75279998779297, "learning_rate": 0.001, "loss": 2.3656, "step": 38472 }, { "epoch": 3.323671497584541, "grad_norm": 1.0594563484191895, "learning_rate": 0.001, "loss": 2.3639, "step": 38528 }, { "epoch": 3.3285024154589373, "grad_norm": 3.912726879119873, "learning_rate": 0.001, "loss": 2.3538, "step": 38584 }, { "epoch": 3.3333333333333335, "grad_norm": 1.6983665227890015, "learning_rate": 0.001, "loss": 2.3834, "step": 38640 }, { "epoch": 3.3381642512077296, "grad_norm": 1.1165878772735596, "learning_rate": 0.001, "loss": 2.3839, "step": 38696 }, { "epoch": 3.342995169082126, "grad_norm": 4.868439674377441, "learning_rate": 0.001, "loss": 2.3871, "step": 38752 }, { "epoch": 3.3478260869565215, "grad_norm": 0.9852940440177917, "learning_rate": 0.001, "loss": 2.3875, "step": 38808 }, { "epoch": 3.3526570048309177, "grad_norm": 1.9336177110671997, "learning_rate": 0.001, "loss": 2.3655, "step": 38864 }, { "epoch": 3.357487922705314, "grad_norm": 2.272597074508667, "learning_rate": 0.001, "loss": 2.3883, "step": 38920 }, { "epoch": 3.36231884057971, "grad_norm": 0.9089294075965881, "learning_rate": 0.001, "loss": 2.3731, "step": 38976 }, { "epoch": 3.367149758454106, "grad_norm": 1.0074656009674072, "learning_rate": 0.001, "loss": 2.3778, "step": 39032 }, { "epoch": 3.3719806763285023, "grad_norm": 0.8165333271026611, "learning_rate": 0.001, "loss": 2.3656, "step": 39088 }, { "epoch": 3.3768115942028984, "grad_norm": 2.1966381072998047, "learning_rate": 0.001, "loss": 2.37, "step": 39144 }, { "epoch": 3.3816425120772946, "grad_norm": 6.151560306549072, "learning_rate": 0.001, "loss": 2.3625, "step": 39200 }, { "epoch": 3.3864734299516908, "grad_norm": 1.6020865440368652, "learning_rate": 0.001, "loss": 2.3578, "step": 39256 }, { "epoch": 3.391304347826087, "grad_norm": 1.2496628761291504, "learning_rate": 0.001, "loss": 2.3733, "step": 39312 }, { "epoch": 3.396135265700483, "grad_norm": 4.8708415031433105, "learning_rate": 0.001, "loss": 2.3903, "step": 39368 }, { "epoch": 3.4009661835748792, "grad_norm": 5.318726062774658, "learning_rate": 0.001, "loss": 2.3869, "step": 39424 }, { "epoch": 3.4057971014492754, "grad_norm": 1.2490580081939697, "learning_rate": 0.001, "loss": 2.3868, "step": 39480 }, { "epoch": 3.4106280193236715, "grad_norm": 0.7086006999015808, "learning_rate": 0.001, "loss": 2.3629, "step": 39536 }, { "epoch": 3.4154589371980677, "grad_norm": 0.7509753108024597, "learning_rate": 0.001, "loss": 2.3701, "step": 39592 }, { "epoch": 3.420289855072464, "grad_norm": 0.6684341430664062, "learning_rate": 0.001, "loss": 2.3713, "step": 39648 }, { "epoch": 3.42512077294686, "grad_norm": 2.8551783561706543, "learning_rate": 0.001, "loss": 2.3598, "step": 39704 }, { "epoch": 3.429951690821256, "grad_norm": 0.6681911945343018, "learning_rate": 0.001, "loss": 2.3663, "step": 39760 }, { "epoch": 3.4347826086956523, "grad_norm": 0.7762264013290405, "learning_rate": 0.001, "loss": 2.374, "step": 39816 }, { "epoch": 3.4396135265700485, "grad_norm": 0.6667366027832031, "learning_rate": 0.001, "loss": 2.3668, "step": 39872 }, { "epoch": 3.4444444444444446, "grad_norm": 0.9514179229736328, "learning_rate": 0.001, "loss": 2.3556, "step": 39928 }, { "epoch": 3.449275362318841, "grad_norm": 1.7346069812774658, "learning_rate": 0.001, "loss": 2.3612, "step": 39984 }, { "epoch": 3.454106280193237, "grad_norm": 3.320202589035034, "learning_rate": 0.001, "loss": 2.3536, "step": 40040 }, { "epoch": 3.4589371980676327, "grad_norm": 0.8877231478691101, "learning_rate": 0.001, "loss": 2.345, "step": 40096 }, { "epoch": 3.463768115942029, "grad_norm": 1.4169694185256958, "learning_rate": 0.001, "loss": 2.359, "step": 40152 }, { "epoch": 3.468599033816425, "grad_norm": 0.502339243888855, "learning_rate": 0.001, "loss": 2.3615, "step": 40208 }, { "epoch": 3.473429951690821, "grad_norm": 0.5285590887069702, "learning_rate": 0.001, "loss": 2.3535, "step": 40264 }, { "epoch": 3.4782608695652173, "grad_norm": 1.1485893726348877, "learning_rate": 0.001, "loss": 2.3511, "step": 40320 }, { "epoch": 3.4830917874396135, "grad_norm": 1.5130213499069214, "learning_rate": 0.001, "loss": 2.3458, "step": 40376 }, { "epoch": 3.4879227053140096, "grad_norm": 0.7220565676689148, "learning_rate": 0.001, "loss": 2.3439, "step": 40432 }, { "epoch": 3.4927536231884058, "grad_norm": 0.5075478553771973, "learning_rate": 0.001, "loss": 2.3487, "step": 40488 }, { "epoch": 3.497584541062802, "grad_norm": 0.9343673586845398, "learning_rate": 0.001, "loss": 2.3489, "step": 40544 }, { "epoch": 3.502415458937198, "grad_norm": 1.2456401586532593, "learning_rate": 0.001, "loss": 2.3695, "step": 40600 }, { "epoch": 3.5072463768115942, "grad_norm": 0.6643732190132141, "learning_rate": 0.001, "loss": 2.3577, "step": 40656 }, { "epoch": 3.5120772946859904, "grad_norm": 0.9231414794921875, "learning_rate": 0.001, "loss": 2.3388, "step": 40712 }, { "epoch": 3.5169082125603865, "grad_norm": 0.7390984296798706, "learning_rate": 0.001, "loss": 2.3442, "step": 40768 }, { "epoch": 3.5217391304347827, "grad_norm": 0.9680396318435669, "learning_rate": 0.001, "loss": 2.3412, "step": 40824 }, { "epoch": 3.526570048309179, "grad_norm": 1.5306947231292725, "learning_rate": 0.001, "loss": 2.3529, "step": 40880 }, { "epoch": 3.531400966183575, "grad_norm": 1.0215588808059692, "learning_rate": 0.001, "loss": 2.3539, "step": 40936 }, { "epoch": 3.536231884057971, "grad_norm": 0.6781653761863708, "learning_rate": 0.001, "loss": 2.3463, "step": 40992 }, { "epoch": 3.541062801932367, "grad_norm": 2.7816197872161865, "learning_rate": 0.001, "loss": 2.3695, "step": 41048 }, { "epoch": 3.545893719806763, "grad_norm": 1.1754366159439087, "learning_rate": 0.001, "loss": 2.3644, "step": 41104 }, { "epoch": 3.550724637681159, "grad_norm": 0.45001232624053955, "learning_rate": 0.001, "loss": 2.3557, "step": 41160 }, { "epoch": 3.5555555555555554, "grad_norm": 1.077300786972046, "learning_rate": 0.001, "loss": 2.3658, "step": 41216 }, { "epoch": 3.5603864734299515, "grad_norm": 0.5185337662696838, "learning_rate": 0.001, "loss": 2.3554, "step": 41272 }, { "epoch": 3.5652173913043477, "grad_norm": 0.7596719264984131, "learning_rate": 0.001, "loss": 2.3617, "step": 41328 }, { "epoch": 3.570048309178744, "grad_norm": 2.055612564086914, "learning_rate": 0.001, "loss": 2.3923, "step": 41384 }, { "epoch": 3.57487922705314, "grad_norm": 2.0701406002044678, "learning_rate": 0.001, "loss": 2.3913, "step": 41440 }, { "epoch": 3.579710144927536, "grad_norm": 1.7943741083145142, "learning_rate": 0.001, "loss": 2.3745, "step": 41496 }, { "epoch": 3.5845410628019323, "grad_norm": 0.8631559014320374, "learning_rate": 0.001, "loss": 2.3578, "step": 41552 }, { "epoch": 3.5893719806763285, "grad_norm": 0.800894558429718, "learning_rate": 0.001, "loss": 2.3661, "step": 41608 }, { "epoch": 3.5942028985507246, "grad_norm": 1.5744913816452026, "learning_rate": 0.001, "loss": 2.3576, "step": 41664 }, { "epoch": 3.5990338164251208, "grad_norm": 0.7339915633201599, "learning_rate": 0.001, "loss": 2.3562, "step": 41720 }, { "epoch": 3.603864734299517, "grad_norm": 2.8779172897338867, "learning_rate": 0.001, "loss": 2.3571, "step": 41776 }, { "epoch": 3.608695652173913, "grad_norm": 1.536848783493042, "learning_rate": 0.001, "loss": 2.3668, "step": 41832 }, { "epoch": 3.6135265700483092, "grad_norm": 2.2175509929656982, "learning_rate": 0.001, "loss": 2.3465, "step": 41888 }, { "epoch": 3.6183574879227054, "grad_norm": 0.8303064107894897, "learning_rate": 0.001, "loss": 2.3456, "step": 41944 }, { "epoch": 3.6231884057971016, "grad_norm": 0.8914777040481567, "learning_rate": 0.001, "loss": 2.3377, "step": 42000 }, { "epoch": 3.6280193236714977, "grad_norm": 0.8713904619216919, "learning_rate": 0.001, "loss": 2.3453, "step": 42056 }, { "epoch": 3.632850241545894, "grad_norm": 2.1183042526245117, "learning_rate": 0.001, "loss": 2.3424, "step": 42112 }, { "epoch": 3.63768115942029, "grad_norm": 1.0792075395584106, "learning_rate": 0.001, "loss": 2.3469, "step": 42168 }, { "epoch": 3.642512077294686, "grad_norm": 1.2308788299560547, "learning_rate": 0.001, "loss": 2.362, "step": 42224 }, { "epoch": 3.6473429951690823, "grad_norm": 1.8587312698364258, "learning_rate": 0.001, "loss": 2.3539, "step": 42280 }, { "epoch": 3.6521739130434785, "grad_norm": 0.7397122383117676, "learning_rate": 0.001, "loss": 2.3419, "step": 42336 }, { "epoch": 3.6570048309178746, "grad_norm": 0.6592168211936951, "learning_rate": 0.001, "loss": 2.3266, "step": 42392 }, { "epoch": 3.661835748792271, "grad_norm": 0.8108003735542297, "learning_rate": 0.001, "loss": 2.3363, "step": 42448 }, { "epoch": 3.6666666666666665, "grad_norm": 0.8156822919845581, "learning_rate": 0.001, "loss": 2.3565, "step": 42504 }, { "epoch": 3.6714975845410627, "grad_norm": 0.9192153215408325, "learning_rate": 0.001, "loss": 2.3567, "step": 42560 }, { "epoch": 3.676328502415459, "grad_norm": 0.9951876401901245, "learning_rate": 0.001, "loss": 2.3583, "step": 42616 }, { "epoch": 3.681159420289855, "grad_norm": 1.245253562927246, "learning_rate": 0.001, "loss": 2.3465, "step": 42672 }, { "epoch": 3.685990338164251, "grad_norm": 1.2041829824447632, "learning_rate": 0.001, "loss": 2.3535, "step": 42728 }, { "epoch": 3.6908212560386473, "grad_norm": 2.3600947856903076, "learning_rate": 0.001, "loss": 2.3513, "step": 42784 }, { "epoch": 3.6956521739130435, "grad_norm": 1.3931894302368164, "learning_rate": 0.001, "loss": 2.3649, "step": 42840 }, { "epoch": 3.7004830917874396, "grad_norm": 1.2088695764541626, "learning_rate": 0.001, "loss": 2.3467, "step": 42896 }, { "epoch": 3.7053140096618358, "grad_norm": 0.7605326175689697, "learning_rate": 0.001, "loss": 2.3452, "step": 42952 }, { "epoch": 3.710144927536232, "grad_norm": 1.9344475269317627, "learning_rate": 0.001, "loss": 2.3464, "step": 43008 }, { "epoch": 3.714975845410628, "grad_norm": 4.121121883392334, "learning_rate": 0.001, "loss": 2.353, "step": 43064 }, { "epoch": 3.7198067632850242, "grad_norm": 0.7761598229408264, "learning_rate": 0.001, "loss": 2.346, "step": 43120 }, { "epoch": 3.7246376811594204, "grad_norm": 1.034733772277832, "learning_rate": 0.001, "loss": 2.3597, "step": 43176 }, { "epoch": 3.7294685990338166, "grad_norm": 0.6464439630508423, "learning_rate": 0.001, "loss": 2.359, "step": 43232 }, { "epoch": 3.7342995169082127, "grad_norm": 3.42350172996521, "learning_rate": 0.001, "loss": 2.3582, "step": 43288 }, { "epoch": 3.7391304347826084, "grad_norm": 0.6033596396446228, "learning_rate": 0.001, "loss": 2.3409, "step": 43344 }, { "epoch": 3.7439613526570046, "grad_norm": 0.5961412191390991, "learning_rate": 0.001, "loss": 2.3418, "step": 43400 }, { "epoch": 3.7487922705314007, "grad_norm": 0.8360373377799988, "learning_rate": 0.001, "loss": 2.3427, "step": 43456 }, { "epoch": 3.753623188405797, "grad_norm": 28.263586044311523, "learning_rate": 0.001, "loss": 2.3364, "step": 43512 }, { "epoch": 3.758454106280193, "grad_norm": 5.00962495803833, "learning_rate": 0.001, "loss": 2.3359, "step": 43568 }, { "epoch": 3.763285024154589, "grad_norm": 0.5710622072219849, "learning_rate": 0.001, "loss": 2.333, "step": 43624 }, { "epoch": 3.7681159420289854, "grad_norm": 0.9183070659637451, "learning_rate": 0.001, "loss": 2.3388, "step": 43680 }, { "epoch": 3.7729468599033815, "grad_norm": 0.6936941146850586, "learning_rate": 0.001, "loss": 2.3309, "step": 43736 }, { "epoch": 3.7777777777777777, "grad_norm": 1.2598804235458374, "learning_rate": 0.001, "loss": 2.3337, "step": 43792 }, { "epoch": 3.782608695652174, "grad_norm": 2.6248106956481934, "learning_rate": 0.001, "loss": 2.3619, "step": 43848 }, { "epoch": 3.78743961352657, "grad_norm": 0.989886999130249, "learning_rate": 0.001, "loss": 2.3554, "step": 43904 }, { "epoch": 3.792270531400966, "grad_norm": 3.3399837017059326, "learning_rate": 0.001, "loss": 2.3388, "step": 43960 }, { "epoch": 3.7971014492753623, "grad_norm": 2.2985455989837646, "learning_rate": 0.001, "loss": 2.337, "step": 44016 }, { "epoch": 3.8019323671497585, "grad_norm": 0.9566785097122192, "learning_rate": 0.001, "loss": 2.3436, "step": 44072 }, { "epoch": 3.8067632850241546, "grad_norm": 21.195798873901367, "learning_rate": 0.001, "loss": 2.339, "step": 44128 }, { "epoch": 3.8115942028985508, "grad_norm": 0.740143358707428, "learning_rate": 0.001, "loss": 2.3431, "step": 44184 }, { "epoch": 3.816425120772947, "grad_norm": 4.102219104766846, "learning_rate": 0.001, "loss": 2.3453, "step": 44240 }, { "epoch": 3.821256038647343, "grad_norm": 0.8171431422233582, "learning_rate": 0.001, "loss": 2.3447, "step": 44296 }, { "epoch": 3.8260869565217392, "grad_norm": 1.02897047996521, "learning_rate": 0.001, "loss": 2.3345, "step": 44352 }, { "epoch": 3.8309178743961354, "grad_norm": 1.1245150566101074, "learning_rate": 0.001, "loss": 2.3513, "step": 44408 }, { "epoch": 3.8357487922705316, "grad_norm": 0.6639866232872009, "learning_rate": 0.001, "loss": 2.3618, "step": 44464 }, { "epoch": 3.8405797101449277, "grad_norm": 29.38960838317871, "learning_rate": 0.001, "loss": 2.3647, "step": 44520 }, { "epoch": 3.845410628019324, "grad_norm": 0.9987631440162659, "learning_rate": 0.001, "loss": 2.3577, "step": 44576 }, { "epoch": 3.85024154589372, "grad_norm": 3.8040685653686523, "learning_rate": 0.001, "loss": 2.3563, "step": 44632 }, { "epoch": 3.855072463768116, "grad_norm": 1.2185957431793213, "learning_rate": 0.001, "loss": 2.3569, "step": 44688 }, { "epoch": 3.8599033816425123, "grad_norm": 0.7800244688987732, "learning_rate": 0.001, "loss": 2.3533, "step": 44744 }, { "epoch": 3.864734299516908, "grad_norm": 0.5030954480171204, "learning_rate": 0.001, "loss": 2.3544, "step": 44800 }, { "epoch": 3.869565217391304, "grad_norm": 2.9667656421661377, "learning_rate": 0.001, "loss": 2.3588, "step": 44856 }, { "epoch": 3.8743961352657004, "grad_norm": 0.5120835900306702, "learning_rate": 0.001, "loss": 2.3369, "step": 44912 }, { "epoch": 3.8792270531400965, "grad_norm": 0.4867868721485138, "learning_rate": 0.001, "loss": 2.3426, "step": 44968 }, { "epoch": 3.8840579710144927, "grad_norm": 0.6894313097000122, "learning_rate": 0.001, "loss": 2.3384, "step": 45024 }, { "epoch": 3.888888888888889, "grad_norm": 1.0191646814346313, "learning_rate": 0.001, "loss": 2.341, "step": 45080 }, { "epoch": 3.893719806763285, "grad_norm": 1.1036583185195923, "learning_rate": 0.001, "loss": 2.3347, "step": 45136 }, { "epoch": 3.898550724637681, "grad_norm": 1.3702045679092407, "learning_rate": 0.001, "loss": 2.3467, "step": 45192 }, { "epoch": 3.9033816425120773, "grad_norm": 3.228801727294922, "learning_rate": 0.001, "loss": 2.3804, "step": 45248 }, { "epoch": 3.9082125603864735, "grad_norm": 1.7107059955596924, "learning_rate": 0.001, "loss": 2.3604, "step": 45304 }, { "epoch": 3.9130434782608696, "grad_norm": 3.484675884246826, "learning_rate": 0.001, "loss": 2.3519, "step": 45360 }, { "epoch": 3.917874396135266, "grad_norm": 0.9079760313034058, "learning_rate": 0.001, "loss": 2.3438, "step": 45416 }, { "epoch": 3.922705314009662, "grad_norm": 0.9418869614601135, "learning_rate": 0.001, "loss": 2.3397, "step": 45472 }, { "epoch": 3.927536231884058, "grad_norm": 2.3312509059906006, "learning_rate": 0.001, "loss": 2.336, "step": 45528 }, { "epoch": 3.9323671497584543, "grad_norm": 0.617605984210968, "learning_rate": 0.001, "loss": 2.3388, "step": 45584 }, { "epoch": 3.9371980676328504, "grad_norm": 0.6304759383201599, "learning_rate": 0.001, "loss": 2.3294, "step": 45640 }, { "epoch": 3.942028985507246, "grad_norm": 0.9962377548217773, "learning_rate": 0.001, "loss": 2.3311, "step": 45696 }, { "epoch": 3.9468599033816423, "grad_norm": 1.7532322406768799, "learning_rate": 0.001, "loss": 2.3495, "step": 45752 }, { "epoch": 3.9516908212560384, "grad_norm": 0.6233911514282227, "learning_rate": 0.001, "loss": 2.3499, "step": 45808 }, { "epoch": 3.9565217391304346, "grad_norm": 5.571962833404541, "learning_rate": 0.001, "loss": 2.3529, "step": 45864 }, { "epoch": 3.9613526570048307, "grad_norm": 1.1596258878707886, "learning_rate": 0.001, "loss": 2.3522, "step": 45920 }, { "epoch": 3.966183574879227, "grad_norm": 1.252943992614746, "learning_rate": 0.001, "loss": 2.3653, "step": 45976 }, { "epoch": 3.971014492753623, "grad_norm": 2.0798392295837402, "learning_rate": 0.001, "loss": 2.3519, "step": 46032 }, { "epoch": 3.975845410628019, "grad_norm": 2.891594409942627, "learning_rate": 0.001, "loss": 2.3491, "step": 46088 }, { "epoch": 3.9806763285024154, "grad_norm": 1.825319766998291, "learning_rate": 0.001, "loss": 2.3438, "step": 46144 }, { "epoch": 3.9855072463768115, "grad_norm": 1.724923014640808, "learning_rate": 0.001, "loss": 2.3465, "step": 46200 }, { "epoch": 3.9903381642512077, "grad_norm": 0.7429100275039673, "learning_rate": 0.001, "loss": 2.336, "step": 46256 }, { "epoch": 3.995169082125604, "grad_norm": 0.5450997948646545, "learning_rate": 0.001, "loss": 2.3371, "step": 46312 }, { "epoch": 4.0, "grad_norm": 1.657492995262146, "learning_rate": 0.001, "loss": 2.3274, "step": 46368 }, { "epoch": 4.004830917874396, "grad_norm": 0.9657808542251587, "learning_rate": 0.001, "loss": 2.3042, "step": 46424 }, { "epoch": 4.009661835748792, "grad_norm": 0.600604772567749, "learning_rate": 0.001, "loss": 2.3016, "step": 46480 }, { "epoch": 4.0144927536231885, "grad_norm": 0.924943745136261, "learning_rate": 0.001, "loss": 2.2868, "step": 46536 }, { "epoch": 4.019323671497585, "grad_norm": 1.3035205602645874, "learning_rate": 0.001, "loss": 2.2841, "step": 46592 }, { "epoch": 4.024154589371981, "grad_norm": 1.3386552333831787, "learning_rate": 0.001, "loss": 2.2942, "step": 46648 }, { "epoch": 4.028985507246377, "grad_norm": 1.0220056772232056, "learning_rate": 0.001, "loss": 2.3054, "step": 46704 }, { "epoch": 4.033816425120773, "grad_norm": 0.7145740985870361, "learning_rate": 0.001, "loss": 2.3049, "step": 46760 }, { "epoch": 4.038647342995169, "grad_norm": 3.8825621604919434, "learning_rate": 0.001, "loss": 2.3003, "step": 46816 }, { "epoch": 4.043478260869565, "grad_norm": 0.8055011630058289, "learning_rate": 0.001, "loss": 2.303, "step": 46872 }, { "epoch": 4.048309178743962, "grad_norm": 2.655247449874878, "learning_rate": 0.001, "loss": 2.3049, "step": 46928 }, { "epoch": 4.053140096618358, "grad_norm": 1.0282988548278809, "learning_rate": 0.001, "loss": 2.3202, "step": 46984 }, { "epoch": 4.057971014492754, "grad_norm": 0.9139070510864258, "learning_rate": 0.001, "loss": 2.3198, "step": 47040 }, { "epoch": 4.06280193236715, "grad_norm": 1.8222072124481201, "learning_rate": 0.001, "loss": 2.3254, "step": 47096 }, { "epoch": 4.067632850241546, "grad_norm": 1.2185211181640625, "learning_rate": 0.001, "loss": 2.3282, "step": 47152 }, { "epoch": 4.072463768115942, "grad_norm": 1.2186691761016846, "learning_rate": 0.001, "loss": 2.3232, "step": 47208 }, { "epoch": 4.0772946859903385, "grad_norm": 1.0944017171859741, "learning_rate": 0.001, "loss": 2.3133, "step": 47264 }, { "epoch": 4.082125603864735, "grad_norm": 2.1194827556610107, "learning_rate": 0.001, "loss": 2.3058, "step": 47320 }, { "epoch": 4.086956521739131, "grad_norm": 0.6063691973686218, "learning_rate": 0.001, "loss": 2.2968, "step": 47376 }, { "epoch": 4.091787439613527, "grad_norm": 3.2358484268188477, "learning_rate": 0.001, "loss": 2.3189, "step": 47432 }, { "epoch": 4.096618357487923, "grad_norm": 3.020056962966919, "learning_rate": 0.001, "loss": 2.3264, "step": 47488 }, { "epoch": 4.101449275362318, "grad_norm": 1.6198798418045044, "learning_rate": 0.001, "loss": 2.3141, "step": 47544 }, { "epoch": 4.106280193236715, "grad_norm": 0.7674989104270935, "learning_rate": 0.001, "loss": 2.3134, "step": 47600 }, { "epoch": 4.111111111111111, "grad_norm": 2.0637145042419434, "learning_rate": 0.001, "loss": 2.302, "step": 47656 }, { "epoch": 4.115942028985507, "grad_norm": 0.9420506358146667, "learning_rate": 0.001, "loss": 2.3089, "step": 47712 }, { "epoch": 4.120772946859903, "grad_norm": 1.3257102966308594, "learning_rate": 0.001, "loss": 2.3085, "step": 47768 }, { "epoch": 4.125603864734299, "grad_norm": 1.247165560722351, "learning_rate": 0.001, "loss": 2.3131, "step": 47824 }, { "epoch": 4.130434782608695, "grad_norm": 0.8171547055244446, "learning_rate": 0.001, "loss": 2.3057, "step": 47880 }, { "epoch": 4.1352657004830915, "grad_norm": 1.356412410736084, "learning_rate": 0.001, "loss": 2.3158, "step": 47936 }, { "epoch": 4.140096618357488, "grad_norm": 0.8632891774177551, "learning_rate": 0.001, "loss": 2.3179, "step": 47992 }, { "epoch": 4.144927536231884, "grad_norm": 1.7877506017684937, "learning_rate": 0.001, "loss": 2.3102, "step": 48048 }, { "epoch": 4.14975845410628, "grad_norm": 0.7102479338645935, "learning_rate": 0.001, "loss": 2.313, "step": 48104 }, { "epoch": 4.154589371980676, "grad_norm": 1.066693663597107, "learning_rate": 0.001, "loss": 2.3231, "step": 48160 }, { "epoch": 4.159420289855072, "grad_norm": 0.9703291654586792, "learning_rate": 0.001, "loss": 2.3412, "step": 48216 }, { "epoch": 4.164251207729468, "grad_norm": 0.6456874012947083, "learning_rate": 0.001, "loss": 2.3408, "step": 48272 }, { "epoch": 4.169082125603865, "grad_norm": 2.233797788619995, "learning_rate": 0.001, "loss": 2.3462, "step": 48328 }, { "epoch": 4.173913043478261, "grad_norm": 0.7574034929275513, "learning_rate": 0.001, "loss": 2.329, "step": 48384 }, { "epoch": 4.178743961352657, "grad_norm": 4.511397838592529, "learning_rate": 0.001, "loss": 2.3114, "step": 48440 }, { "epoch": 4.183574879227053, "grad_norm": 1.9656661748886108, "learning_rate": 0.001, "loss": 2.3126, "step": 48496 }, { "epoch": 4.188405797101449, "grad_norm": 0.8456041216850281, "learning_rate": 0.001, "loss": 2.3165, "step": 48552 }, { "epoch": 4.193236714975845, "grad_norm": 0.8470112681388855, "learning_rate": 0.001, "loss": 2.3002, "step": 48608 }, { "epoch": 4.1980676328502415, "grad_norm": 1.1490195989608765, "learning_rate": 0.001, "loss": 2.3093, "step": 48664 }, { "epoch": 4.202898550724638, "grad_norm": 1.7943440675735474, "learning_rate": 0.001, "loss": 2.3118, "step": 48720 }, { "epoch": 4.207729468599034, "grad_norm": 2.768073797225952, "learning_rate": 0.001, "loss": 2.3263, "step": 48776 }, { "epoch": 4.21256038647343, "grad_norm": 2.6669111251831055, "learning_rate": 0.001, "loss": 2.3253, "step": 48832 }, { "epoch": 4.217391304347826, "grad_norm": 1.7654824256896973, "learning_rate": 0.001, "loss": 2.3146, "step": 48888 }, { "epoch": 4.222222222222222, "grad_norm": 2.6006264686584473, "learning_rate": 0.001, "loss": 2.3115, "step": 48944 }, { "epoch": 4.2270531400966185, "grad_norm": 0.9127808213233948, "learning_rate": 0.001, "loss": 2.3162, "step": 49000 }, { "epoch": 4.231884057971015, "grad_norm": 0.8338162899017334, "learning_rate": 0.001, "loss": 2.3179, "step": 49056 }, { "epoch": 4.236714975845411, "grad_norm": 3.532031536102295, "learning_rate": 0.001, "loss": 2.3216, "step": 49112 }, { "epoch": 4.241545893719807, "grad_norm": 1.8172245025634766, "learning_rate": 0.001, "loss": 2.3287, "step": 49168 }, { "epoch": 4.246376811594203, "grad_norm": 1.6738920211791992, "learning_rate": 0.001, "loss": 2.3288, "step": 49224 }, { "epoch": 4.251207729468599, "grad_norm": 8.565791130065918, "learning_rate": 0.001, "loss": 2.3467, "step": 49280 }, { "epoch": 4.256038647342995, "grad_norm": 0.9684979319572449, "learning_rate": 0.001, "loss": 2.3482, "step": 49336 }, { "epoch": 4.260869565217392, "grad_norm": 0.7297049164772034, "learning_rate": 0.001, "loss": 2.3401, "step": 49392 }, { "epoch": 4.265700483091788, "grad_norm": 1.755307674407959, "learning_rate": 0.001, "loss": 2.339, "step": 49448 }, { "epoch": 4.270531400966184, "grad_norm": 1.299764633178711, "learning_rate": 0.001, "loss": 2.3326, "step": 49504 }, { "epoch": 4.27536231884058, "grad_norm": 0.7953894138336182, "learning_rate": 0.001, "loss": 2.313, "step": 49560 }, { "epoch": 4.280193236714976, "grad_norm": 5.326798915863037, "learning_rate": 0.001, "loss": 2.3136, "step": 49616 }, { "epoch": 4.285024154589372, "grad_norm": 0.6478893756866455, "learning_rate": 0.001, "loss": 2.324, "step": 49672 }, { "epoch": 4.2898550724637685, "grad_norm": 2.68768572807312, "learning_rate": 0.001, "loss": 2.3243, "step": 49728 }, { "epoch": 4.294685990338165, "grad_norm": 4.061188220977783, "learning_rate": 0.001, "loss": 2.328, "step": 49784 }, { "epoch": 4.29951690821256, "grad_norm": 3.017909049987793, "learning_rate": 0.001, "loss": 2.3194, "step": 49840 }, { "epoch": 4.304347826086957, "grad_norm": 20.849956512451172, "learning_rate": 0.001, "loss": 2.3196, "step": 49896 }, { "epoch": 4.309178743961352, "grad_norm": 1.5654706954956055, "learning_rate": 0.001, "loss": 2.3183, "step": 49952 }, { "epoch": 4.314009661835748, "grad_norm": 1.9425815343856812, "learning_rate": 0.001, "loss": 2.3212, "step": 50008 }, { "epoch": 4.318840579710145, "grad_norm": 1.0273939371109009, "learning_rate": 0.001, "loss": 2.32, "step": 50064 }, { "epoch": 4.323671497584541, "grad_norm": 1.2710562944412231, "learning_rate": 0.001, "loss": 2.3277, "step": 50120 }, { "epoch": 4.328502415458937, "grad_norm": 0.6078893542289734, "learning_rate": 0.001, "loss": 2.3262, "step": 50176 }, { "epoch": 4.333333333333333, "grad_norm": 1.3283988237380981, "learning_rate": 0.001, "loss": 2.3322, "step": 50232 }, { "epoch": 4.338164251207729, "grad_norm": 4.818864822387695, "learning_rate": 0.001, "loss": 2.337, "step": 50288 }, { "epoch": 4.342995169082125, "grad_norm": 0.9144983291625977, "learning_rate": 0.001, "loss": 2.343, "step": 50344 }, { "epoch": 4.3478260869565215, "grad_norm": 1.5212979316711426, "learning_rate": 0.001, "loss": 2.33, "step": 50400 }, { "epoch": 4.352657004830918, "grad_norm": 1.1908268928527832, "learning_rate": 0.001, "loss": 2.3208, "step": 50456 }, { "epoch": 4.357487922705314, "grad_norm": 0.7681655287742615, "learning_rate": 0.001, "loss": 2.305, "step": 50512 }, { "epoch": 4.36231884057971, "grad_norm": 1.3250541687011719, "learning_rate": 0.001, "loss": 2.3104, "step": 50568 }, { "epoch": 4.367149758454106, "grad_norm": 3.156174421310425, "learning_rate": 0.001, "loss": 2.3074, "step": 50624 }, { "epoch": 4.371980676328502, "grad_norm": 0.6974619030952454, "learning_rate": 0.001, "loss": 2.3126, "step": 50680 }, { "epoch": 4.3768115942028984, "grad_norm": 0.8445868492126465, "learning_rate": 0.001, "loss": 2.3006, "step": 50736 }, { "epoch": 4.381642512077295, "grad_norm": 0.9576389193534851, "learning_rate": 0.001, "loss": 2.3192, "step": 50792 }, { "epoch": 4.386473429951691, "grad_norm": 2.0181868076324463, "learning_rate": 0.001, "loss": 2.3117, "step": 50848 }, { "epoch": 4.391304347826087, "grad_norm": 2.806661367416382, "learning_rate": 0.001, "loss": 2.3253, "step": 50904 }, { "epoch": 4.396135265700483, "grad_norm": 2.122889518737793, "learning_rate": 0.001, "loss": 2.3485, "step": 50960 }, { "epoch": 4.400966183574879, "grad_norm": 1.5743014812469482, "learning_rate": 0.001, "loss": 2.3542, "step": 51016 }, { "epoch": 4.405797101449275, "grad_norm": 0.6997669339179993, "learning_rate": 0.001, "loss": 2.3423, "step": 51072 }, { "epoch": 4.4106280193236715, "grad_norm": 1.544404149055481, "learning_rate": 0.001, "loss": 2.3334, "step": 51128 }, { "epoch": 4.415458937198068, "grad_norm": 0.7238136529922485, "learning_rate": 0.001, "loss": 2.3272, "step": 51184 }, { "epoch": 4.420289855072464, "grad_norm": 1.068169355392456, "learning_rate": 0.001, "loss": 2.33, "step": 51240 }, { "epoch": 4.42512077294686, "grad_norm": 9.178274154663086, "learning_rate": 0.001, "loss": 2.3207, "step": 51296 }, { "epoch": 4.429951690821256, "grad_norm": 0.7421141862869263, "learning_rate": 0.001, "loss": 2.3107, "step": 51352 }, { "epoch": 4.434782608695652, "grad_norm": 2.0498499870300293, "learning_rate": 0.001, "loss": 2.3354, "step": 51408 }, { "epoch": 4.4396135265700485, "grad_norm": 1.3518296480178833, "learning_rate": 0.001, "loss": 2.3227, "step": 51464 }, { "epoch": 4.444444444444445, "grad_norm": 0.5711866021156311, "learning_rate": 0.001, "loss": 2.3205, "step": 51520 }, { "epoch": 4.449275362318841, "grad_norm": 0.9583142995834351, "learning_rate": 0.001, "loss": 2.31, "step": 51576 }, { "epoch": 4.454106280193237, "grad_norm": 0.9213517904281616, "learning_rate": 0.001, "loss": 2.3185, "step": 51632 }, { "epoch": 4.458937198067633, "grad_norm": 1.1538068056106567, "learning_rate": 0.001, "loss": 2.333, "step": 51688 }, { "epoch": 4.463768115942029, "grad_norm": 0.8258316516876221, "learning_rate": 0.001, "loss": 2.3246, "step": 51744 }, { "epoch": 4.468599033816425, "grad_norm": 0.8119305968284607, "learning_rate": 0.001, "loss": 2.3348, "step": 51800 }, { "epoch": 4.473429951690822, "grad_norm": 1.4755408763885498, "learning_rate": 0.001, "loss": 2.3104, "step": 51856 }, { "epoch": 4.478260869565218, "grad_norm": 0.9129286408424377, "learning_rate": 0.001, "loss": 2.3173, "step": 51912 }, { "epoch": 4.483091787439614, "grad_norm": 0.6550956964492798, "learning_rate": 0.001, "loss": 2.3084, "step": 51968 }, { "epoch": 4.48792270531401, "grad_norm": 0.9634305834770203, "learning_rate": 0.001, "loss": 2.3055, "step": 52024 }, { "epoch": 4.492753623188406, "grad_norm": 1.192183494567871, "learning_rate": 0.001, "loss": 2.3051, "step": 52080 }, { "epoch": 4.4975845410628015, "grad_norm": 1.9183534383773804, "learning_rate": 0.001, "loss": 2.3209, "step": 52136 }, { "epoch": 4.5024154589371985, "grad_norm": 2.6884896755218506, "learning_rate": 0.001, "loss": 2.3101, "step": 52192 }, { "epoch": 4.507246376811594, "grad_norm": 0.943748414516449, "learning_rate": 0.001, "loss": 2.3101, "step": 52248 }, { "epoch": 4.512077294685991, "grad_norm": 1.654974102973938, "learning_rate": 0.001, "loss": 2.3182, "step": 52304 }, { "epoch": 4.516908212560386, "grad_norm": 1.3912440538406372, "learning_rate": 0.001, "loss": 2.3219, "step": 52360 }, { "epoch": 4.521739130434782, "grad_norm": 1.3495748043060303, "learning_rate": 0.001, "loss": 2.3039, "step": 52416 }, { "epoch": 4.526570048309178, "grad_norm": 3.488729238510132, "learning_rate": 0.001, "loss": 2.2987, "step": 52472 }, { "epoch": 4.531400966183575, "grad_norm": 1.2651500701904297, "learning_rate": 0.001, "loss": 2.2985, "step": 52528 }, { "epoch": 4.536231884057971, "grad_norm": 0.8455155491828918, "learning_rate": 0.001, "loss": 2.2993, "step": 52584 }, { "epoch": 4.541062801932367, "grad_norm": 1.6375813484191895, "learning_rate": 0.001, "loss": 2.292, "step": 52640 }, { "epoch": 4.545893719806763, "grad_norm": 1.1256200075149536, "learning_rate": 0.001, "loss": 2.2997, "step": 52696 }, { "epoch": 4.550724637681159, "grad_norm": 1.5138390064239502, "learning_rate": 0.001, "loss": 2.3013, "step": 52752 }, { "epoch": 4.555555555555555, "grad_norm": 0.7223426103591919, "learning_rate": 0.001, "loss": 2.2979, "step": 52808 }, { "epoch": 4.5603864734299515, "grad_norm": 2.692262649536133, "learning_rate": 0.001, "loss": 2.3171, "step": 52864 }, { "epoch": 4.565217391304348, "grad_norm": 1.9587488174438477, "learning_rate": 0.001, "loss": 2.3123, "step": 52920 }, { "epoch": 4.570048309178744, "grad_norm": 1.3185560703277588, "learning_rate": 0.001, "loss": 2.3227, "step": 52976 }, { "epoch": 4.57487922705314, "grad_norm": 3.9335124492645264, "learning_rate": 0.001, "loss": 2.3099, "step": 53032 }, { "epoch": 4.579710144927536, "grad_norm": 1.239708423614502, "learning_rate": 0.001, "loss": 2.3212, "step": 53088 }, { "epoch": 4.584541062801932, "grad_norm": 1.176710844039917, "learning_rate": 0.001, "loss": 2.3238, "step": 53144 }, { "epoch": 4.5893719806763285, "grad_norm": 1.0926462411880493, "learning_rate": 0.001, "loss": 2.3362, "step": 53200 }, { "epoch": 4.594202898550725, "grad_norm": 1.1184386014938354, "learning_rate": 0.001, "loss": 2.3409, "step": 53256 }, { "epoch": 4.599033816425121, "grad_norm": 1.2343361377716064, "learning_rate": 0.001, "loss": 2.3317, "step": 53312 }, { "epoch": 4.603864734299517, "grad_norm": 0.8025084733963013, "learning_rate": 0.001, "loss": 2.35, "step": 53368 }, { "epoch": 4.608695652173913, "grad_norm": 1.1020928621292114, "learning_rate": 0.001, "loss": 2.3389, "step": 53424 }, { "epoch": 4.613526570048309, "grad_norm": 1.9558590650558472, "learning_rate": 0.001, "loss": 2.3278, "step": 53480 }, { "epoch": 4.618357487922705, "grad_norm": 1.644404649734497, "learning_rate": 0.001, "loss": 2.3245, "step": 53536 }, { "epoch": 4.6231884057971016, "grad_norm": 6.147590637207031, "learning_rate": 0.001, "loss": 2.3259, "step": 53592 }, { "epoch": 4.628019323671498, "grad_norm": 1.4744198322296143, "learning_rate": 0.001, "loss": 2.3307, "step": 53648 }, { "epoch": 4.632850241545894, "grad_norm": 1.4356679916381836, "learning_rate": 0.001, "loss": 2.3391, "step": 53704 }, { "epoch": 4.63768115942029, "grad_norm": 1.2997158765792847, "learning_rate": 0.001, "loss": 2.3385, "step": 53760 }, { "epoch": 4.642512077294686, "grad_norm": 1.5287188291549683, "learning_rate": 0.001, "loss": 2.3363, "step": 53816 }, { "epoch": 4.647342995169082, "grad_norm": 1.5143572092056274, "learning_rate": 0.001, "loss": 2.3383, "step": 53872 }, { "epoch": 4.6521739130434785, "grad_norm": 3.656484842300415, "learning_rate": 0.001, "loss": 2.3268, "step": 53928 }, { "epoch": 4.657004830917875, "grad_norm": 1.028181791305542, "learning_rate": 0.001, "loss": 2.3258, "step": 53984 }, { "epoch": 4.661835748792271, "grad_norm": 5.046627998352051, "learning_rate": 0.001, "loss": 2.3307, "step": 54040 }, { "epoch": 4.666666666666667, "grad_norm": 0.7748187184333801, "learning_rate": 0.001, "loss": 2.328, "step": 54096 }, { "epoch": 4.671497584541063, "grad_norm": 0.9460355639457703, "learning_rate": 0.001, "loss": 2.3256, "step": 54152 }, { "epoch": 4.676328502415459, "grad_norm": 2.8801820278167725, "learning_rate": 0.001, "loss": 2.3247, "step": 54208 }, { "epoch": 4.681159420289855, "grad_norm": 0.9241681694984436, "learning_rate": 0.001, "loss": 2.3478, "step": 54264 }, { "epoch": 4.685990338164252, "grad_norm": 0.6218664646148682, "learning_rate": 0.001, "loss": 2.3448, "step": 54320 }, { "epoch": 4.690821256038648, "grad_norm": 1.4080214500427246, "learning_rate": 0.001, "loss": 2.3311, "step": 54376 }, { "epoch": 4.695652173913043, "grad_norm": 0.8577357530593872, "learning_rate": 0.001, "loss": 2.3223, "step": 54432 }, { "epoch": 4.70048309178744, "grad_norm": 2.301870822906494, "learning_rate": 0.001, "loss": 2.313, "step": 54488 }, { "epoch": 4.705314009661835, "grad_norm": 0.7508590817451477, "learning_rate": 0.001, "loss": 2.3285, "step": 54544 }, { "epoch": 4.710144927536232, "grad_norm": 1.3048603534698486, "learning_rate": 0.001, "loss": 2.317, "step": 54600 }, { "epoch": 4.714975845410628, "grad_norm": 0.8950731158256531, "learning_rate": 0.001, "loss": 2.3089, "step": 54656 }, { "epoch": 4.719806763285024, "grad_norm": 1.0115829706192017, "learning_rate": 0.001, "loss": 2.2971, "step": 54712 }, { "epoch": 4.72463768115942, "grad_norm": 0.7804195880889893, "learning_rate": 0.001, "loss": 2.3047, "step": 54768 }, { "epoch": 4.729468599033816, "grad_norm": 2.3576929569244385, "learning_rate": 0.001, "loss": 2.2954, "step": 54824 }, { "epoch": 4.734299516908212, "grad_norm": 1.9797247648239136, "learning_rate": 0.001, "loss": 2.3044, "step": 54880 }, { "epoch": 4.739130434782608, "grad_norm": 1.1571468114852905, "learning_rate": 0.001, "loss": 2.3173, "step": 54936 }, { "epoch": 4.743961352657005, "grad_norm": 0.8506631851196289, "learning_rate": 0.001, "loss": 2.3097, "step": 54992 }, { "epoch": 4.748792270531401, "grad_norm": 0.8268190026283264, "learning_rate": 0.001, "loss": 2.2963, "step": 55048 }, { "epoch": 4.753623188405797, "grad_norm": 1.88607919216156, "learning_rate": 0.001, "loss": 2.2998, "step": 55104 }, { "epoch": 4.758454106280193, "grad_norm": 0.9432274103164673, "learning_rate": 0.001, "loss": 2.3113, "step": 55160 }, { "epoch": 4.763285024154589, "grad_norm": 0.725443959236145, "learning_rate": 0.001, "loss": 2.3005, "step": 55216 }, { "epoch": 4.768115942028985, "grad_norm": 1.2860841751098633, "learning_rate": 0.001, "loss": 2.2938, "step": 55272 }, { "epoch": 4.7729468599033815, "grad_norm": 0.9735112190246582, "learning_rate": 0.001, "loss": 2.3147, "step": 55328 }, { "epoch": 4.777777777777778, "grad_norm": 0.9253095984458923, "learning_rate": 0.001, "loss": 2.2996, "step": 55384 }, { "epoch": 4.782608695652174, "grad_norm": 1.4768403768539429, "learning_rate": 0.001, "loss": 2.3024, "step": 55440 }, { "epoch": 4.78743961352657, "grad_norm": 1.6015561819076538, "learning_rate": 0.001, "loss": 2.3053, "step": 55496 }, { "epoch": 4.792270531400966, "grad_norm": 0.9914491772651672, "learning_rate": 0.001, "loss": 2.2909, "step": 55552 }, { "epoch": 4.797101449275362, "grad_norm": 1.04567551612854, "learning_rate": 0.001, "loss": 2.3037, "step": 55608 }, { "epoch": 4.8019323671497585, "grad_norm": 0.932278573513031, "learning_rate": 0.001, "loss": 2.3047, "step": 55664 }, { "epoch": 4.806763285024155, "grad_norm": 2.336017608642578, "learning_rate": 0.001, "loss": 2.298, "step": 55720 }, { "epoch": 4.811594202898551, "grad_norm": 0.8514236807823181, "learning_rate": 0.001, "loss": 2.2985, "step": 55776 }, { "epoch": 4.816425120772947, "grad_norm": 0.9563355445861816, "learning_rate": 0.001, "loss": 2.3065, "step": 55832 }, { "epoch": 4.821256038647343, "grad_norm": 2.0424678325653076, "learning_rate": 0.001, "loss": 2.3127, "step": 55888 }, { "epoch": 4.826086956521739, "grad_norm": 0.9988775849342346, "learning_rate": 0.001, "loss": 2.3129, "step": 55944 }, { "epoch": 4.830917874396135, "grad_norm": 0.6675341725349426, "learning_rate": 0.001, "loss": 2.2977, "step": 56000 }, { "epoch": 4.835748792270532, "grad_norm": 0.8867150545120239, "learning_rate": 0.001, "loss": 2.2963, "step": 56056 }, { "epoch": 4.840579710144928, "grad_norm": 1.4586399793624878, "learning_rate": 0.001, "loss": 2.2996, "step": 56112 }, { "epoch": 4.845410628019324, "grad_norm": 0.9547634124755859, "learning_rate": 0.001, "loss": 2.3178, "step": 56168 }, { "epoch": 4.85024154589372, "grad_norm": 0.606196403503418, "learning_rate": 0.001, "loss": 2.3202, "step": 56224 }, { "epoch": 4.855072463768116, "grad_norm": 0.6954506635665894, "learning_rate": 0.001, "loss": 2.3034, "step": 56280 }, { "epoch": 4.859903381642512, "grad_norm": 1.4388718605041504, "learning_rate": 0.001, "loss": 2.3043, "step": 56336 }, { "epoch": 4.8647342995169085, "grad_norm": 1.8744267225265503, "learning_rate": 0.001, "loss": 2.2976, "step": 56392 }, { "epoch": 4.869565217391305, "grad_norm": 14.347280502319336, "learning_rate": 0.001, "loss": 2.3003, "step": 56448 }, { "epoch": 4.874396135265701, "grad_norm": 0.818060040473938, "learning_rate": 0.001, "loss": 2.2995, "step": 56504 }, { "epoch": 4.879227053140097, "grad_norm": 1.3645793199539185, "learning_rate": 0.001, "loss": 2.3081, "step": 56560 }, { "epoch": 4.884057971014493, "grad_norm": 0.8686359524726868, "learning_rate": 0.001, "loss": 2.3143, "step": 56616 }, { "epoch": 4.888888888888889, "grad_norm": 3.609219789505005, "learning_rate": 0.001, "loss": 2.3189, "step": 56672 }, { "epoch": 4.8937198067632846, "grad_norm": 1.5765020847320557, "learning_rate": 0.001, "loss": 2.3137, "step": 56728 }, { "epoch": 4.898550724637682, "grad_norm": 0.9636671543121338, "learning_rate": 0.001, "loss": 2.3266, "step": 56784 }, { "epoch": 4.903381642512077, "grad_norm": 0.5818440318107605, "learning_rate": 0.001, "loss": 2.3104, "step": 56840 }, { "epoch": 4.908212560386474, "grad_norm": 0.9418597221374512, "learning_rate": 0.001, "loss": 2.3213, "step": 56896 }, { "epoch": 4.913043478260869, "grad_norm": 1.7600048780441284, "learning_rate": 0.001, "loss": 2.3159, "step": 56952 }, { "epoch": 4.917874396135265, "grad_norm": 2.075094223022461, "learning_rate": 0.001, "loss": 2.3185, "step": 57008 }, { "epoch": 4.9227053140096615, "grad_norm": 70.46051025390625, "learning_rate": 0.001, "loss": 2.3041, "step": 57064 }, { "epoch": 4.927536231884058, "grad_norm": 6.2453227043151855, "learning_rate": 0.001, "loss": 2.2921, "step": 57120 }, { "epoch": 4.932367149758454, "grad_norm": 1.1778651475906372, "learning_rate": 0.001, "loss": 2.3201, "step": 57176 }, { "epoch": 4.93719806763285, "grad_norm": 1.4549427032470703, "learning_rate": 0.001, "loss": 2.3297, "step": 57232 }, { "epoch": 4.942028985507246, "grad_norm": 3.4987800121307373, "learning_rate": 0.001, "loss": 2.3402, "step": 57288 }, { "epoch": 4.946859903381642, "grad_norm": 1.0558435916900635, "learning_rate": 0.001, "loss": 2.3375, "step": 57344 }, { "epoch": 4.951690821256038, "grad_norm": 1.290441632270813, "learning_rate": 0.001, "loss": 2.3181, "step": 57400 }, { "epoch": 4.956521739130435, "grad_norm": 1.7993686199188232, "learning_rate": 0.001, "loss": 2.3217, "step": 57456 }, { "epoch": 4.961352657004831, "grad_norm": 1.179622769355774, "learning_rate": 0.001, "loss": 2.329, "step": 57512 }, { "epoch": 4.966183574879227, "grad_norm": 1.1743459701538086, "learning_rate": 0.001, "loss": 2.3237, "step": 57568 }, { "epoch": 4.971014492753623, "grad_norm": 1.2919784784317017, "learning_rate": 0.001, "loss": 2.3119, "step": 57624 }, { "epoch": 4.975845410628019, "grad_norm": 9.568537712097168, "learning_rate": 0.001, "loss": 2.3115, "step": 57680 }, { "epoch": 4.980676328502415, "grad_norm": 0.928508460521698, "learning_rate": 0.001, "loss": 2.3275, "step": 57736 }, { "epoch": 4.9855072463768115, "grad_norm": 2.3657476902008057, "learning_rate": 0.001, "loss": 2.3186, "step": 57792 }, { "epoch": 4.990338164251208, "grad_norm": 0.8450965881347656, "learning_rate": 0.001, "loss": 2.2992, "step": 57848 }, { "epoch": 4.995169082125604, "grad_norm": 0.8341897130012512, "learning_rate": 0.001, "loss": 2.2955, "step": 57904 }, { "epoch": 5.0, "grad_norm": 1.2438653707504272, "learning_rate": 0.001, "loss": 2.3025, "step": 57960 }, { "epoch": 5.004830917874396, "grad_norm": 0.9436646103858948, "learning_rate": 0.001, "loss": 2.2737, "step": 58016 }, { "epoch": 5.009661835748792, "grad_norm": 0.7943527698516846, "learning_rate": 0.001, "loss": 2.2679, "step": 58072 }, { "epoch": 5.0144927536231885, "grad_norm": 0.6496822237968445, "learning_rate": 0.001, "loss": 2.2715, "step": 58128 }, { "epoch": 5.019323671497585, "grad_norm": 1.3944389820098877, "learning_rate": 0.001, "loss": 2.2746, "step": 58184 }, { "epoch": 5.024154589371981, "grad_norm": 0.8490382432937622, "learning_rate": 0.001, "loss": 2.2733, "step": 58240 }, { "epoch": 5.028985507246377, "grad_norm": 1.5853235721588135, "learning_rate": 0.001, "loss": 2.267, "step": 58296 }, { "epoch": 5.033816425120773, "grad_norm": 2.906805992126465, "learning_rate": 0.001, "loss": 2.2706, "step": 58352 }, { "epoch": 5.038647342995169, "grad_norm": 1.4250094890594482, "learning_rate": 0.001, "loss": 2.2689, "step": 58408 }, { "epoch": 5.043478260869565, "grad_norm": 4.093613624572754, "learning_rate": 0.001, "loss": 2.2761, "step": 58464 }, { "epoch": 5.048309178743962, "grad_norm": 1.2158496379852295, "learning_rate": 0.001, "loss": 2.291, "step": 58520 }, { "epoch": 5.053140096618358, "grad_norm": 0.8814330101013184, "learning_rate": 0.001, "loss": 2.2736, "step": 58576 }, { "epoch": 5.057971014492754, "grad_norm": 0.8530583381652832, "learning_rate": 0.001, "loss": 2.2708, "step": 58632 }, { "epoch": 5.06280193236715, "grad_norm": 1.7054238319396973, "learning_rate": 0.001, "loss": 2.2688, "step": 58688 }, { "epoch": 5.067632850241546, "grad_norm": 0.7102516293525696, "learning_rate": 0.001, "loss": 2.2675, "step": 58744 }, { "epoch": 5.072463768115942, "grad_norm": 0.6261909604072571, "learning_rate": 0.001, "loss": 2.2558, "step": 58800 }, { "epoch": 5.0772946859903385, "grad_norm": 2.0799224376678467, "learning_rate": 0.001, "loss": 2.2613, "step": 58856 }, { "epoch": 5.082125603864735, "grad_norm": 0.8586704730987549, "learning_rate": 0.001, "loss": 2.262, "step": 58912 }, { "epoch": 5.086956521739131, "grad_norm": 0.7287746667861938, "learning_rate": 0.001, "loss": 2.2575, "step": 58968 }, { "epoch": 5.091787439613527, "grad_norm": 0.6179029941558838, "learning_rate": 0.001, "loss": 2.2536, "step": 59024 }, { "epoch": 5.096618357487923, "grad_norm": 2.5027058124542236, "learning_rate": 0.001, "loss": 2.265, "step": 59080 }, { "epoch": 5.101449275362318, "grad_norm": 2.4114880561828613, "learning_rate": 0.001, "loss": 2.2519, "step": 59136 }, { "epoch": 5.106280193236715, "grad_norm": 0.9805002212524414, "learning_rate": 0.001, "loss": 2.2478, "step": 59192 }, { "epoch": 5.111111111111111, "grad_norm": 0.8979089856147766, "learning_rate": 0.001, "loss": 2.2458, "step": 59248 }, { "epoch": 5.115942028985507, "grad_norm": 1.0549947023391724, "learning_rate": 0.001, "loss": 2.2485, "step": 59304 }, { "epoch": 5.120772946859903, "grad_norm": 0.9608076214790344, "learning_rate": 0.001, "loss": 2.2544, "step": 59360 }, { "epoch": 5.125603864734299, "grad_norm": 1.2952042818069458, "learning_rate": 0.001, "loss": 2.2558, "step": 59416 }, { "epoch": 5.130434782608695, "grad_norm": 1.2467831373214722, "learning_rate": 0.001, "loss": 2.2751, "step": 59472 }, { "epoch": 5.1352657004830915, "grad_norm": 1.6446683406829834, "learning_rate": 0.001, "loss": 2.2576, "step": 59528 }, { "epoch": 5.140096618357488, "grad_norm": 0.6389644145965576, "learning_rate": 0.001, "loss": 2.2533, "step": 59584 }, { "epoch": 5.144927536231884, "grad_norm": 1.7211169004440308, "learning_rate": 0.001, "loss": 2.2651, "step": 59640 }, { "epoch": 5.14975845410628, "grad_norm": 1.44622802734375, "learning_rate": 0.001, "loss": 2.2654, "step": 59696 }, { "epoch": 5.154589371980676, "grad_norm": 1.2150685787200928, "learning_rate": 0.001, "loss": 2.2703, "step": 59752 }, { "epoch": 5.159420289855072, "grad_norm": 2.292452812194824, "learning_rate": 0.001, "loss": 2.2706, "step": 59808 }, { "epoch": 5.164251207729468, "grad_norm": 1.2551698684692383, "learning_rate": 0.001, "loss": 2.2727, "step": 59864 }, { "epoch": 5.169082125603865, "grad_norm": 1.8675438165664673, "learning_rate": 0.001, "loss": 2.2745, "step": 59920 }, { "epoch": 5.173913043478261, "grad_norm": 1.4108449220657349, "learning_rate": 0.001, "loss": 2.2764, "step": 59976 }, { "epoch": 5.178743961352657, "grad_norm": 1.1421384811401367, "learning_rate": 0.001, "loss": 2.2782, "step": 60032 }, { "epoch": 5.183574879227053, "grad_norm": 1.0807000398635864, "learning_rate": 0.001, "loss": 2.2662, "step": 60088 }, { "epoch": 5.188405797101449, "grad_norm": 1.0078704357147217, "learning_rate": 0.001, "loss": 2.2602, "step": 60144 }, { "epoch": 5.193236714975845, "grad_norm": 1.1299046277999878, "learning_rate": 0.001, "loss": 2.2643, "step": 60200 }, { "epoch": 5.1980676328502415, "grad_norm": 1.1986722946166992, "learning_rate": 0.001, "loss": 2.2504, "step": 60256 }, { "epoch": 5.202898550724638, "grad_norm": 1.0549452304840088, "learning_rate": 0.001, "loss": 2.2565, "step": 60312 }, { "epoch": 5.207729468599034, "grad_norm": 2.037277936935425, "learning_rate": 0.001, "loss": 2.2465, "step": 60368 }, { "epoch": 5.21256038647343, "grad_norm": 2.5596325397491455, "learning_rate": 0.001, "loss": 2.2676, "step": 60424 }, { "epoch": 5.217391304347826, "grad_norm": 1.34528386592865, "learning_rate": 0.001, "loss": 2.2692, "step": 60480 }, { "epoch": 5.222222222222222, "grad_norm": 1.1312228441238403, "learning_rate": 0.001, "loss": 2.2777, "step": 60536 }, { "epoch": 5.2270531400966185, "grad_norm": 0.7738250494003296, "learning_rate": 0.001, "loss": 2.2779, "step": 60592 }, { "epoch": 5.231884057971015, "grad_norm": 1.8783988952636719, "learning_rate": 0.001, "loss": 2.282, "step": 60648 }, { "epoch": 5.236714975845411, "grad_norm": 3.2178571224212646, "learning_rate": 0.001, "loss": 2.2858, "step": 60704 }, { "epoch": 5.241545893719807, "grad_norm": 1.4991389513015747, "learning_rate": 0.001, "loss": 2.2652, "step": 60760 }, { "epoch": 5.246376811594203, "grad_norm": 1.0514947175979614, "learning_rate": 0.001, "loss": 2.2692, "step": 60816 }, { "epoch": 5.251207729468599, "grad_norm": 1.5282636880874634, "learning_rate": 0.001, "loss": 2.2621, "step": 60872 }, { "epoch": 5.256038647342995, "grad_norm": 0.6508818864822388, "learning_rate": 0.001, "loss": 2.2707, "step": 60928 }, { "epoch": 5.260869565217392, "grad_norm": 1.6331833600997925, "learning_rate": 0.001, "loss": 2.2656, "step": 60984 }, { "epoch": 5.265700483091788, "grad_norm": 1.6199853420257568, "learning_rate": 0.001, "loss": 2.2651, "step": 61040 }, { "epoch": 5.270531400966184, "grad_norm": 1.4836833477020264, "learning_rate": 0.001, "loss": 2.2685, "step": 61096 }, { "epoch": 5.27536231884058, "grad_norm": 1.1068944931030273, "learning_rate": 0.001, "loss": 2.2629, "step": 61152 }, { "epoch": 5.280193236714976, "grad_norm": 1.0773953199386597, "learning_rate": 0.001, "loss": 2.2695, "step": 61208 }, { "epoch": 5.285024154589372, "grad_norm": 5.323557376861572, "learning_rate": 0.001, "loss": 2.2745, "step": 61264 }, { "epoch": 5.2898550724637685, "grad_norm": 1.236106276512146, "learning_rate": 0.001, "loss": 2.2789, "step": 61320 }, { "epoch": 5.294685990338165, "grad_norm": 1.1621880531311035, "learning_rate": 0.001, "loss": 2.2767, "step": 61376 }, { "epoch": 5.29951690821256, "grad_norm": 1.3490560054779053, "learning_rate": 0.001, "loss": 2.277, "step": 61432 }, { "epoch": 5.304347826086957, "grad_norm": 0.9776967167854309, "learning_rate": 0.001, "loss": 2.2639, "step": 61488 }, { "epoch": 5.309178743961352, "grad_norm": 0.8823137879371643, "learning_rate": 0.001, "loss": 2.2806, "step": 61544 }, { "epoch": 5.314009661835748, "grad_norm": 0.8365609049797058, "learning_rate": 0.001, "loss": 2.27, "step": 61600 }, { "epoch": 5.318840579710145, "grad_norm": 0.8970274925231934, "learning_rate": 0.001, "loss": 2.2635, "step": 61656 }, { "epoch": 5.323671497584541, "grad_norm": 1.1043164730072021, "learning_rate": 0.001, "loss": 2.2573, "step": 61712 }, { "epoch": 5.328502415458937, "grad_norm": 1.1831847429275513, "learning_rate": 0.001, "loss": 2.2688, "step": 61768 }, { "epoch": 5.333333333333333, "grad_norm": 0.8626671433448792, "learning_rate": 0.001, "loss": 2.268, "step": 61824 }, { "epoch": 5.338164251207729, "grad_norm": 1.0557650327682495, "learning_rate": 0.001, "loss": 2.273, "step": 61880 }, { "epoch": 5.342995169082125, "grad_norm": 1.1638703346252441, "learning_rate": 0.001, "loss": 2.2703, "step": 61936 }, { "epoch": 5.3478260869565215, "grad_norm": 2.4471399784088135, "learning_rate": 0.001, "loss": 2.2685, "step": 61992 }, { "epoch": 5.352657004830918, "grad_norm": 0.674362301826477, "learning_rate": 0.001, "loss": 2.2726, "step": 62048 }, { "epoch": 5.357487922705314, "grad_norm": 2.021547794342041, "learning_rate": 0.001, "loss": 2.2807, "step": 62104 }, { "epoch": 5.36231884057971, "grad_norm": 1.0344667434692383, "learning_rate": 0.001, "loss": 2.2719, "step": 62160 }, { "epoch": 5.367149758454106, "grad_norm": 2.5653374195098877, "learning_rate": 0.001, "loss": 2.2827, "step": 62216 }, { "epoch": 5.371980676328502, "grad_norm": 1.1311075687408447, "learning_rate": 0.001, "loss": 2.2844, "step": 62272 }, { "epoch": 5.3768115942028984, "grad_norm": 0.9872215986251831, "learning_rate": 0.001, "loss": 2.2627, "step": 62328 }, { "epoch": 5.381642512077295, "grad_norm": 0.82745760679245, "learning_rate": 0.001, "loss": 2.2649, "step": 62384 }, { "epoch": 5.386473429951691, "grad_norm": 0.8367785811424255, "learning_rate": 0.001, "loss": 2.2808, "step": 62440 }, { "epoch": 5.391304347826087, "grad_norm": 1.1019151210784912, "learning_rate": 0.001, "loss": 2.2663, "step": 62496 }, { "epoch": 5.396135265700483, "grad_norm": 0.8462756276130676, "learning_rate": 0.001, "loss": 2.2602, "step": 62552 }, { "epoch": 5.400966183574879, "grad_norm": 0.7107194066047668, "learning_rate": 0.001, "loss": 2.2541, "step": 62608 }, { "epoch": 5.405797101449275, "grad_norm": 0.49883630871772766, "learning_rate": 0.001, "loss": 2.2563, "step": 62664 }, { "epoch": 5.4106280193236715, "grad_norm": 0.430463969707489, "learning_rate": 0.001, "loss": 2.2473, "step": 62720 }, { "epoch": 5.415458937198068, "grad_norm": 0.8459985852241516, "learning_rate": 0.001, "loss": 2.2575, "step": 62776 }, { "epoch": 5.420289855072464, "grad_norm": 0.6483646035194397, "learning_rate": 0.001, "loss": 2.2652, "step": 62832 }, { "epoch": 5.42512077294686, "grad_norm": 0.7575286030769348, "learning_rate": 0.001, "loss": 2.2548, "step": 62888 }, { "epoch": 5.429951690821256, "grad_norm": 0.6327202916145325, "learning_rate": 0.001, "loss": 2.2484, "step": 62944 }, { "epoch": 5.434782608695652, "grad_norm": 1.3381153345108032, "learning_rate": 0.001, "loss": 2.2424, "step": 63000 }, { "epoch": 5.4396135265700485, "grad_norm": 1.031955361366272, "learning_rate": 0.001, "loss": 2.2487, "step": 63056 }, { "epoch": 5.444444444444445, "grad_norm": 1.4858100414276123, "learning_rate": 0.001, "loss": 2.2451, "step": 63112 }, { "epoch": 5.449275362318841, "grad_norm": 1.1018794775009155, "learning_rate": 0.001, "loss": 2.2457, "step": 63168 }, { "epoch": 5.454106280193237, "grad_norm": 0.8303359746932983, "learning_rate": 0.001, "loss": 2.2573, "step": 63224 }, { "epoch": 5.458937198067633, "grad_norm": 1.1348568201065063, "learning_rate": 0.001, "loss": 2.2566, "step": 63280 }, { "epoch": 5.463768115942029, "grad_norm": 1.037598967552185, "learning_rate": 0.001, "loss": 2.2509, "step": 63336 }, { "epoch": 5.468599033816425, "grad_norm": 5.798953533172607, "learning_rate": 0.001, "loss": 2.2505, "step": 63392 }, { "epoch": 5.473429951690822, "grad_norm": 3.5276036262512207, "learning_rate": 0.001, "loss": 2.2477, "step": 63448 }, { "epoch": 5.478260869565218, "grad_norm": 1.2610571384429932, "learning_rate": 0.001, "loss": 2.2487, "step": 63504 }, { "epoch": 5.483091787439614, "grad_norm": 1.2737233638763428, "learning_rate": 0.001, "loss": 2.259, "step": 63560 }, { "epoch": 5.48792270531401, "grad_norm": 6.124037265777588, "learning_rate": 0.001, "loss": 2.2611, "step": 63616 }, { "epoch": 5.492753623188406, "grad_norm": 2.610708236694336, "learning_rate": 0.001, "loss": 2.2604, "step": 63672 }, { "epoch": 5.4975845410628015, "grad_norm": 2.1107146739959717, "learning_rate": 0.001, "loss": 2.2806, "step": 63728 }, { "epoch": 5.5024154589371985, "grad_norm": 1.5488975048065186, "learning_rate": 0.001, "loss": 2.2776, "step": 63784 }, { "epoch": 5.507246376811594, "grad_norm": 1.5342985391616821, "learning_rate": 0.001, "loss": 2.2661, "step": 63840 }, { "epoch": 5.512077294685991, "grad_norm": 1.1326875686645508, "learning_rate": 0.001, "loss": 2.2691, "step": 63896 }, { "epoch": 5.516908212560386, "grad_norm": 1.4246026277542114, "learning_rate": 0.001, "loss": 2.2542, "step": 63952 }, { "epoch": 5.521739130434782, "grad_norm": 2.2407288551330566, "learning_rate": 0.001, "loss": 2.2725, "step": 64008 }, { "epoch": 5.526570048309178, "grad_norm": 1.3345484733581543, "learning_rate": 0.001, "loss": 2.282, "step": 64064 }, { "epoch": 5.531400966183575, "grad_norm": 0.9723467826843262, "learning_rate": 0.001, "loss": 2.2746, "step": 64120 }, { "epoch": 5.536231884057971, "grad_norm": 0.6756110191345215, "learning_rate": 0.001, "loss": 2.2643, "step": 64176 }, { "epoch": 5.541062801932367, "grad_norm": 1.0184459686279297, "learning_rate": 0.001, "loss": 2.2647, "step": 64232 }, { "epoch": 5.545893719806763, "grad_norm": 1.639488935470581, "learning_rate": 0.001, "loss": 2.2621, "step": 64288 }, { "epoch": 5.550724637681159, "grad_norm": 1.3570550680160522, "learning_rate": 0.001, "loss": 2.2619, "step": 64344 }, { "epoch": 5.555555555555555, "grad_norm": 0.7477699518203735, "learning_rate": 0.001, "loss": 2.2644, "step": 64400 }, { "epoch": 5.5603864734299515, "grad_norm": 0.7368486523628235, "learning_rate": 0.001, "loss": 2.2627, "step": 64456 }, { "epoch": 5.565217391304348, "grad_norm": 1.2352867126464844, "learning_rate": 0.001, "loss": 2.2642, "step": 64512 }, { "epoch": 5.570048309178744, "grad_norm": 2.5308146476745605, "learning_rate": 0.001, "loss": 2.2629, "step": 64568 }, { "epoch": 5.57487922705314, "grad_norm": 1.1007472276687622, "learning_rate": 0.001, "loss": 2.2791, "step": 64624 }, { "epoch": 5.579710144927536, "grad_norm": 1.4356293678283691, "learning_rate": 0.001, "loss": 2.2925, "step": 64680 }, { "epoch": 5.584541062801932, "grad_norm": 1.9607384204864502, "learning_rate": 0.001, "loss": 2.2955, "step": 64736 }, { "epoch": 5.5893719806763285, "grad_norm": 1.1231130361557007, "learning_rate": 0.001, "loss": 2.3006, "step": 64792 }, { "epoch": 5.594202898550725, "grad_norm": 1.3609702587127686, "learning_rate": 0.001, "loss": 2.3073, "step": 64848 }, { "epoch": 5.599033816425121, "grad_norm": 1.394912838935852, "learning_rate": 0.001, "loss": 2.3038, "step": 64904 }, { "epoch": 5.603864734299517, "grad_norm": 1.9593900442123413, "learning_rate": 0.001, "loss": 2.2907, "step": 64960 }, { "epoch": 5.608695652173913, "grad_norm": 1.0045404434204102, "learning_rate": 0.001, "loss": 2.294, "step": 65016 }, { "epoch": 5.613526570048309, "grad_norm": 1.468446969985962, "learning_rate": 0.001, "loss": 2.2959, "step": 65072 }, { "epoch": 5.618357487922705, "grad_norm": 0.8016161918640137, "learning_rate": 0.001, "loss": 2.2989, "step": 65128 }, { "epoch": 5.6231884057971016, "grad_norm": 1.8443540334701538, "learning_rate": 0.001, "loss": 2.3027, "step": 65184 }, { "epoch": 5.628019323671498, "grad_norm": 0.7068531513214111, "learning_rate": 0.001, "loss": 2.291, "step": 65240 }, { "epoch": 5.632850241545894, "grad_norm": 0.5824094414710999, "learning_rate": 0.001, "loss": 2.2843, "step": 65296 }, { "epoch": 5.63768115942029, "grad_norm": 1.8601717948913574, "learning_rate": 0.001, "loss": 2.2819, "step": 65352 }, { "epoch": 5.642512077294686, "grad_norm": 0.8813537955284119, "learning_rate": 0.001, "loss": 2.2926, "step": 65408 }, { "epoch": 5.647342995169082, "grad_norm": 1.7260870933532715, "learning_rate": 0.001, "loss": 2.284, "step": 65464 }, { "epoch": 5.6521739130434785, "grad_norm": 1.0956451892852783, "learning_rate": 0.001, "loss": 2.2852, "step": 65520 }, { "epoch": 5.657004830917875, "grad_norm": 17.495576858520508, "learning_rate": 0.001, "loss": 2.2816, "step": 65576 }, { "epoch": 5.661835748792271, "grad_norm": 1.0441806316375732, "learning_rate": 0.001, "loss": 2.2706, "step": 65632 }, { "epoch": 5.666666666666667, "grad_norm": 1.7222089767456055, "learning_rate": 0.001, "loss": 2.2786, "step": 65688 }, { "epoch": 5.671497584541063, "grad_norm": 0.9240567088127136, "learning_rate": 0.001, "loss": 2.2586, "step": 65744 }, { "epoch": 5.676328502415459, "grad_norm": 1.1051487922668457, "learning_rate": 0.001, "loss": 2.2699, "step": 65800 }, { "epoch": 5.681159420289855, "grad_norm": 1.9677304029464722, "learning_rate": 0.001, "loss": 2.2705, "step": 65856 }, { "epoch": 5.685990338164252, "grad_norm": 4.950940132141113, "learning_rate": 0.001, "loss": 2.2722, "step": 65912 }, { "epoch": 5.690821256038648, "grad_norm": 10.856415748596191, "learning_rate": 0.001, "loss": 2.2818, "step": 65968 }, { "epoch": 5.695652173913043, "grad_norm": 1.793298363685608, "learning_rate": 0.001, "loss": 2.2698, "step": 66024 }, { "epoch": 5.70048309178744, "grad_norm": 0.6615787744522095, "learning_rate": 0.001, "loss": 2.2663, "step": 66080 }, { "epoch": 5.705314009661835, "grad_norm": 0.8052457571029663, "learning_rate": 0.001, "loss": 2.275, "step": 66136 }, { "epoch": 5.710144927536232, "grad_norm": 0.7035436034202576, "learning_rate": 0.001, "loss": 2.2749, "step": 66192 }, { "epoch": 5.714975845410628, "grad_norm": 1.8040974140167236, "learning_rate": 0.001, "loss": 2.2671, "step": 66248 }, { "epoch": 5.719806763285024, "grad_norm": 1.2789722681045532, "learning_rate": 0.001, "loss": 2.2826, "step": 66304 }, { "epoch": 5.72463768115942, "grad_norm": 1.8050535917282104, "learning_rate": 0.001, "loss": 2.2792, "step": 66360 }, { "epoch": 5.729468599033816, "grad_norm": 1.0771936178207397, "learning_rate": 0.001, "loss": 2.2736, "step": 66416 }, { "epoch": 5.734299516908212, "grad_norm": 1.2183178663253784, "learning_rate": 0.001, "loss": 2.2668, "step": 66472 }, { "epoch": 5.739130434782608, "grad_norm": 1.0171388387680054, "learning_rate": 0.001, "loss": 2.2857, "step": 66528 }, { "epoch": 5.743961352657005, "grad_norm": 1.5964573621749878, "learning_rate": 0.001, "loss": 2.2629, "step": 66584 }, { "epoch": 5.748792270531401, "grad_norm": 1.9840853214263916, "learning_rate": 0.001, "loss": 2.2657, "step": 66640 }, { "epoch": 5.753623188405797, "grad_norm": 1.0129733085632324, "learning_rate": 0.001, "loss": 2.2687, "step": 66696 }, { "epoch": 5.758454106280193, "grad_norm": 1.0350550413131714, "learning_rate": 0.001, "loss": 2.2665, "step": 66752 }, { "epoch": 5.763285024154589, "grad_norm": 1.2609366178512573, "learning_rate": 0.001, "loss": 2.256, "step": 66808 }, { "epoch": 5.768115942028985, "grad_norm": 0.7500649094581604, "learning_rate": 0.001, "loss": 2.2594, "step": 66864 }, { "epoch": 5.7729468599033815, "grad_norm": 1.179863691329956, "learning_rate": 0.001, "loss": 2.2613, "step": 66920 }, { "epoch": 5.777777777777778, "grad_norm": 0.6870161294937134, "learning_rate": 0.001, "loss": 2.2546, "step": 66976 }, { "epoch": 5.782608695652174, "grad_norm": 2.775158643722534, "learning_rate": 0.001, "loss": 2.2559, "step": 67032 }, { "epoch": 5.78743961352657, "grad_norm": 1.0315377712249756, "learning_rate": 0.001, "loss": 2.2865, "step": 67088 }, { "epoch": 5.792270531400966, "grad_norm": 2.08611798286438, "learning_rate": 0.001, "loss": 2.2739, "step": 67144 }, { "epoch": 5.797101449275362, "grad_norm": 1.2024372816085815, "learning_rate": 0.001, "loss": 2.2702, "step": 67200 }, { "epoch": 5.8019323671497585, "grad_norm": 0.8497179746627808, "learning_rate": 0.001, "loss": 2.2666, "step": 67256 }, { "epoch": 5.806763285024155, "grad_norm": 1.0432077646255493, "learning_rate": 0.001, "loss": 2.2739, "step": 67312 }, { "epoch": 5.811594202898551, "grad_norm": 1.8335925340652466, "learning_rate": 0.001, "loss": 2.275, "step": 67368 }, { "epoch": 5.816425120772947, "grad_norm": 1.273441195487976, "learning_rate": 0.001, "loss": 2.2725, "step": 67424 }, { "epoch": 5.821256038647343, "grad_norm": 1.55553138256073, "learning_rate": 0.001, "loss": 2.2798, "step": 67480 }, { "epoch": 5.826086956521739, "grad_norm": 1.0334409475326538, "learning_rate": 0.001, "loss": 2.2885, "step": 67536 }, { "epoch": 5.830917874396135, "grad_norm": 1.8036469221115112, "learning_rate": 0.001, "loss": 2.2719, "step": 67592 }, { "epoch": 5.835748792270532, "grad_norm": 1.5342423915863037, "learning_rate": 0.001, "loss": 2.2657, "step": 67648 }, { "epoch": 5.840579710144928, "grad_norm": 1.4219903945922852, "learning_rate": 0.001, "loss": 2.2592, "step": 67704 }, { "epoch": 5.845410628019324, "grad_norm": 0.7149852514266968, "learning_rate": 0.001, "loss": 2.2738, "step": 67760 }, { "epoch": 5.85024154589372, "grad_norm": 5.3016676902771, "learning_rate": 0.001, "loss": 2.2678, "step": 67816 }, { "epoch": 5.855072463768116, "grad_norm": 1.096000075340271, "learning_rate": 0.001, "loss": 2.2659, "step": 67872 }, { "epoch": 5.859903381642512, "grad_norm": 0.9542959928512573, "learning_rate": 0.001, "loss": 2.2773, "step": 67928 }, { "epoch": 5.8647342995169085, "grad_norm": 0.5224072337150574, "learning_rate": 0.001, "loss": 2.2673, "step": 67984 }, { "epoch": 5.869565217391305, "grad_norm": 0.7417703866958618, "learning_rate": 0.001, "loss": 2.2751, "step": 68040 }, { "epoch": 5.874396135265701, "grad_norm": 1.5904245376586914, "learning_rate": 0.001, "loss": 2.2715, "step": 68096 }, { "epoch": 5.879227053140097, "grad_norm": 0.864590048789978, "learning_rate": 0.001, "loss": 2.269, "step": 68152 }, { "epoch": 5.884057971014493, "grad_norm": 1.5074886083602905, "learning_rate": 0.001, "loss": 2.2769, "step": 68208 }, { "epoch": 5.888888888888889, "grad_norm": 1.6723995208740234, "learning_rate": 0.001, "loss": 2.2773, "step": 68264 }, { "epoch": 5.8937198067632846, "grad_norm": 1.0331274271011353, "learning_rate": 0.001, "loss": 2.2765, "step": 68320 }, { "epoch": 5.898550724637682, "grad_norm": 1.7588037252426147, "learning_rate": 0.001, "loss": 2.2803, "step": 68376 }, { "epoch": 5.903381642512077, "grad_norm": 4.212721824645996, "learning_rate": 0.001, "loss": 2.2727, "step": 68432 }, { "epoch": 5.908212560386474, "grad_norm": 2.2231578826904297, "learning_rate": 0.001, "loss": 2.2831, "step": 68488 }, { "epoch": 5.913043478260869, "grad_norm": 2.2967207431793213, "learning_rate": 0.001, "loss": 2.2694, "step": 68544 }, { "epoch": 5.917874396135265, "grad_norm": 4.24258279800415, "learning_rate": 0.001, "loss": 2.2678, "step": 68600 }, { "epoch": 5.9227053140096615, "grad_norm": 1.2127195596694946, "learning_rate": 0.001, "loss": 2.2651, "step": 68656 }, { "epoch": 5.927536231884058, "grad_norm": 2.5435125827789307, "learning_rate": 0.001, "loss": 2.2685, "step": 68712 }, { "epoch": 5.932367149758454, "grad_norm": 3.300612688064575, "learning_rate": 0.001, "loss": 2.2539, "step": 68768 }, { "epoch": 5.93719806763285, "grad_norm": 0.6289768815040588, "learning_rate": 0.001, "loss": 2.2597, "step": 68824 }, { "epoch": 5.942028985507246, "grad_norm": 0.7482408881187439, "learning_rate": 0.001, "loss": 2.2725, "step": 68880 }, { "epoch": 5.946859903381642, "grad_norm": 1.8951795101165771, "learning_rate": 0.001, "loss": 2.2587, "step": 68936 }, { "epoch": 5.951690821256038, "grad_norm": 1.0117673873901367, "learning_rate": 0.001, "loss": 2.2665, "step": 68992 }, { "epoch": 5.956521739130435, "grad_norm": 0.553583562374115, "learning_rate": 0.001, "loss": 2.2683, "step": 69048 }, { "epoch": 5.961352657004831, "grad_norm": 2.6252622604370117, "learning_rate": 0.001, "loss": 2.2709, "step": 69104 }, { "epoch": 5.966183574879227, "grad_norm": 3.1359682083129883, "learning_rate": 0.001, "loss": 2.2571, "step": 69160 }, { "epoch": 5.971014492753623, "grad_norm": 0.9322596192359924, "learning_rate": 0.001, "loss": 2.2537, "step": 69216 }, { "epoch": 5.975845410628019, "grad_norm": 0.7134868502616882, "learning_rate": 0.001, "loss": 2.2535, "step": 69272 }, { "epoch": 5.980676328502415, "grad_norm": 0.679645836353302, "learning_rate": 0.001, "loss": 2.2545, "step": 69328 }, { "epoch": 5.9855072463768115, "grad_norm": 1.235795259475708, "learning_rate": 0.001, "loss": 2.2646, "step": 69384 }, { "epoch": 5.990338164251208, "grad_norm": 0.5267893075942993, "learning_rate": 0.001, "loss": 2.2545, "step": 69440 }, { "epoch": 5.995169082125604, "grad_norm": 0.6231503486633301, "learning_rate": 0.001, "loss": 2.2529, "step": 69496 }, { "epoch": 6.0, "grad_norm": 3.025648593902588, "learning_rate": 0.001, "loss": 2.2599, "step": 69552 }, { "epoch": 6.004830917874396, "grad_norm": 1.4466320276260376, "learning_rate": 0.001, "loss": 2.2099, "step": 69608 }, { "epoch": 6.009661835748792, "grad_norm": 0.5512242317199707, "learning_rate": 0.001, "loss": 2.2267, "step": 69664 }, { "epoch": 6.0144927536231885, "grad_norm": 1.1341667175292969, "learning_rate": 0.001, "loss": 2.224, "step": 69720 }, { "epoch": 6.019323671497585, "grad_norm": 1.3873621225357056, "learning_rate": 0.001, "loss": 2.2403, "step": 69776 }, { "epoch": 6.024154589371981, "grad_norm": 0.9209643006324768, "learning_rate": 0.001, "loss": 2.2248, "step": 69832 }, { "epoch": 6.028985507246377, "grad_norm": 0.7108995318412781, "learning_rate": 0.001, "loss": 2.2273, "step": 69888 }, { "epoch": 6.033816425120773, "grad_norm": 1.4621247053146362, "learning_rate": 0.001, "loss": 2.2126, "step": 69944 }, { "epoch": 6.038647342995169, "grad_norm": 1.5459744930267334, "learning_rate": 0.001, "loss": 2.2137, "step": 70000 }, { "epoch": 6.043478260869565, "grad_norm": 1.1624717712402344, "learning_rate": 0.001, "loss": 2.2296, "step": 70056 }, { "epoch": 6.048309178743962, "grad_norm": 0.7049641609191895, "learning_rate": 0.001, "loss": 2.2292, "step": 70112 }, { "epoch": 6.053140096618358, "grad_norm": 1.0620265007019043, "learning_rate": 0.001, "loss": 2.2118, "step": 70168 }, { "epoch": 6.057971014492754, "grad_norm": 1.0865387916564941, "learning_rate": 0.001, "loss": 2.2103, "step": 70224 }, { "epoch": 6.06280193236715, "grad_norm": 0.7274052500724792, "learning_rate": 0.001, "loss": 2.2221, "step": 70280 }, { "epoch": 6.067632850241546, "grad_norm": 2.709317684173584, "learning_rate": 0.001, "loss": 2.2219, "step": 70336 }, { "epoch": 6.072463768115942, "grad_norm": 0.8704111576080322, "learning_rate": 0.001, "loss": 2.2385, "step": 70392 }, { "epoch": 6.0772946859903385, "grad_norm": 1.0704371929168701, "learning_rate": 0.001, "loss": 2.2447, "step": 70448 }, { "epoch": 6.082125603864735, "grad_norm": 1.4001491069793701, "learning_rate": 0.001, "loss": 2.2549, "step": 70504 }, { "epoch": 6.086956521739131, "grad_norm": 1.9169684648513794, "learning_rate": 0.001, "loss": 2.2501, "step": 70560 }, { "epoch": 6.091787439613527, "grad_norm": 3.661534309387207, "learning_rate": 0.001, "loss": 2.2273, "step": 70616 }, { "epoch": 6.096618357487923, "grad_norm": 0.7327979207038879, "learning_rate": 0.001, "loss": 2.216, "step": 70672 }, { "epoch": 6.101449275362318, "grad_norm": 1.284769058227539, "learning_rate": 0.001, "loss": 2.2361, "step": 70728 }, { "epoch": 6.106280193236715, "grad_norm": 1.4347878694534302, "learning_rate": 0.001, "loss": 2.2399, "step": 70784 }, { "epoch": 6.111111111111111, "grad_norm": 0.9479652643203735, "learning_rate": 0.001, "loss": 2.2569, "step": 70840 }, { "epoch": 6.115942028985507, "grad_norm": 1.351218342781067, "learning_rate": 0.001, "loss": 2.2354, "step": 70896 }, { "epoch": 6.120772946859903, "grad_norm": 2.2644903659820557, "learning_rate": 0.001, "loss": 2.2392, "step": 70952 }, { "epoch": 6.125603864734299, "grad_norm": 4.74054479598999, "learning_rate": 0.001, "loss": 2.2352, "step": 71008 }, { "epoch": 6.130434782608695, "grad_norm": 0.6032834649085999, "learning_rate": 0.001, "loss": 2.2393, "step": 71064 }, { "epoch": 6.1352657004830915, "grad_norm": 1.5596091747283936, "learning_rate": 0.001, "loss": 2.2268, "step": 71120 }, { "epoch": 6.140096618357488, "grad_norm": 2.4547011852264404, "learning_rate": 0.001, "loss": 2.238, "step": 71176 }, { "epoch": 6.144927536231884, "grad_norm": 1.2931201457977295, "learning_rate": 0.001, "loss": 2.2611, "step": 71232 }, { "epoch": 6.14975845410628, "grad_norm": 3.7375707626342773, "learning_rate": 0.001, "loss": 2.2591, "step": 71288 }, { "epoch": 6.154589371980676, "grad_norm": 0.9994610548019409, "learning_rate": 0.001, "loss": 2.2564, "step": 71344 }, { "epoch": 6.159420289855072, "grad_norm": 1.5988612174987793, "learning_rate": 0.001, "loss": 2.2535, "step": 71400 }, { "epoch": 6.164251207729468, "grad_norm": 2.0000829696655273, "learning_rate": 0.001, "loss": 2.244, "step": 71456 }, { "epoch": 6.169082125603865, "grad_norm": 1.9968081712722778, "learning_rate": 0.001, "loss": 2.2332, "step": 71512 }, { "epoch": 6.173913043478261, "grad_norm": 0.7419748306274414, "learning_rate": 0.001, "loss": 2.2375, "step": 71568 }, { "epoch": 6.178743961352657, "grad_norm": 3.2289023399353027, "learning_rate": 0.001, "loss": 2.2352, "step": 71624 }, { "epoch": 6.183574879227053, "grad_norm": 1.1539735794067383, "learning_rate": 0.001, "loss": 2.2321, "step": 71680 }, { "epoch": 6.188405797101449, "grad_norm": 2.0009260177612305, "learning_rate": 0.001, "loss": 2.2345, "step": 71736 }, { "epoch": 6.193236714975845, "grad_norm": 1.4143390655517578, "learning_rate": 0.001, "loss": 2.2513, "step": 71792 }, { "epoch": 6.1980676328502415, "grad_norm": 0.821320116519928, "learning_rate": 0.001, "loss": 2.244, "step": 71848 }, { "epoch": 6.202898550724638, "grad_norm": 1.3411365747451782, "learning_rate": 0.001, "loss": 2.2337, "step": 71904 }, { "epoch": 6.207729468599034, "grad_norm": 0.9753725528717041, "learning_rate": 0.001, "loss": 2.2215, "step": 71960 }, { "epoch": 6.21256038647343, "grad_norm": 1.8683110475540161, "learning_rate": 0.001, "loss": 2.2348, "step": 72016 }, { "epoch": 6.217391304347826, "grad_norm": 2.7504191398620605, "learning_rate": 0.001, "loss": 2.2432, "step": 72072 }, { "epoch": 6.222222222222222, "grad_norm": 1.5068525075912476, "learning_rate": 0.001, "loss": 2.2317, "step": 72128 }, { "epoch": 6.2270531400966185, "grad_norm": 2.5044116973876953, "learning_rate": 0.001, "loss": 2.2489, "step": 72184 }, { "epoch": 6.231884057971015, "grad_norm": 2.02871036529541, "learning_rate": 0.001, "loss": 2.2476, "step": 72240 }, { "epoch": 6.236714975845411, "grad_norm": 3.200634002685547, "learning_rate": 0.001, "loss": 2.245, "step": 72296 }, { "epoch": 6.241545893719807, "grad_norm": 1.611269474029541, "learning_rate": 0.001, "loss": 2.2301, "step": 72352 }, { "epoch": 6.246376811594203, "grad_norm": 0.9519205093383789, "learning_rate": 0.001, "loss": 2.2352, "step": 72408 }, { "epoch": 6.251207729468599, "grad_norm": 0.6087763905525208, "learning_rate": 0.001, "loss": 2.2226, "step": 72464 }, { "epoch": 6.256038647342995, "grad_norm": 1.847956895828247, "learning_rate": 0.001, "loss": 2.2218, "step": 72520 }, { "epoch": 6.260869565217392, "grad_norm": 1.410396933555603, "learning_rate": 0.001, "loss": 2.2229, "step": 72576 }, { "epoch": 6.265700483091788, "grad_norm": 0.6673827767372131, "learning_rate": 0.001, "loss": 2.2204, "step": 72632 }, { "epoch": 6.270531400966184, "grad_norm": 0.9775403141975403, "learning_rate": 0.001, "loss": 2.2245, "step": 72688 }, { "epoch": 6.27536231884058, "grad_norm": 1.1978809833526611, "learning_rate": 0.001, "loss": 2.2278, "step": 72744 }, { "epoch": 6.280193236714976, "grad_norm": 1.3326667547225952, "learning_rate": 0.001, "loss": 2.228, "step": 72800 }, { "epoch": 6.285024154589372, "grad_norm": 2.3851635456085205, "learning_rate": 0.001, "loss": 2.2301, "step": 72856 }, { "epoch": 6.2898550724637685, "grad_norm": 1.7464731931686401, "learning_rate": 0.001, "loss": 2.2262, "step": 72912 }, { "epoch": 6.294685990338165, "grad_norm": 0.8745136857032776, "learning_rate": 0.001, "loss": 2.2318, "step": 72968 }, { "epoch": 6.29951690821256, "grad_norm": 1.8794758319854736, "learning_rate": 0.001, "loss": 2.2347, "step": 73024 }, { "epoch": 6.304347826086957, "grad_norm": 1.7814747095108032, "learning_rate": 0.001, "loss": 2.2199, "step": 73080 }, { "epoch": 6.309178743961352, "grad_norm": 1.2474782466888428, "learning_rate": 0.001, "loss": 2.2245, "step": 73136 }, { "epoch": 6.314009661835748, "grad_norm": 1.2666617631912231, "learning_rate": 0.001, "loss": 2.2356, "step": 73192 }, { "epoch": 6.318840579710145, "grad_norm": 1.0419021844863892, "learning_rate": 0.001, "loss": 2.2403, "step": 73248 }, { "epoch": 6.323671497584541, "grad_norm": 0.806438148021698, "learning_rate": 0.001, "loss": 2.237, "step": 73304 }, { "epoch": 6.328502415458937, "grad_norm": 7.231851577758789, "learning_rate": 0.001, "loss": 2.22, "step": 73360 }, { "epoch": 6.333333333333333, "grad_norm": 0.8140851855278015, "learning_rate": 0.001, "loss": 2.2358, "step": 73416 }, { "epoch": 6.338164251207729, "grad_norm": 1.1485127210617065, "learning_rate": 0.001, "loss": 2.2299, "step": 73472 }, { "epoch": 6.342995169082125, "grad_norm": 1.318444013595581, "learning_rate": 0.001, "loss": 2.2175, "step": 73528 }, { "epoch": 6.3478260869565215, "grad_norm": 1.9077707529067993, "learning_rate": 0.001, "loss": 2.2283, "step": 73584 }, { "epoch": 6.352657004830918, "grad_norm": 2.7870867252349854, "learning_rate": 0.001, "loss": 2.2295, "step": 73640 }, { "epoch": 6.357487922705314, "grad_norm": 0.8493619561195374, "learning_rate": 0.001, "loss": 2.2435, "step": 73696 }, { "epoch": 6.36231884057971, "grad_norm": 0.8781208992004395, "learning_rate": 0.001, "loss": 2.2417, "step": 73752 }, { "epoch": 6.367149758454106, "grad_norm": 1.2606106996536255, "learning_rate": 0.001, "loss": 2.2398, "step": 73808 }, { "epoch": 6.371980676328502, "grad_norm": 0.990530252456665, "learning_rate": 0.001, "loss": 2.2377, "step": 73864 }, { "epoch": 6.3768115942028984, "grad_norm": 1.307255744934082, "learning_rate": 0.001, "loss": 2.2466, "step": 73920 }, { "epoch": 6.381642512077295, "grad_norm": 1.8456193208694458, "learning_rate": 0.001, "loss": 2.2656, "step": 73976 }, { "epoch": 6.386473429951691, "grad_norm": 1.7953139543533325, "learning_rate": 0.001, "loss": 2.2413, "step": 74032 }, { "epoch": 6.391304347826087, "grad_norm": 1.7777838706970215, "learning_rate": 0.001, "loss": 2.2571, "step": 74088 }, { "epoch": 6.396135265700483, "grad_norm": 2.4233973026275635, "learning_rate": 0.001, "loss": 2.2799, "step": 74144 }, { "epoch": 6.400966183574879, "grad_norm": 0.886583149433136, "learning_rate": 0.001, "loss": 2.2642, "step": 74200 }, { "epoch": 6.405797101449275, "grad_norm": 1.8131858110427856, "learning_rate": 0.001, "loss": 2.2558, "step": 74256 }, { "epoch": 6.4106280193236715, "grad_norm": 1.290224313735962, "learning_rate": 0.001, "loss": 2.2607, "step": 74312 }, { "epoch": 6.415458937198068, "grad_norm": 1.22830069065094, "learning_rate": 0.001, "loss": 2.2563, "step": 74368 }, { "epoch": 6.420289855072464, "grad_norm": 1.52664315700531, "learning_rate": 0.001, "loss": 2.2447, "step": 74424 }, { "epoch": 6.42512077294686, "grad_norm": 0.9392516016960144, "learning_rate": 0.001, "loss": 2.2345, "step": 74480 }, { "epoch": 6.429951690821256, "grad_norm": 1.6208679676055908, "learning_rate": 0.001, "loss": 2.2428, "step": 74536 }, { "epoch": 6.434782608695652, "grad_norm": 2.518866539001465, "learning_rate": 0.001, "loss": 2.2451, "step": 74592 }, { "epoch": 6.4396135265700485, "grad_norm": 1.559274673461914, "learning_rate": 0.001, "loss": 2.2569, "step": 74648 }, { "epoch": 6.444444444444445, "grad_norm": 1.8809869289398193, "learning_rate": 0.001, "loss": 2.2337, "step": 74704 }, { "epoch": 6.449275362318841, "grad_norm": 0.9125791192054749, "learning_rate": 0.001, "loss": 2.2445, "step": 74760 }, { "epoch": 6.454106280193237, "grad_norm": 1.2871743440628052, "learning_rate": 0.001, "loss": 2.2389, "step": 74816 }, { "epoch": 6.458937198067633, "grad_norm": 1.753193974494934, "learning_rate": 0.001, "loss": 2.2217, "step": 74872 }, { "epoch": 6.463768115942029, "grad_norm": 1.5740556716918945, "learning_rate": 0.001, "loss": 2.2409, "step": 74928 }, { "epoch": 6.468599033816425, "grad_norm": 0.9776777029037476, "learning_rate": 0.001, "loss": 2.2501, "step": 74984 }, { "epoch": 6.473429951690822, "grad_norm": 2.7576019763946533, "learning_rate": 0.001, "loss": 2.2347, "step": 75040 }, { "epoch": 6.478260869565218, "grad_norm": 1.692238450050354, "learning_rate": 0.001, "loss": 2.2413, "step": 75096 }, { "epoch": 6.483091787439614, "grad_norm": 2.3111934661865234, "learning_rate": 0.001, "loss": 2.25, "step": 75152 }, { "epoch": 6.48792270531401, "grad_norm": 1.3569540977478027, "learning_rate": 0.001, "loss": 2.246, "step": 75208 }, { "epoch": 6.492753623188406, "grad_norm": 1.2361676692962646, "learning_rate": 0.001, "loss": 2.2471, "step": 75264 }, { "epoch": 6.4975845410628015, "grad_norm": 1.815401554107666, "learning_rate": 0.001, "loss": 2.2403, "step": 75320 }, { "epoch": 6.5024154589371985, "grad_norm": 0.7901356816291809, "learning_rate": 0.001, "loss": 2.2515, "step": 75376 }, { "epoch": 6.507246376811594, "grad_norm": 1.708166480064392, "learning_rate": 0.001, "loss": 2.2431, "step": 75432 }, { "epoch": 6.512077294685991, "grad_norm": 1.9460251331329346, "learning_rate": 0.001, "loss": 2.2422, "step": 75488 }, { "epoch": 6.516908212560386, "grad_norm": 1.216374158859253, "learning_rate": 0.001, "loss": 2.2542, "step": 75544 }, { "epoch": 6.521739130434782, "grad_norm": 1.1076815128326416, "learning_rate": 0.001, "loss": 2.2578, "step": 75600 }, { "epoch": 6.526570048309178, "grad_norm": 0.6794477701187134, "learning_rate": 0.001, "loss": 2.2626, "step": 75656 }, { "epoch": 6.531400966183575, "grad_norm": 0.8402165770530701, "learning_rate": 0.001, "loss": 2.2426, "step": 75712 }, { "epoch": 6.536231884057971, "grad_norm": 1.274253249168396, "learning_rate": 0.001, "loss": 2.2507, "step": 75768 }, { "epoch": 6.541062801932367, "grad_norm": 1.4902313947677612, "learning_rate": 0.001, "loss": 2.2423, "step": 75824 }, { "epoch": 6.545893719806763, "grad_norm": 4.480154037475586, "learning_rate": 0.001, "loss": 2.2486, "step": 75880 }, { "epoch": 6.550724637681159, "grad_norm": 1.0384182929992676, "learning_rate": 0.001, "loss": 2.2435, "step": 75936 }, { "epoch": 6.555555555555555, "grad_norm": 13.618975639343262, "learning_rate": 0.001, "loss": 2.2412, "step": 75992 }, { "epoch": 6.5603864734299515, "grad_norm": 3.3809757232666016, "learning_rate": 0.001, "loss": 2.2482, "step": 76048 }, { "epoch": 6.565217391304348, "grad_norm": 2.76737117767334, "learning_rate": 0.001, "loss": 2.2448, "step": 76104 }, { "epoch": 6.570048309178744, "grad_norm": 2.8874478340148926, "learning_rate": 0.001, "loss": 2.2387, "step": 76160 }, { "epoch": 6.57487922705314, "grad_norm": 3.6475632190704346, "learning_rate": 0.001, "loss": 2.241, "step": 76216 }, { "epoch": 6.579710144927536, "grad_norm": 1.2049617767333984, "learning_rate": 0.001, "loss": 2.2542, "step": 76272 }, { "epoch": 6.584541062801932, "grad_norm": 0.9089396595954895, "learning_rate": 0.001, "loss": 2.2429, "step": 76328 }, { "epoch": 6.5893719806763285, "grad_norm": 0.6770321726799011, "learning_rate": 0.001, "loss": 2.2398, "step": 76384 }, { "epoch": 6.594202898550725, "grad_norm": 1.1739739179611206, "learning_rate": 0.001, "loss": 2.2395, "step": 76440 }, { "epoch": 6.599033816425121, "grad_norm": 1.048844337463379, "learning_rate": 0.001, "loss": 2.2346, "step": 76496 }, { "epoch": 6.603864734299517, "grad_norm": 1.7137600183486938, "learning_rate": 0.001, "loss": 2.2425, "step": 76552 }, { "epoch": 6.608695652173913, "grad_norm": 1.755640983581543, "learning_rate": 0.001, "loss": 2.2484, "step": 76608 }, { "epoch": 6.613526570048309, "grad_norm": 1.1844213008880615, "learning_rate": 0.001, "loss": 2.274, "step": 76664 }, { "epoch": 6.618357487922705, "grad_norm": 1.3580411672592163, "learning_rate": 0.001, "loss": 2.2766, "step": 76720 }, { "epoch": 6.6231884057971016, "grad_norm": 1.012831211090088, "learning_rate": 0.001, "loss": 2.2566, "step": 76776 }, { "epoch": 6.628019323671498, "grad_norm": 0.9028856754302979, "learning_rate": 0.001, "loss": 2.2507, "step": 76832 }, { "epoch": 6.632850241545894, "grad_norm": 5.565920829772949, "learning_rate": 0.001, "loss": 2.2461, "step": 76888 }, { "epoch": 6.63768115942029, "grad_norm": 2.8750646114349365, "learning_rate": 0.001, "loss": 2.2417, "step": 76944 }, { "epoch": 6.642512077294686, "grad_norm": 0.9302629232406616, "learning_rate": 0.001, "loss": 2.2519, "step": 77000 }, { "epoch": 6.647342995169082, "grad_norm": 1.9370646476745605, "learning_rate": 0.001, "loss": 2.2519, "step": 77056 }, { "epoch": 6.6521739130434785, "grad_norm": 3.0804717540740967, "learning_rate": 0.001, "loss": 2.2349, "step": 77112 }, { "epoch": 6.657004830917875, "grad_norm": 2.720844030380249, "learning_rate": 0.001, "loss": 2.2414, "step": 77168 }, { "epoch": 6.661835748792271, "grad_norm": 0.8230652809143066, "learning_rate": 0.001, "loss": 2.2371, "step": 77224 }, { "epoch": 6.666666666666667, "grad_norm": 1.7672905921936035, "learning_rate": 0.001, "loss": 2.2415, "step": 77280 }, { "epoch": 6.671497584541063, "grad_norm": 3.14939022064209, "learning_rate": 0.001, "loss": 2.2349, "step": 77336 }, { "epoch": 6.676328502415459, "grad_norm": 1.050691843032837, "learning_rate": 0.001, "loss": 2.2318, "step": 77392 }, { "epoch": 6.681159420289855, "grad_norm": 1.1251823902130127, "learning_rate": 0.001, "loss": 2.2357, "step": 77448 }, { "epoch": 6.685990338164252, "grad_norm": 1.0894018411636353, "learning_rate": 0.001, "loss": 2.2333, "step": 77504 }, { "epoch": 6.690821256038648, "grad_norm": 1.1541368961334229, "learning_rate": 0.001, "loss": 2.221, "step": 77560 }, { "epoch": 6.695652173913043, "grad_norm": 1.9985852241516113, "learning_rate": 0.001, "loss": 2.2278, "step": 77616 }, { "epoch": 6.70048309178744, "grad_norm": 1.553889274597168, "learning_rate": 0.001, "loss": 2.2184, "step": 77672 }, { "epoch": 6.705314009661835, "grad_norm": 0.6037139296531677, "learning_rate": 0.001, "loss": 2.2242, "step": 77728 }, { "epoch": 6.710144927536232, "grad_norm": 0.6900601983070374, "learning_rate": 0.001, "loss": 2.2171, "step": 77784 }, { "epoch": 6.714975845410628, "grad_norm": 1.2441445589065552, "learning_rate": 0.001, "loss": 2.2232, "step": 77840 }, { "epoch": 6.719806763285024, "grad_norm": 1.2396204471588135, "learning_rate": 0.001, "loss": 2.2227, "step": 77896 }, { "epoch": 6.72463768115942, "grad_norm": 3.7759761810302734, "learning_rate": 0.001, "loss": 2.2273, "step": 77952 }, { "epoch": 6.729468599033816, "grad_norm": 1.964001178741455, "learning_rate": 0.001, "loss": 2.2272, "step": 78008 }, { "epoch": 6.734299516908212, "grad_norm": 0.6876692175865173, "learning_rate": 0.001, "loss": 2.2338, "step": 78064 }, { "epoch": 6.739130434782608, "grad_norm": 1.4361850023269653, "learning_rate": 0.001, "loss": 2.2463, "step": 78120 }, { "epoch": 6.743961352657005, "grad_norm": 2.5210978984832764, "learning_rate": 0.001, "loss": 2.2572, "step": 78176 }, { "epoch": 6.748792270531401, "grad_norm": 1.664169430732727, "learning_rate": 0.001, "loss": 2.2696, "step": 78232 }, { "epoch": 6.753623188405797, "grad_norm": 3.230752468109131, "learning_rate": 0.001, "loss": 2.2431, "step": 78288 }, { "epoch": 6.758454106280193, "grad_norm": 1.7360711097717285, "learning_rate": 0.001, "loss": 2.2514, "step": 78344 }, { "epoch": 6.763285024154589, "grad_norm": 0.8954463005065918, "learning_rate": 0.001, "loss": 2.2382, "step": 78400 }, { "epoch": 6.768115942028985, "grad_norm": 1.1951336860656738, "learning_rate": 0.001, "loss": 2.2545, "step": 78456 }, { "epoch": 6.7729468599033815, "grad_norm": 1.5610833168029785, "learning_rate": 0.001, "loss": 2.2428, "step": 78512 }, { "epoch": 6.777777777777778, "grad_norm": 0.7657495737075806, "learning_rate": 0.001, "loss": 2.2534, "step": 78568 }, { "epoch": 6.782608695652174, "grad_norm": 1.01096510887146, "learning_rate": 0.001, "loss": 2.257, "step": 78624 }, { "epoch": 6.78743961352657, "grad_norm": 0.55475914478302, "learning_rate": 0.001, "loss": 2.2394, "step": 78680 }, { "epoch": 6.792270531400966, "grad_norm": 2.5494346618652344, "learning_rate": 0.001, "loss": 2.2471, "step": 78736 }, { "epoch": 6.797101449275362, "grad_norm": 0.659465491771698, "learning_rate": 0.001, "loss": 2.2477, "step": 78792 }, { "epoch": 6.8019323671497585, "grad_norm": 0.8349037766456604, "learning_rate": 0.001, "loss": 2.2452, "step": 78848 }, { "epoch": 6.806763285024155, "grad_norm": 7.057497978210449, "learning_rate": 0.001, "loss": 2.2435, "step": 78904 }, { "epoch": 6.811594202898551, "grad_norm": 3.615966558456421, "learning_rate": 0.001, "loss": 2.2336, "step": 78960 }, { "epoch": 6.816425120772947, "grad_norm": 0.8992481231689453, "learning_rate": 0.001, "loss": 2.2412, "step": 79016 }, { "epoch": 6.821256038647343, "grad_norm": 4.003536701202393, "learning_rate": 0.001, "loss": 2.2411, "step": 79072 }, { "epoch": 6.826086956521739, "grad_norm": 1.813976764678955, "learning_rate": 0.001, "loss": 2.2483, "step": 79128 }, { "epoch": 6.830917874396135, "grad_norm": 1.2164362668991089, "learning_rate": 0.001, "loss": 2.262, "step": 79184 }, { "epoch": 6.835748792270532, "grad_norm": 2.0430679321289062, "learning_rate": 0.001, "loss": 2.2718, "step": 79240 }, { "epoch": 6.840579710144928, "grad_norm": 0.9740785956382751, "learning_rate": 0.001, "loss": 2.2625, "step": 79296 }, { "epoch": 6.845410628019324, "grad_norm": 1.4133433103561401, "learning_rate": 0.001, "loss": 2.2542, "step": 79352 }, { "epoch": 6.85024154589372, "grad_norm": 0.9220939874649048, "learning_rate": 0.001, "loss": 2.2553, "step": 79408 }, { "epoch": 6.855072463768116, "grad_norm": 1.1142477989196777, "learning_rate": 0.001, "loss": 2.2389, "step": 79464 }, { "epoch": 6.859903381642512, "grad_norm": 0.8727350831031799, "learning_rate": 0.001, "loss": 2.235, "step": 79520 }, { "epoch": 6.8647342995169085, "grad_norm": 1.6178958415985107, "learning_rate": 0.001, "loss": 2.2438, "step": 79576 }, { "epoch": 6.869565217391305, "grad_norm": 1.7619056701660156, "learning_rate": 0.001, "loss": 2.2479, "step": 79632 }, { "epoch": 6.874396135265701, "grad_norm": 1.4165889024734497, "learning_rate": 0.001, "loss": 2.2468, "step": 79688 }, { "epoch": 6.879227053140097, "grad_norm": 4.498415470123291, "learning_rate": 0.001, "loss": 2.232, "step": 79744 }, { "epoch": 6.884057971014493, "grad_norm": 1.6327322721481323, "learning_rate": 0.001, "loss": 2.2396, "step": 79800 }, { "epoch": 6.888888888888889, "grad_norm": 2.4959449768066406, "learning_rate": 0.001, "loss": 2.245, "step": 79856 }, { "epoch": 6.8937198067632846, "grad_norm": 1.4688910245895386, "learning_rate": 0.001, "loss": 2.264, "step": 79912 }, { "epoch": 6.898550724637682, "grad_norm": 0.7763709425926208, "learning_rate": 0.001, "loss": 2.2553, "step": 79968 }, { "epoch": 6.903381642512077, "grad_norm": 1.0467098951339722, "learning_rate": 0.001, "loss": 2.2452, "step": 80024 }, { "epoch": 6.908212560386474, "grad_norm": 0.8108733892440796, "learning_rate": 0.001, "loss": 2.2377, "step": 80080 }, { "epoch": 6.913043478260869, "grad_norm": 1.369469404220581, "learning_rate": 0.001, "loss": 2.2315, "step": 80136 }, { "epoch": 6.917874396135265, "grad_norm": 0.9366617202758789, "learning_rate": 0.001, "loss": 2.2349, "step": 80192 }, { "epoch": 6.9227053140096615, "grad_norm": 1.6140553951263428, "learning_rate": 0.001, "loss": 2.2507, "step": 80248 }, { "epoch": 6.927536231884058, "grad_norm": 2.0513768196105957, "learning_rate": 0.001, "loss": 2.2569, "step": 80304 }, { "epoch": 6.932367149758454, "grad_norm": 1.9008907079696655, "learning_rate": 0.001, "loss": 2.2514, "step": 80360 }, { "epoch": 6.93719806763285, "grad_norm": 1.3423054218292236, "learning_rate": 0.001, "loss": 2.2426, "step": 80416 }, { "epoch": 6.942028985507246, "grad_norm": 1.5680043697357178, "learning_rate": 0.001, "loss": 2.2475, "step": 80472 }, { "epoch": 6.946859903381642, "grad_norm": 1.1729775667190552, "learning_rate": 0.001, "loss": 2.2409, "step": 80528 }, { "epoch": 6.951690821256038, "grad_norm": 1.0784313678741455, "learning_rate": 0.001, "loss": 2.2388, "step": 80584 }, { "epoch": 6.956521739130435, "grad_norm": 1.2925214767456055, "learning_rate": 0.001, "loss": 2.2514, "step": 80640 }, { "epoch": 6.961352657004831, "grad_norm": 13.722448348999023, "learning_rate": 0.001, "loss": 2.2625, "step": 80696 }, { "epoch": 6.966183574879227, "grad_norm": 1.8979257345199585, "learning_rate": 0.001, "loss": 2.264, "step": 80752 }, { "epoch": 6.971014492753623, "grad_norm": 0.9370417594909668, "learning_rate": 0.001, "loss": 2.2566, "step": 80808 }, { "epoch": 6.975845410628019, "grad_norm": 0.6652025580406189, "learning_rate": 0.001, "loss": 2.2436, "step": 80864 }, { "epoch": 6.980676328502415, "grad_norm": 0.8809888958930969, "learning_rate": 0.001, "loss": 2.2367, "step": 80920 }, { "epoch": 6.9855072463768115, "grad_norm": 1.762130618095398, "learning_rate": 0.001, "loss": 2.2375, "step": 80976 }, { "epoch": 6.990338164251208, "grad_norm": 1.014733910560608, "learning_rate": 0.001, "loss": 2.2391, "step": 81032 }, { "epoch": 6.995169082125604, "grad_norm": 1.2004274129867554, "learning_rate": 0.001, "loss": 2.2385, "step": 81088 }, { "epoch": 7.0, "grad_norm": 1.8822312355041504, "learning_rate": 0.001, "loss": 2.2413, "step": 81144 }, { "epoch": 7.004830917874396, "grad_norm": 1.796080231666565, "learning_rate": 0.001, "loss": 2.2041, "step": 81200 }, { "epoch": 7.009661835748792, "grad_norm": 1.3070597648620605, "learning_rate": 0.001, "loss": 2.2204, "step": 81256 }, { "epoch": 7.0144927536231885, "grad_norm": 1.2614771127700806, "learning_rate": 0.001, "loss": 2.2149, "step": 81312 }, { "epoch": 7.019323671497585, "grad_norm": 1.524032711982727, "learning_rate": 0.001, "loss": 2.2134, "step": 81368 }, { "epoch": 7.024154589371981, "grad_norm": 3.0564124584198, "learning_rate": 0.001, "loss": 2.2144, "step": 81424 }, { "epoch": 7.028985507246377, "grad_norm": 2.0705578327178955, "learning_rate": 0.001, "loss": 2.2052, "step": 81480 }, { "epoch": 7.033816425120773, "grad_norm": 1.733300805091858, "learning_rate": 0.001, "loss": 2.2065, "step": 81536 }, { "epoch": 7.038647342995169, "grad_norm": 1.6060824394226074, "learning_rate": 0.001, "loss": 2.2169, "step": 81592 }, { "epoch": 7.043478260869565, "grad_norm": 1.5984892845153809, "learning_rate": 0.001, "loss": 2.2119, "step": 81648 }, { "epoch": 7.048309178743962, "grad_norm": 1.4020204544067383, "learning_rate": 0.001, "loss": 2.2215, "step": 81704 }, { "epoch": 7.053140096618358, "grad_norm": 1.1294715404510498, "learning_rate": 0.001, "loss": 2.2039, "step": 81760 }, { "epoch": 7.057971014492754, "grad_norm": 0.8854770660400391, "learning_rate": 0.001, "loss": 2.2105, "step": 81816 }, { "epoch": 7.06280193236715, "grad_norm": 0.8262887597084045, "learning_rate": 0.001, "loss": 2.2215, "step": 81872 }, { "epoch": 7.067632850241546, "grad_norm": 2.3546652793884277, "learning_rate": 0.001, "loss": 2.2215, "step": 81928 }, { "epoch": 7.072463768115942, "grad_norm": 0.6846729516983032, "learning_rate": 0.001, "loss": 2.2077, "step": 81984 }, { "epoch": 7.0772946859903385, "grad_norm": 0.5966305136680603, "learning_rate": 0.001, "loss": 2.1964, "step": 82040 }, { "epoch": 7.082125603864735, "grad_norm": 1.664222002029419, "learning_rate": 0.001, "loss": 2.206, "step": 82096 }, { "epoch": 7.086956521739131, "grad_norm": 1.0919033288955688, "learning_rate": 0.001, "loss": 2.2054, "step": 82152 }, { "epoch": 7.091787439613527, "grad_norm": 1.8295831680297852, "learning_rate": 0.001, "loss": 2.2127, "step": 82208 }, { "epoch": 7.096618357487923, "grad_norm": 2.2516045570373535, "learning_rate": 0.001, "loss": 2.2337, "step": 82264 }, { "epoch": 7.101449275362318, "grad_norm": 5.178409576416016, "learning_rate": 0.001, "loss": 2.2471, "step": 82320 }, { "epoch": 7.106280193236715, "grad_norm": 0.7064712643623352, "learning_rate": 0.001, "loss": 2.2287, "step": 82376 }, { "epoch": 7.111111111111111, "grad_norm": 3.479949474334717, "learning_rate": 0.001, "loss": 2.2156, "step": 82432 }, { "epoch": 7.115942028985507, "grad_norm": 0.8123447299003601, "learning_rate": 0.001, "loss": 2.2151, "step": 82488 }, { "epoch": 7.120772946859903, "grad_norm": 2.58071231842041, "learning_rate": 0.001, "loss": 2.207, "step": 82544 }, { "epoch": 7.125603864734299, "grad_norm": 1.3098042011260986, "learning_rate": 0.001, "loss": 2.2162, "step": 82600 }, { "epoch": 7.130434782608695, "grad_norm": 1.6242272853851318, "learning_rate": 0.001, "loss": 2.2239, "step": 82656 }, { "epoch": 7.1352657004830915, "grad_norm": 3.981215476989746, "learning_rate": 0.001, "loss": 2.2344, "step": 82712 }, { "epoch": 7.140096618357488, "grad_norm": 0.9496408700942993, "learning_rate": 0.001, "loss": 2.241, "step": 82768 }, { "epoch": 7.144927536231884, "grad_norm": 0.976984977722168, "learning_rate": 0.001, "loss": 2.235, "step": 82824 }, { "epoch": 7.14975845410628, "grad_norm": 2.24456787109375, "learning_rate": 0.001, "loss": 2.2109, "step": 82880 }, { "epoch": 7.154589371980676, "grad_norm": 3.479337692260742, "learning_rate": 0.001, "loss": 2.2146, "step": 82936 }, { "epoch": 7.159420289855072, "grad_norm": 1.3374027013778687, "learning_rate": 0.001, "loss": 2.2204, "step": 82992 }, { "epoch": 7.164251207729468, "grad_norm": 1.1138250827789307, "learning_rate": 0.001, "loss": 2.221, "step": 83048 }, { "epoch": 7.169082125603865, "grad_norm": 3.209102153778076, "learning_rate": 0.001, "loss": 2.2176, "step": 83104 }, { "epoch": 7.173913043478261, "grad_norm": 37.93331527709961, "learning_rate": 0.001, "loss": 2.2227, "step": 83160 }, { "epoch": 7.178743961352657, "grad_norm": 5.81146240234375, "learning_rate": 0.001, "loss": 2.2122, "step": 83216 }, { "epoch": 7.183574879227053, "grad_norm": 1.0278918743133545, "learning_rate": 0.001, "loss": 2.2159, "step": 83272 }, { "epoch": 7.188405797101449, "grad_norm": 1.2807291746139526, "learning_rate": 0.001, "loss": 2.2051, "step": 83328 }, { "epoch": 7.193236714975845, "grad_norm": 1.3941764831542969, "learning_rate": 0.001, "loss": 2.195, "step": 83384 }, { "epoch": 7.1980676328502415, "grad_norm": 1.1143519878387451, "learning_rate": 0.001, "loss": 2.1979, "step": 83440 }, { "epoch": 7.202898550724638, "grad_norm": 3.5452349185943604, "learning_rate": 0.001, "loss": 2.2041, "step": 83496 }, { "epoch": 7.207729468599034, "grad_norm": 1.208636999130249, "learning_rate": 0.001, "loss": 2.2085, "step": 83552 }, { "epoch": 7.21256038647343, "grad_norm": 3.902401924133301, "learning_rate": 0.001, "loss": 2.2062, "step": 83608 }, { "epoch": 7.217391304347826, "grad_norm": 1.7481462955474854, "learning_rate": 0.001, "loss": 2.2024, "step": 83664 }, { "epoch": 7.222222222222222, "grad_norm": 4.274281024932861, "learning_rate": 0.001, "loss": 2.208, "step": 83720 }, { "epoch": 7.2270531400966185, "grad_norm": 1.5858558416366577, "learning_rate": 0.001, "loss": 2.2057, "step": 83776 }, { "epoch": 7.231884057971015, "grad_norm": 1.0880944728851318, "learning_rate": 0.001, "loss": 2.2037, "step": 83832 }, { "epoch": 7.236714975845411, "grad_norm": 2.803246259689331, "learning_rate": 0.001, "loss": 2.2075, "step": 83888 }, { "epoch": 7.241545893719807, "grad_norm": 3.473266363143921, "learning_rate": 0.001, "loss": 2.2299, "step": 83944 }, { "epoch": 7.246376811594203, "grad_norm": 0.6685742139816284, "learning_rate": 0.001, "loss": 2.2155, "step": 84000 }, { "epoch": 7.251207729468599, "grad_norm": 1.4059321880340576, "learning_rate": 0.001, "loss": 2.2108, "step": 84056 }, { "epoch": 7.256038647342995, "grad_norm": 1.5146740674972534, "learning_rate": 0.001, "loss": 2.2106, "step": 84112 }, { "epoch": 7.260869565217392, "grad_norm": 0.7235586047172546, "learning_rate": 0.001, "loss": 2.2076, "step": 84168 }, { "epoch": 7.265700483091788, "grad_norm": 2.5590157508850098, "learning_rate": 0.001, "loss": 2.197, "step": 84224 }, { "epoch": 7.270531400966184, "grad_norm": 3.471428155899048, "learning_rate": 0.001, "loss": 2.1998, "step": 84280 }, { "epoch": 7.27536231884058, "grad_norm": 0.5133311152458191, "learning_rate": 0.001, "loss": 2.1858, "step": 84336 }, { "epoch": 7.280193236714976, "grad_norm": 0.6764386296272278, "learning_rate": 0.001, "loss": 2.1977, "step": 84392 }, { "epoch": 7.285024154589372, "grad_norm": 0.5385075807571411, "learning_rate": 0.001, "loss": 2.1901, "step": 84448 }, { "epoch": 7.2898550724637685, "grad_norm": 4.9898481369018555, "learning_rate": 0.001, "loss": 2.1931, "step": 84504 }, { "epoch": 7.294685990338165, "grad_norm": 0.6643825173377991, "learning_rate": 0.001, "loss": 2.1954, "step": 84560 }, { "epoch": 7.29951690821256, "grad_norm": 0.7024177312850952, "learning_rate": 0.001, "loss": 2.2061, "step": 84616 }, { "epoch": 7.304347826086957, "grad_norm": 1.3688068389892578, "learning_rate": 0.001, "loss": 2.2016, "step": 84672 }, { "epoch": 7.309178743961352, "grad_norm": 1.1807702779769897, "learning_rate": 0.001, "loss": 2.1983, "step": 84728 }, { "epoch": 7.314009661835748, "grad_norm": 1.9392491579055786, "learning_rate": 0.001, "loss": 2.2, "step": 84784 }, { "epoch": 7.318840579710145, "grad_norm": 1.2109184265136719, "learning_rate": 0.001, "loss": 2.2115, "step": 84840 }, { "epoch": 7.323671497584541, "grad_norm": 0.7793823480606079, "learning_rate": 0.001, "loss": 2.2187, "step": 84896 }, { "epoch": 7.328502415458937, "grad_norm": 1.2927424907684326, "learning_rate": 0.001, "loss": 2.2221, "step": 84952 }, { "epoch": 7.333333333333333, "grad_norm": 1.0203886032104492, "learning_rate": 0.001, "loss": 2.2025, "step": 85008 }, { "epoch": 7.338164251207729, "grad_norm": 1.8057535886764526, "learning_rate": 0.001, "loss": 2.1936, "step": 85064 }, { "epoch": 7.342995169082125, "grad_norm": 0.8386402726173401, "learning_rate": 0.001, "loss": 2.1963, "step": 85120 }, { "epoch": 7.3478260869565215, "grad_norm": 0.7808031439781189, "learning_rate": 0.001, "loss": 2.1943, "step": 85176 }, { "epoch": 7.352657004830918, "grad_norm": 0.9829596877098083, "learning_rate": 0.001, "loss": 2.1888, "step": 85232 }, { "epoch": 7.357487922705314, "grad_norm": 6.140841007232666, "learning_rate": 0.001, "loss": 2.1846, "step": 85288 }, { "epoch": 7.36231884057971, "grad_norm": 0.734255313873291, "learning_rate": 0.001, "loss": 2.1944, "step": 85344 }, { "epoch": 7.367149758454106, "grad_norm": 1.21676766872406, "learning_rate": 0.001, "loss": 2.1993, "step": 85400 }, { "epoch": 7.371980676328502, "grad_norm": 0.9128022789955139, "learning_rate": 0.001, "loss": 2.2153, "step": 85456 }, { "epoch": 7.3768115942028984, "grad_norm": 0.7662680149078369, "learning_rate": 0.001, "loss": 2.2212, "step": 85512 }, { "epoch": 7.381642512077295, "grad_norm": 0.6822033524513245, "learning_rate": 0.001, "loss": 2.2165, "step": 85568 }, { "epoch": 7.386473429951691, "grad_norm": 1.139710783958435, "learning_rate": 0.001, "loss": 2.2116, "step": 85624 }, { "epoch": 7.391304347826087, "grad_norm": 1.697280764579773, "learning_rate": 0.001, "loss": 2.2045, "step": 85680 }, { "epoch": 7.396135265700483, "grad_norm": 1.1251060962677002, "learning_rate": 0.001, "loss": 2.1933, "step": 85736 }, { "epoch": 7.400966183574879, "grad_norm": 11.62124252319336, "learning_rate": 0.001, "loss": 2.2, "step": 85792 }, { "epoch": 7.405797101449275, "grad_norm": 1.5566468238830566, "learning_rate": 0.001, "loss": 2.2132, "step": 85848 }, { "epoch": 7.4106280193236715, "grad_norm": 0.5916629433631897, "learning_rate": 0.001, "loss": 2.218, "step": 85904 }, { "epoch": 7.415458937198068, "grad_norm": 0.5133522748947144, "learning_rate": 0.001, "loss": 2.2169, "step": 85960 }, { "epoch": 7.420289855072464, "grad_norm": 1.4335851669311523, "learning_rate": 0.001, "loss": 2.2005, "step": 86016 }, { "epoch": 7.42512077294686, "grad_norm": 1.2312803268432617, "learning_rate": 0.001, "loss": 2.1896, "step": 86072 }, { "epoch": 7.429951690821256, "grad_norm": 2.4313249588012695, "learning_rate": 0.001, "loss": 2.2068, "step": 86128 }, { "epoch": 7.434782608695652, "grad_norm": 0.9686657190322876, "learning_rate": 0.001, "loss": 2.2088, "step": 86184 }, { "epoch": 7.4396135265700485, "grad_norm": 1.062145709991455, "learning_rate": 0.001, "loss": 2.2127, "step": 86240 }, { "epoch": 7.444444444444445, "grad_norm": 2.3246278762817383, "learning_rate": 0.001, "loss": 2.2043, "step": 86296 }, { "epoch": 7.449275362318841, "grad_norm": 4.4462666511535645, "learning_rate": 0.001, "loss": 2.2115, "step": 86352 }, { "epoch": 7.454106280193237, "grad_norm": 0.818331778049469, "learning_rate": 0.001, "loss": 2.2074, "step": 86408 }, { "epoch": 7.458937198067633, "grad_norm": 0.9463566541671753, "learning_rate": 0.001, "loss": 2.2058, "step": 86464 }, { "epoch": 7.463768115942029, "grad_norm": 1.2033153772354126, "learning_rate": 0.001, "loss": 2.1978, "step": 86520 }, { "epoch": 7.468599033816425, "grad_norm": 0.770296037197113, "learning_rate": 0.001, "loss": 2.1916, "step": 86576 }, { "epoch": 7.473429951690822, "grad_norm": 0.6733403205871582, "learning_rate": 0.001, "loss": 2.2066, "step": 86632 }, { "epoch": 7.478260869565218, "grad_norm": 1.081403374671936, "learning_rate": 0.001, "loss": 2.2136, "step": 86688 }, { "epoch": 7.483091787439614, "grad_norm": 1.9005900621414185, "learning_rate": 0.001, "loss": 2.2174, "step": 86744 }, { "epoch": 7.48792270531401, "grad_norm": 1.2378462553024292, "learning_rate": 0.001, "loss": 2.245, "step": 86800 }, { "epoch": 7.492753623188406, "grad_norm": 1.2297759056091309, "learning_rate": 0.001, "loss": 2.2454, "step": 86856 }, { "epoch": 7.4975845410628015, "grad_norm": 1.5183945894241333, "learning_rate": 0.001, "loss": 2.2391, "step": 86912 }, { "epoch": 7.5024154589371985, "grad_norm": 0.6511871218681335, "learning_rate": 0.001, "loss": 2.2295, "step": 86968 }, { "epoch": 7.507246376811594, "grad_norm": 2.7433860301971436, "learning_rate": 0.001, "loss": 2.2174, "step": 87024 }, { "epoch": 7.512077294685991, "grad_norm": 1.301574945449829, "learning_rate": 0.001, "loss": 2.2301, "step": 87080 }, { "epoch": 7.516908212560386, "grad_norm": 0.941912829875946, "learning_rate": 0.001, "loss": 2.2168, "step": 87136 }, { "epoch": 7.521739130434782, "grad_norm": 0.9183884263038635, "learning_rate": 0.001, "loss": 2.2215, "step": 87192 }, { "epoch": 7.526570048309178, "grad_norm": 0.6286992430686951, "learning_rate": 0.001, "loss": 2.2281, "step": 87248 }, { "epoch": 7.531400966183575, "grad_norm": 0.9259273409843445, "learning_rate": 0.001, "loss": 2.2257, "step": 87304 }, { "epoch": 7.536231884057971, "grad_norm": 0.8690216541290283, "learning_rate": 0.001, "loss": 2.229, "step": 87360 }, { "epoch": 7.541062801932367, "grad_norm": 0.6534125208854675, "learning_rate": 0.001, "loss": 2.2241, "step": 87416 }, { "epoch": 7.545893719806763, "grad_norm": 1.9568803310394287, "learning_rate": 0.001, "loss": 2.2081, "step": 87472 }, { "epoch": 7.550724637681159, "grad_norm": 0.7396754622459412, "learning_rate": 0.001, "loss": 2.2085, "step": 87528 }, { "epoch": 7.555555555555555, "grad_norm": 1.5574623346328735, "learning_rate": 0.001, "loss": 2.224, "step": 87584 }, { "epoch": 7.5603864734299515, "grad_norm": 7.3153977394104, "learning_rate": 0.001, "loss": 2.2291, "step": 87640 }, { "epoch": 7.565217391304348, "grad_norm": 2.7998430728912354, "learning_rate": 0.001, "loss": 2.2335, "step": 87696 }, { "epoch": 7.570048309178744, "grad_norm": 0.7265670895576477, "learning_rate": 0.001, "loss": 2.2203, "step": 87752 }, { "epoch": 7.57487922705314, "grad_norm": 0.7056453227996826, "learning_rate": 0.001, "loss": 2.2171, "step": 87808 }, { "epoch": 7.579710144927536, "grad_norm": 0.8008729219436646, "learning_rate": 0.001, "loss": 2.2118, "step": 87864 }, { "epoch": 7.584541062801932, "grad_norm": 0.6358042359352112, "learning_rate": 0.001, "loss": 2.2028, "step": 87920 }, { "epoch": 7.5893719806763285, "grad_norm": 0.6235592365264893, "learning_rate": 0.001, "loss": 2.2018, "step": 87976 }, { "epoch": 7.594202898550725, "grad_norm": 0.8529534339904785, "learning_rate": 0.001, "loss": 2.1901, "step": 88032 }, { "epoch": 7.599033816425121, "grad_norm": 1.933587908744812, "learning_rate": 0.001, "loss": 2.1873, "step": 88088 }, { "epoch": 7.603864734299517, "grad_norm": 1.0945197343826294, "learning_rate": 0.001, "loss": 2.1867, "step": 88144 }, { "epoch": 7.608695652173913, "grad_norm": 7.0664544105529785, "learning_rate": 0.001, "loss": 2.1921, "step": 88200 }, { "epoch": 7.613526570048309, "grad_norm": 0.8298237919807434, "learning_rate": 0.001, "loss": 2.1961, "step": 88256 }, { "epoch": 7.618357487922705, "grad_norm": 1.1436445713043213, "learning_rate": 0.001, "loss": 2.1956, "step": 88312 }, { "epoch": 7.6231884057971016, "grad_norm": 2.2656469345092773, "learning_rate": 0.001, "loss": 2.199, "step": 88368 }, { "epoch": 7.628019323671498, "grad_norm": 2.6382322311401367, "learning_rate": 0.001, "loss": 2.2054, "step": 88424 }, { "epoch": 7.632850241545894, "grad_norm": 1.5132769346237183, "learning_rate": 0.001, "loss": 2.1956, "step": 88480 }, { "epoch": 7.63768115942029, "grad_norm": 1.0352468490600586, "learning_rate": 0.001, "loss": 2.2094, "step": 88536 }, { "epoch": 7.642512077294686, "grad_norm": 0.8177199959754944, "learning_rate": 0.001, "loss": 2.2215, "step": 88592 }, { "epoch": 7.647342995169082, "grad_norm": 1.1241800785064697, "learning_rate": 0.001, "loss": 2.2044, "step": 88648 }, { "epoch": 7.6521739130434785, "grad_norm": 1.8689950704574585, "learning_rate": 0.001, "loss": 2.1874, "step": 88704 }, { "epoch": 7.657004830917875, "grad_norm": 5.128040790557861, "learning_rate": 0.001, "loss": 2.2042, "step": 88760 }, { "epoch": 7.661835748792271, "grad_norm": 0.718639075756073, "learning_rate": 0.001, "loss": 2.2074, "step": 88816 }, { "epoch": 7.666666666666667, "grad_norm": 4.167617321014404, "learning_rate": 0.001, "loss": 2.2048, "step": 88872 }, { "epoch": 7.671497584541063, "grad_norm": 0.6368312835693359, "learning_rate": 0.001, "loss": 2.1949, "step": 88928 }, { "epoch": 7.676328502415459, "grad_norm": 1.9549771547317505, "learning_rate": 0.001, "loss": 2.2016, "step": 88984 }, { "epoch": 7.681159420289855, "grad_norm": 0.9314081072807312, "learning_rate": 0.001, "loss": 2.2097, "step": 89040 }, { "epoch": 7.685990338164252, "grad_norm": 0.6683293581008911, "learning_rate": 0.001, "loss": 2.1973, "step": 89096 }, { "epoch": 7.690821256038648, "grad_norm": 6.989969253540039, "learning_rate": 0.001, "loss": 2.2074, "step": 89152 }, { "epoch": 7.695652173913043, "grad_norm": 3.9997243881225586, "learning_rate": 0.001, "loss": 2.1933, "step": 89208 }, { "epoch": 7.70048309178744, "grad_norm": 1.7864831686019897, "learning_rate": 0.001, "loss": 2.2055, "step": 89264 }, { "epoch": 7.705314009661835, "grad_norm": 52.917266845703125, "learning_rate": 0.001, "loss": 2.2008, "step": 89320 }, { "epoch": 7.710144927536232, "grad_norm": 2.218038558959961, "learning_rate": 0.001, "loss": 2.2094, "step": 89376 }, { "epoch": 7.714975845410628, "grad_norm": 1.853356957435608, "learning_rate": 0.001, "loss": 2.2182, "step": 89432 }, { "epoch": 7.719806763285024, "grad_norm": 1.5955795049667358, "learning_rate": 0.001, "loss": 2.2294, "step": 89488 }, { "epoch": 7.72463768115942, "grad_norm": 1.6717778444290161, "learning_rate": 0.001, "loss": 2.2275, "step": 89544 }, { "epoch": 7.729468599033816, "grad_norm": 2.0329580307006836, "learning_rate": 0.001, "loss": 2.2164, "step": 89600 }, { "epoch": 7.734299516908212, "grad_norm": 2.770157814025879, "learning_rate": 0.001, "loss": 2.2225, "step": 89656 }, { "epoch": 7.739130434782608, "grad_norm": 1.4086664915084839, "learning_rate": 0.001, "loss": 2.2087, "step": 89712 }, { "epoch": 7.743961352657005, "grad_norm": 0.9120765924453735, "learning_rate": 0.001, "loss": 2.2146, "step": 89768 }, { "epoch": 7.748792270531401, "grad_norm": 0.8088471293449402, "learning_rate": 0.001, "loss": 2.1993, "step": 89824 }, { "epoch": 7.753623188405797, "grad_norm": 1.652666449546814, "learning_rate": 0.001, "loss": 2.1927, "step": 89880 }, { "epoch": 7.758454106280193, "grad_norm": 1.7669382095336914, "learning_rate": 0.001, "loss": 2.1954, "step": 89936 }, { "epoch": 7.763285024154589, "grad_norm": 0.5624907612800598, "learning_rate": 0.001, "loss": 2.2078, "step": 89992 }, { "epoch": 7.768115942028985, "grad_norm": 4.082359313964844, "learning_rate": 0.001, "loss": 2.2113, "step": 90048 }, { "epoch": 7.7729468599033815, "grad_norm": 3.621448516845703, "learning_rate": 0.001, "loss": 2.2033, "step": 90104 }, { "epoch": 7.777777777777778, "grad_norm": 0.8657955527305603, "learning_rate": 0.001, "loss": 2.1914, "step": 90160 }, { "epoch": 7.782608695652174, "grad_norm": 0.6873970031738281, "learning_rate": 0.001, "loss": 2.1991, "step": 90216 }, { "epoch": 7.78743961352657, "grad_norm": 0.7847729921340942, "learning_rate": 0.001, "loss": 2.1947, "step": 90272 }, { "epoch": 7.792270531400966, "grad_norm": 1.9442344903945923, "learning_rate": 0.001, "loss": 2.1848, "step": 90328 }, { "epoch": 7.797101449275362, "grad_norm": 0.575197160243988, "learning_rate": 0.001, "loss": 2.1963, "step": 90384 }, { "epoch": 7.8019323671497585, "grad_norm": 5.769298076629639, "learning_rate": 0.001, "loss": 2.1888, "step": 90440 }, { "epoch": 7.806763285024155, "grad_norm": 1.173632264137268, "learning_rate": 0.001, "loss": 2.1859, "step": 90496 }, { "epoch": 7.811594202898551, "grad_norm": 1.359930396080017, "learning_rate": 0.001, "loss": 2.192, "step": 90552 }, { "epoch": 7.816425120772947, "grad_norm": 4.218874931335449, "learning_rate": 0.001, "loss": 2.1801, "step": 90608 }, { "epoch": 7.821256038647343, "grad_norm": 2.3309452533721924, "learning_rate": 0.001, "loss": 2.1896, "step": 90664 }, { "epoch": 7.826086956521739, "grad_norm": 0.7840883135795593, "learning_rate": 0.001, "loss": 2.1914, "step": 90720 }, { "epoch": 7.830917874396135, "grad_norm": 0.8858395218849182, "learning_rate": 0.001, "loss": 2.1815, "step": 90776 }, { "epoch": 7.835748792270532, "grad_norm": 0.5710480809211731, "learning_rate": 0.001, "loss": 2.1912, "step": 90832 }, { "epoch": 7.840579710144928, "grad_norm": 2.7840609550476074, "learning_rate": 0.001, "loss": 2.1852, "step": 90888 }, { "epoch": 7.845410628019324, "grad_norm": 1.8541259765625, "learning_rate": 0.001, "loss": 2.1845, "step": 90944 }, { "epoch": 7.85024154589372, "grad_norm": 2.0087831020355225, "learning_rate": 0.001, "loss": 2.1877, "step": 91000 }, { "epoch": 7.855072463768116, "grad_norm": 1.6200796365737915, "learning_rate": 0.001, "loss": 2.1731, "step": 91056 }, { "epoch": 7.859903381642512, "grad_norm": 0.52519690990448, "learning_rate": 0.001, "loss": 2.181, "step": 91112 }, { "epoch": 7.8647342995169085, "grad_norm": 0.44364088773727417, "learning_rate": 0.001, "loss": 2.1881, "step": 91168 }, { "epoch": 7.869565217391305, "grad_norm": 0.8852233290672302, "learning_rate": 0.001, "loss": 2.1879, "step": 91224 }, { "epoch": 7.874396135265701, "grad_norm": 0.858814537525177, "learning_rate": 0.001, "loss": 2.1933, "step": 91280 }, { "epoch": 7.879227053140097, "grad_norm": 0.45903480052948, "learning_rate": 0.001, "loss": 2.1868, "step": 91336 }, { "epoch": 7.884057971014493, "grad_norm": 2.9495484828948975, "learning_rate": 0.001, "loss": 2.1938, "step": 91392 }, { "epoch": 7.888888888888889, "grad_norm": 0.49399885535240173, "learning_rate": 0.001, "loss": 2.1889, "step": 91448 }, { "epoch": 7.8937198067632846, "grad_norm": 6.074120998382568, "learning_rate": 0.001, "loss": 2.1937, "step": 91504 }, { "epoch": 7.898550724637682, "grad_norm": 0.9523034691810608, "learning_rate": 0.001, "loss": 2.198, "step": 91560 }, { "epoch": 7.903381642512077, "grad_norm": 1.7021063566207886, "learning_rate": 0.001, "loss": 2.2166, "step": 91616 }, { "epoch": 7.908212560386474, "grad_norm": 0.6701593399047852, "learning_rate": 0.001, "loss": 2.2101, "step": 91672 }, { "epoch": 7.913043478260869, "grad_norm": 0.4944227933883667, "learning_rate": 0.001, "loss": 2.1882, "step": 91728 }, { "epoch": 7.917874396135265, "grad_norm": 0.6741495132446289, "learning_rate": 0.001, "loss": 2.1885, "step": 91784 }, { "epoch": 7.9227053140096615, "grad_norm": 0.7432145476341248, "learning_rate": 0.001, "loss": 2.194, "step": 91840 }, { "epoch": 7.927536231884058, "grad_norm": 0.7453572750091553, "learning_rate": 0.001, "loss": 2.1912, "step": 91896 }, { "epoch": 7.932367149758454, "grad_norm": 0.5134963989257812, "learning_rate": 0.001, "loss": 2.1898, "step": 91952 }, { "epoch": 7.93719806763285, "grad_norm": 1.3348597288131714, "learning_rate": 0.001, "loss": 2.1981, "step": 92008 }, { "epoch": 7.942028985507246, "grad_norm": 1.9832841157913208, "learning_rate": 0.001, "loss": 2.1847, "step": 92064 }, { "epoch": 7.946859903381642, "grad_norm": 2.156421184539795, "learning_rate": 0.001, "loss": 2.1818, "step": 92120 }, { "epoch": 7.951690821256038, "grad_norm": 1.3970669507980347, "learning_rate": 0.001, "loss": 2.1859, "step": 92176 }, { "epoch": 7.956521739130435, "grad_norm": 1.942958116531372, "learning_rate": 0.001, "loss": 2.1815, "step": 92232 }, { "epoch": 7.961352657004831, "grad_norm": 0.6472920775413513, "learning_rate": 0.001, "loss": 2.1847, "step": 92288 }, { "epoch": 7.966183574879227, "grad_norm": 1.6507965326309204, "learning_rate": 0.001, "loss": 2.1974, "step": 92344 }, { "epoch": 7.971014492753623, "grad_norm": 1.4307045936584473, "learning_rate": 0.001, "loss": 2.2161, "step": 92400 }, { "epoch": 7.975845410628019, "grad_norm": 1.8888859748840332, "learning_rate": 0.001, "loss": 2.2025, "step": 92456 }, { "epoch": 7.980676328502415, "grad_norm": 2.721738576889038, "learning_rate": 0.001, "loss": 2.2216, "step": 92512 }, { "epoch": 7.9855072463768115, "grad_norm": 1.122875690460205, "learning_rate": 0.001, "loss": 2.2279, "step": 92568 }, { "epoch": 7.990338164251208, "grad_norm": 2.393629312515259, "learning_rate": 0.001, "loss": 2.206, "step": 92624 }, { "epoch": 7.995169082125604, "grad_norm": 1.147132158279419, "learning_rate": 0.001, "loss": 2.2115, "step": 92680 }, { "epoch": 8.0, "grad_norm": 1.2341094017028809, "learning_rate": 0.001, "loss": 2.2159, "step": 92736 }, { "epoch": 8.004830917874395, "grad_norm": 0.8923884630203247, "learning_rate": 0.001, "loss": 2.1539, "step": 92792 }, { "epoch": 8.009661835748792, "grad_norm": 4.119471549987793, "learning_rate": 0.001, "loss": 2.1729, "step": 92848 }, { "epoch": 8.014492753623188, "grad_norm": 1.702335000038147, "learning_rate": 0.001, "loss": 2.2047, "step": 92904 }, { "epoch": 8.019323671497585, "grad_norm": 1.6770316362380981, "learning_rate": 0.001, "loss": 2.2026, "step": 92960 }, { "epoch": 8.02415458937198, "grad_norm": 0.8387877941131592, "learning_rate": 0.001, "loss": 2.1985, "step": 93016 }, { "epoch": 8.028985507246377, "grad_norm": 1.9522343873977661, "learning_rate": 0.001, "loss": 2.1867, "step": 93072 }, { "epoch": 8.033816425120772, "grad_norm": 3.7816824913024902, "learning_rate": 0.001, "loss": 2.1839, "step": 93128 }, { "epoch": 8.03864734299517, "grad_norm": 6.2592644691467285, "learning_rate": 0.001, "loss": 2.1701, "step": 93184 }, { "epoch": 8.043478260869565, "grad_norm": 0.7666885852813721, "learning_rate": 0.001, "loss": 2.1828, "step": 93240 }, { "epoch": 8.048309178743962, "grad_norm": 0.8134300112724304, "learning_rate": 0.001, "loss": 2.1728, "step": 93296 }, { "epoch": 8.053140096618357, "grad_norm": 2.354168176651001, "learning_rate": 0.001, "loss": 2.1881, "step": 93352 }, { "epoch": 8.057971014492754, "grad_norm": 1.0118948221206665, "learning_rate": 0.001, "loss": 2.1763, "step": 93408 }, { "epoch": 8.06280193236715, "grad_norm": 0.5297091007232666, "learning_rate": 0.001, "loss": 2.1641, "step": 93464 }, { "epoch": 8.067632850241546, "grad_norm": 2.466261625289917, "learning_rate": 0.001, "loss": 2.1735, "step": 93520 }, { "epoch": 8.072463768115941, "grad_norm": 1.4624782800674438, "learning_rate": 0.001, "loss": 2.1616, "step": 93576 }, { "epoch": 8.077294685990339, "grad_norm": 0.5605184435844421, "learning_rate": 0.001, "loss": 2.1751, "step": 93632 }, { "epoch": 8.082125603864734, "grad_norm": 1.3781135082244873, "learning_rate": 0.001, "loss": 2.1655, "step": 93688 }, { "epoch": 8.08695652173913, "grad_norm": 1.0592467784881592, "learning_rate": 0.001, "loss": 2.1661, "step": 93744 }, { "epoch": 8.091787439613526, "grad_norm": 1.1301743984222412, "learning_rate": 0.001, "loss": 2.1659, "step": 93800 }, { "epoch": 8.096618357487923, "grad_norm": 1.3702608346939087, "learning_rate": 0.001, "loss": 2.1732, "step": 93856 }, { "epoch": 8.101449275362318, "grad_norm": 1.1399483680725098, "learning_rate": 0.001, "loss": 2.17, "step": 93912 }, { "epoch": 8.106280193236715, "grad_norm": 16.944828033447266, "learning_rate": 0.001, "loss": 2.1682, "step": 93968 }, { "epoch": 8.11111111111111, "grad_norm": 1.5871983766555786, "learning_rate": 0.001, "loss": 2.1581, "step": 94024 }, { "epoch": 8.115942028985508, "grad_norm": 1.0786978006362915, "learning_rate": 0.001, "loss": 2.159, "step": 94080 }, { "epoch": 8.120772946859903, "grad_norm": 0.6798693537712097, "learning_rate": 0.001, "loss": 2.1563, "step": 94136 }, { "epoch": 8.1256038647343, "grad_norm": 0.4582396149635315, "learning_rate": 0.001, "loss": 2.1585, "step": 94192 }, { "epoch": 8.130434782608695, "grad_norm": 0.5516502857208252, "learning_rate": 0.001, "loss": 2.173, "step": 94248 }, { "epoch": 8.135265700483092, "grad_norm": 1.9195241928100586, "learning_rate": 0.001, "loss": 2.1535, "step": 94304 }, { "epoch": 8.140096618357488, "grad_norm": 0.7121683955192566, "learning_rate": 0.001, "loss": 2.1457, "step": 94360 }, { "epoch": 8.144927536231885, "grad_norm": 0.49970006942749023, "learning_rate": 0.001, "loss": 2.1489, "step": 94416 }, { "epoch": 8.14975845410628, "grad_norm": 0.5234577059745789, "learning_rate": 0.001, "loss": 2.1571, "step": 94472 }, { "epoch": 8.154589371980677, "grad_norm": 0.5559391379356384, "learning_rate": 0.001, "loss": 2.1392, "step": 94528 }, { "epoch": 8.159420289855072, "grad_norm": 0.3659181296825409, "learning_rate": 0.001, "loss": 2.1491, "step": 94584 }, { "epoch": 8.16425120772947, "grad_norm": 0.6247583627700806, "learning_rate": 0.001, "loss": 2.1371, "step": 94640 }, { "epoch": 8.169082125603865, "grad_norm": 1.5511116981506348, "learning_rate": 0.001, "loss": 2.1407, "step": 94696 }, { "epoch": 8.173913043478262, "grad_norm": 0.6923587322235107, "learning_rate": 0.001, "loss": 2.1301, "step": 94752 }, { "epoch": 8.178743961352657, "grad_norm": 2.3903884887695312, "learning_rate": 0.001, "loss": 2.1388, "step": 94808 }, { "epoch": 8.183574879227054, "grad_norm": 0.7824434041976929, "learning_rate": 0.001, "loss": 2.145, "step": 94864 }, { "epoch": 8.18840579710145, "grad_norm": 0.6399248242378235, "learning_rate": 0.001, "loss": 2.1457, "step": 94920 }, { "epoch": 8.193236714975846, "grad_norm": 1.8636595010757446, "learning_rate": 0.001, "loss": 2.1398, "step": 94976 }, { "epoch": 8.198067632850242, "grad_norm": 0.8062956929206848, "learning_rate": 0.001, "loss": 2.1383, "step": 95032 }, { "epoch": 8.202898550724637, "grad_norm": 1.3062527179718018, "learning_rate": 0.001, "loss": 2.131, "step": 95088 }, { "epoch": 8.207729468599034, "grad_norm": 2.9180166721343994, "learning_rate": 0.001, "loss": 2.1501, "step": 95144 }, { "epoch": 8.21256038647343, "grad_norm": 0.9812737107276917, "learning_rate": 0.001, "loss": 2.1643, "step": 95200 }, { "epoch": 8.217391304347826, "grad_norm": 3.122885227203369, "learning_rate": 0.001, "loss": 2.1654, "step": 95256 }, { "epoch": 8.222222222222221, "grad_norm": 0.9283051490783691, "learning_rate": 0.001, "loss": 2.1704, "step": 95312 }, { "epoch": 8.227053140096618, "grad_norm": 2.0975658893585205, "learning_rate": 0.001, "loss": 2.1705, "step": 95368 }, { "epoch": 8.231884057971014, "grad_norm": 0.7845547199249268, "learning_rate": 0.001, "loss": 2.1616, "step": 95424 }, { "epoch": 8.23671497584541, "grad_norm": 0.9119700789451599, "learning_rate": 0.001, "loss": 2.1593, "step": 95480 }, { "epoch": 8.241545893719806, "grad_norm": 4.552369117736816, "learning_rate": 0.001, "loss": 2.1599, "step": 95536 }, { "epoch": 8.246376811594203, "grad_norm": 0.8500065207481384, "learning_rate": 0.001, "loss": 2.1605, "step": 95592 }, { "epoch": 8.251207729468598, "grad_norm": 0.9760816693305969, "learning_rate": 0.001, "loss": 2.1686, "step": 95648 }, { "epoch": 8.256038647342995, "grad_norm": 1.4121878147125244, "learning_rate": 0.001, "loss": 2.155, "step": 95704 }, { "epoch": 8.26086956521739, "grad_norm": 1.4397386312484741, "learning_rate": 0.001, "loss": 2.1753, "step": 95760 }, { "epoch": 8.265700483091788, "grad_norm": 0.7009803056716919, "learning_rate": 0.001, "loss": 2.1649, "step": 95816 }, { "epoch": 8.270531400966183, "grad_norm": 0.6118043065071106, "learning_rate": 0.001, "loss": 2.1591, "step": 95872 }, { "epoch": 8.27536231884058, "grad_norm": 2.562849760055542, "learning_rate": 0.001, "loss": 2.1592, "step": 95928 }, { "epoch": 8.280193236714975, "grad_norm": 0.6190630793571472, "learning_rate": 0.001, "loss": 2.1639, "step": 95984 }, { "epoch": 8.285024154589372, "grad_norm": 1.7951046228408813, "learning_rate": 0.001, "loss": 2.177, "step": 96040 }, { "epoch": 8.289855072463768, "grad_norm": 0.9006018042564392, "learning_rate": 0.001, "loss": 2.1659, "step": 96096 }, { "epoch": 8.294685990338165, "grad_norm": 4.852769374847412, "learning_rate": 0.001, "loss": 2.1471, "step": 96152 }, { "epoch": 8.29951690821256, "grad_norm": 1.3894976377487183, "learning_rate": 0.001, "loss": 2.1584, "step": 96208 }, { "epoch": 8.304347826086957, "grad_norm": 0.8317987322807312, "learning_rate": 0.001, "loss": 2.1566, "step": 96264 }, { "epoch": 8.309178743961352, "grad_norm": 0.7113953232765198, "learning_rate": 0.001, "loss": 2.1752, "step": 96320 }, { "epoch": 8.31400966183575, "grad_norm": 0.6266844272613525, "learning_rate": 0.001, "loss": 2.1716, "step": 96376 }, { "epoch": 8.318840579710145, "grad_norm": 0.6783342957496643, "learning_rate": 0.001, "loss": 2.1718, "step": 96432 }, { "epoch": 8.323671497584542, "grad_norm": 0.48579269647598267, "learning_rate": 0.001, "loss": 2.1651, "step": 96488 }, { "epoch": 8.328502415458937, "grad_norm": 0.5348486304283142, "learning_rate": 0.001, "loss": 2.1703, "step": 96544 }, { "epoch": 8.333333333333334, "grad_norm": 0.9735738039016724, "learning_rate": 0.001, "loss": 2.1747, "step": 96600 }, { "epoch": 8.33816425120773, "grad_norm": 1.0612797737121582, "learning_rate": 0.001, "loss": 2.1737, "step": 96656 }, { "epoch": 8.342995169082126, "grad_norm": 0.7015147805213928, "learning_rate": 0.001, "loss": 2.1646, "step": 96712 }, { "epoch": 8.347826086956522, "grad_norm": 0.9940712451934814, "learning_rate": 0.001, "loss": 2.1683, "step": 96768 }, { "epoch": 8.352657004830919, "grad_norm": 1.1415281295776367, "learning_rate": 0.001, "loss": 2.1649, "step": 96824 }, { "epoch": 8.357487922705314, "grad_norm": 1.21604585647583, "learning_rate": 0.001, "loss": 2.1727, "step": 96880 }, { "epoch": 8.36231884057971, "grad_norm": 1.1453938484191895, "learning_rate": 0.001, "loss": 2.1631, "step": 96936 }, { "epoch": 8.367149758454106, "grad_norm": 0.8788756728172302, "learning_rate": 0.001, "loss": 2.1518, "step": 96992 }, { "epoch": 8.371980676328503, "grad_norm": 4.4988694190979, "learning_rate": 0.001, "loss": 2.1537, "step": 97048 }, { "epoch": 8.376811594202898, "grad_norm": 0.41735342144966125, "learning_rate": 0.001, "loss": 2.1734, "step": 97104 }, { "epoch": 8.381642512077295, "grad_norm": 0.9588692784309387, "learning_rate": 0.001, "loss": 2.1615, "step": 97160 }, { "epoch": 8.38647342995169, "grad_norm": 0.5758078098297119, "learning_rate": 0.001, "loss": 2.1513, "step": 97216 }, { "epoch": 8.391304347826088, "grad_norm": 2.615999221801758, "learning_rate": 0.001, "loss": 2.1599, "step": 97272 }, { "epoch": 8.396135265700483, "grad_norm": 1.309092402458191, "learning_rate": 0.001, "loss": 2.1498, "step": 97328 }, { "epoch": 8.40096618357488, "grad_norm": 2.516789436340332, "learning_rate": 0.001, "loss": 2.1479, "step": 97384 }, { "epoch": 8.405797101449275, "grad_norm": 1.4301115274429321, "learning_rate": 0.001, "loss": 2.1533, "step": 97440 }, { "epoch": 8.41062801932367, "grad_norm": 1.1253650188446045, "learning_rate": 0.001, "loss": 2.1529, "step": 97496 }, { "epoch": 8.415458937198068, "grad_norm": 0.5327553749084473, "learning_rate": 0.001, "loss": 2.1589, "step": 97552 }, { "epoch": 8.420289855072463, "grad_norm": 1.2098220586776733, "learning_rate": 0.001, "loss": 2.1482, "step": 97608 }, { "epoch": 8.42512077294686, "grad_norm": 0.6792951822280884, "learning_rate": 0.001, "loss": 2.1455, "step": 97664 }, { "epoch": 8.429951690821255, "grad_norm": 0.7897965312004089, "learning_rate": 0.001, "loss": 2.1528, "step": 97720 }, { "epoch": 8.434782608695652, "grad_norm": 1.8171448707580566, "learning_rate": 0.001, "loss": 2.1665, "step": 97776 }, { "epoch": 8.439613526570048, "grad_norm": 0.6654888391494751, "learning_rate": 0.001, "loss": 2.1696, "step": 97832 }, { "epoch": 8.444444444444445, "grad_norm": 1.768674373626709, "learning_rate": 0.001, "loss": 2.1728, "step": 97888 }, { "epoch": 8.44927536231884, "grad_norm": 0.6529266238212585, "learning_rate": 0.001, "loss": 2.1757, "step": 97944 }, { "epoch": 8.454106280193237, "grad_norm": 1.8409507274627686, "learning_rate": 0.001, "loss": 2.169, "step": 98000 }, { "epoch": 8.458937198067632, "grad_norm": 2.000258684158325, "learning_rate": 0.001, "loss": 2.1679, "step": 98056 }, { "epoch": 8.46376811594203, "grad_norm": 0.7438505291938782, "learning_rate": 0.001, "loss": 2.1631, "step": 98112 }, { "epoch": 8.468599033816425, "grad_norm": 1.2274531126022339, "learning_rate": 0.001, "loss": 2.1674, "step": 98168 }, { "epoch": 8.473429951690822, "grad_norm": 1.0805602073669434, "learning_rate": 0.001, "loss": 2.1793, "step": 98224 }, { "epoch": 8.478260869565217, "grad_norm": 1.033689022064209, "learning_rate": 0.001, "loss": 2.1678, "step": 98280 }, { "epoch": 8.483091787439614, "grad_norm": 1.1021382808685303, "learning_rate": 0.001, "loss": 2.1658, "step": 98336 }, { "epoch": 8.48792270531401, "grad_norm": 1.0279886722564697, "learning_rate": 0.001, "loss": 2.1785, "step": 98392 }, { "epoch": 8.492753623188406, "grad_norm": 1.193163514137268, "learning_rate": 0.001, "loss": 2.1683, "step": 98448 }, { "epoch": 8.497584541062801, "grad_norm": 0.682515561580658, "learning_rate": 0.001, "loss": 2.1691, "step": 98504 }, { "epoch": 8.502415458937199, "grad_norm": 1.073784351348877, "learning_rate": 0.001, "loss": 2.1682, "step": 98560 }, { "epoch": 8.507246376811594, "grad_norm": 1.4975244998931885, "learning_rate": 0.001, "loss": 2.1791, "step": 98616 }, { "epoch": 8.51207729468599, "grad_norm": 1.937719464302063, "learning_rate": 0.001, "loss": 2.1682, "step": 98672 }, { "epoch": 8.516908212560386, "grad_norm": 1.4181437492370605, "learning_rate": 0.001, "loss": 2.1641, "step": 98728 }, { "epoch": 8.521739130434783, "grad_norm": 0.86642986536026, "learning_rate": 0.001, "loss": 2.1648, "step": 98784 }, { "epoch": 8.526570048309178, "grad_norm": 1.679457426071167, "learning_rate": 0.001, "loss": 2.165, "step": 98840 }, { "epoch": 8.531400966183575, "grad_norm": 2.2595467567443848, "learning_rate": 0.001, "loss": 2.1585, "step": 98896 }, { "epoch": 8.53623188405797, "grad_norm": 1.4689456224441528, "learning_rate": 0.001, "loss": 2.1649, "step": 98952 }, { "epoch": 8.541062801932368, "grad_norm": 0.45793503522872925, "learning_rate": 0.001, "loss": 2.1831, "step": 99008 }, { "epoch": 8.545893719806763, "grad_norm": 1.0827895402908325, "learning_rate": 0.001, "loss": 2.1811, "step": 99064 }, { "epoch": 8.55072463768116, "grad_norm": 0.6022539734840393, "learning_rate": 0.001, "loss": 2.171, "step": 99120 }, { "epoch": 8.555555555555555, "grad_norm": 5.037740230560303, "learning_rate": 0.001, "loss": 2.1781, "step": 99176 }, { "epoch": 8.560386473429952, "grad_norm": 0.8766276240348816, "learning_rate": 0.001, "loss": 2.1487, "step": 99232 }, { "epoch": 8.565217391304348, "grad_norm": 3.8220744132995605, "learning_rate": 0.001, "loss": 2.1715, "step": 99288 }, { "epoch": 8.570048309178745, "grad_norm": 0.882576584815979, "learning_rate": 0.001, "loss": 2.1682, "step": 99344 }, { "epoch": 8.57487922705314, "grad_norm": 2.8687374591827393, "learning_rate": 0.001, "loss": 2.1851, "step": 99400 }, { "epoch": 8.579710144927537, "grad_norm": 0.8978865146636963, "learning_rate": 0.001, "loss": 2.1775, "step": 99456 }, { "epoch": 8.584541062801932, "grad_norm": 2.5586705207824707, "learning_rate": 0.001, "loss": 2.1802, "step": 99512 }, { "epoch": 8.58937198067633, "grad_norm": 1.1842213869094849, "learning_rate": 0.001, "loss": 2.1616, "step": 99568 }, { "epoch": 8.594202898550725, "grad_norm": 2.43939208984375, "learning_rate": 0.001, "loss": 2.1754, "step": 99624 }, { "epoch": 8.59903381642512, "grad_norm": 2.8253753185272217, "learning_rate": 0.001, "loss": 2.1737, "step": 99680 }, { "epoch": 8.603864734299517, "grad_norm": 3.521724224090576, "learning_rate": 0.001, "loss": 2.2008, "step": 99736 }, { "epoch": 8.608695652173914, "grad_norm": 5.175851345062256, "learning_rate": 0.001, "loss": 2.2124, "step": 99792 }, { "epoch": 8.61352657004831, "grad_norm": 0.8156281113624573, "learning_rate": 0.001, "loss": 2.1964, "step": 99848 }, { "epoch": 8.618357487922705, "grad_norm": 1.0975911617279053, "learning_rate": 0.001, "loss": 2.1744, "step": 99904 }, { "epoch": 8.623188405797102, "grad_norm": 0.8442077040672302, "learning_rate": 0.001, "loss": 2.1622, "step": 99960 }, { "epoch": 8.628019323671497, "grad_norm": 1.4752484560012817, "learning_rate": 0.001, "loss": 2.1721, "step": 100016 }, { "epoch": 8.632850241545894, "grad_norm": 3.280287742614746, "learning_rate": 0.001, "loss": 2.1676, "step": 100072 }, { "epoch": 8.63768115942029, "grad_norm": 1.961851954460144, "learning_rate": 0.001, "loss": 2.1675, "step": 100128 }, { "epoch": 8.642512077294686, "grad_norm": 1.1180341243743896, "learning_rate": 0.001, "loss": 2.1785, "step": 100184 }, { "epoch": 8.647342995169081, "grad_norm": 2.5496907234191895, "learning_rate": 0.001, "loss": 2.172, "step": 100240 }, { "epoch": 8.652173913043478, "grad_norm": 2.1761929988861084, "learning_rate": 0.001, "loss": 2.1734, "step": 100296 }, { "epoch": 8.657004830917874, "grad_norm": 10.224677085876465, "learning_rate": 0.001, "loss": 2.1923, "step": 100352 }, { "epoch": 8.66183574879227, "grad_norm": 1.2687700986862183, "learning_rate": 0.001, "loss": 2.204, "step": 100408 }, { "epoch": 8.666666666666666, "grad_norm": 1.0741652250289917, "learning_rate": 0.001, "loss": 2.1874, "step": 100464 }, { "epoch": 8.671497584541063, "grad_norm": 3.31343674659729, "learning_rate": 0.001, "loss": 2.197, "step": 100520 }, { "epoch": 8.676328502415458, "grad_norm": 1.0871697664260864, "learning_rate": 0.001, "loss": 2.2159, "step": 100576 }, { "epoch": 8.681159420289855, "grad_norm": 0.6209431290626526, "learning_rate": 0.001, "loss": 2.2235, "step": 100632 }, { "epoch": 8.68599033816425, "grad_norm": 1.3920118808746338, "learning_rate": 0.001, "loss": 2.2001, "step": 100688 }, { "epoch": 8.690821256038648, "grad_norm": 2.679579496383667, "learning_rate": 0.001, "loss": 2.1922, "step": 100744 }, { "epoch": 8.695652173913043, "grad_norm": 0.6335155367851257, "learning_rate": 0.001, "loss": 2.2013, "step": 100800 }, { "epoch": 8.70048309178744, "grad_norm": 2.219010829925537, "learning_rate": 0.001, "loss": 2.1966, "step": 100856 }, { "epoch": 8.705314009661835, "grad_norm": 1.9883054494857788, "learning_rate": 0.001, "loss": 2.1833, "step": 100912 }, { "epoch": 8.710144927536232, "grad_norm": 0.6085325479507446, "learning_rate": 0.001, "loss": 2.1857, "step": 100968 }, { "epoch": 8.714975845410628, "grad_norm": 0.6379920840263367, "learning_rate": 0.001, "loss": 2.18, "step": 101024 }, { "epoch": 8.719806763285025, "grad_norm": 1.0121045112609863, "learning_rate": 0.001, "loss": 2.1757, "step": 101080 }, { "epoch": 8.72463768115942, "grad_norm": 1.31614351272583, "learning_rate": 0.001, "loss": 2.1875, "step": 101136 }, { "epoch": 8.729468599033817, "grad_norm": 4.128320217132568, "learning_rate": 0.001, "loss": 2.1793, "step": 101192 }, { "epoch": 8.734299516908212, "grad_norm": 3.072277784347534, "learning_rate": 0.001, "loss": 2.1834, "step": 101248 }, { "epoch": 8.73913043478261, "grad_norm": 0.8143222332000732, "learning_rate": 0.001, "loss": 2.1787, "step": 101304 }, { "epoch": 8.743961352657005, "grad_norm": 1.0020711421966553, "learning_rate": 0.001, "loss": 2.1944, "step": 101360 }, { "epoch": 8.748792270531402, "grad_norm": 1.4619877338409424, "learning_rate": 0.001, "loss": 2.1801, "step": 101416 }, { "epoch": 8.753623188405797, "grad_norm": 1.8221583366394043, "learning_rate": 0.001, "loss": 2.1969, "step": 101472 }, { "epoch": 8.758454106280194, "grad_norm": 1.1051881313323975, "learning_rate": 0.001, "loss": 2.2227, "step": 101528 }, { "epoch": 8.76328502415459, "grad_norm": 0.569442093372345, "learning_rate": 0.001, "loss": 2.2234, "step": 101584 }, { "epoch": 8.768115942028986, "grad_norm": 1.9901152849197388, "learning_rate": 0.001, "loss": 2.2059, "step": 101640 }, { "epoch": 8.772946859903382, "grad_norm": 1.6924843788146973, "learning_rate": 0.001, "loss": 2.1941, "step": 101696 }, { "epoch": 8.777777777777779, "grad_norm": 5.261808395385742, "learning_rate": 0.001, "loss": 2.1987, "step": 101752 }, { "epoch": 8.782608695652174, "grad_norm": 2.2304725646972656, "learning_rate": 0.001, "loss": 2.1942, "step": 101808 }, { "epoch": 8.78743961352657, "grad_norm": 0.7107840180397034, "learning_rate": 0.001, "loss": 2.1935, "step": 101864 }, { "epoch": 8.792270531400966, "grad_norm": 2.6893982887268066, "learning_rate": 0.001, "loss": 2.1859, "step": 101920 }, { "epoch": 8.797101449275363, "grad_norm": 5.3514580726623535, "learning_rate": 0.001, "loss": 2.2025, "step": 101976 }, { "epoch": 8.801932367149758, "grad_norm": 1.1213563680648804, "learning_rate": 0.001, "loss": 2.2021, "step": 102032 }, { "epoch": 8.806763285024154, "grad_norm": 5.150223255157471, "learning_rate": 0.001, "loss": 2.2162, "step": 102088 }, { "epoch": 8.81159420289855, "grad_norm": 1.5704729557037354, "learning_rate": 0.001, "loss": 2.2082, "step": 102144 }, { "epoch": 8.816425120772946, "grad_norm": 1.5262094736099243, "learning_rate": 0.001, "loss": 2.1913, "step": 102200 }, { "epoch": 8.821256038647343, "grad_norm": 1.3060446977615356, "learning_rate": 0.001, "loss": 2.1829, "step": 102256 }, { "epoch": 8.826086956521738, "grad_norm": 1.790102481842041, "learning_rate": 0.001, "loss": 2.1871, "step": 102312 }, { "epoch": 8.830917874396135, "grad_norm": 1.2355036735534668, "learning_rate": 0.001, "loss": 2.1935, "step": 102368 }, { "epoch": 8.83574879227053, "grad_norm": 0.8439340591430664, "learning_rate": 0.001, "loss": 2.1835, "step": 102424 }, { "epoch": 8.840579710144928, "grad_norm": 1.7418797016143799, "learning_rate": 0.001, "loss": 2.1836, "step": 102480 }, { "epoch": 8.845410628019323, "grad_norm": 1.4281748533248901, "learning_rate": 0.001, "loss": 2.1908, "step": 102536 }, { "epoch": 8.85024154589372, "grad_norm": 1.382093906402588, "learning_rate": 0.001, "loss": 2.1806, "step": 102592 }, { "epoch": 8.855072463768115, "grad_norm": 1.306244969367981, "learning_rate": 0.001, "loss": 2.1767, "step": 102648 }, { "epoch": 8.859903381642512, "grad_norm": 1.1055924892425537, "learning_rate": 0.001, "loss": 2.1899, "step": 102704 }, { "epoch": 8.864734299516908, "grad_norm": 2.741365671157837, "learning_rate": 0.001, "loss": 2.1841, "step": 102760 }, { "epoch": 8.869565217391305, "grad_norm": 1.1803412437438965, "learning_rate": 0.001, "loss": 2.1877, "step": 102816 }, { "epoch": 8.8743961352657, "grad_norm": 0.6093894243240356, "learning_rate": 0.001, "loss": 2.1893, "step": 102872 }, { "epoch": 8.879227053140097, "grad_norm": 3.0857136249542236, "learning_rate": 0.001, "loss": 2.1818, "step": 102928 }, { "epoch": 8.884057971014492, "grad_norm": 14.2505521774292, "learning_rate": 0.001, "loss": 2.1915, "step": 102984 }, { "epoch": 8.88888888888889, "grad_norm": 2.1241681575775146, "learning_rate": 0.001, "loss": 2.22, "step": 103040 }, { "epoch": 8.893719806763285, "grad_norm": 20.421709060668945, "learning_rate": 0.001, "loss": 2.2492, "step": 103096 }, { "epoch": 8.898550724637682, "grad_norm": 5.274570941925049, "learning_rate": 0.001, "loss": 2.235, "step": 103152 }, { "epoch": 8.903381642512077, "grad_norm": 13.373640060424805, "learning_rate": 0.001, "loss": 2.2229, "step": 103208 }, { "epoch": 8.908212560386474, "grad_norm": 1.3923168182373047, "learning_rate": 0.001, "loss": 2.2151, "step": 103264 }, { "epoch": 8.91304347826087, "grad_norm": 1.205040454864502, "learning_rate": 0.001, "loss": 2.2069, "step": 103320 }, { "epoch": 8.917874396135266, "grad_norm": 1.282835602760315, "learning_rate": 0.001, "loss": 2.2045, "step": 103376 }, { "epoch": 8.922705314009661, "grad_norm": 3.139082908630371, "learning_rate": 0.001, "loss": 2.2119, "step": 103432 }, { "epoch": 8.927536231884059, "grad_norm": 0.7206853628158569, "learning_rate": 0.001, "loss": 2.1976, "step": 103488 }, { "epoch": 8.932367149758454, "grad_norm": 0.8087875843048096, "learning_rate": 0.001, "loss": 2.2041, "step": 103544 }, { "epoch": 8.93719806763285, "grad_norm": 0.742152214050293, "learning_rate": 0.001, "loss": 2.1842, "step": 103600 }, { "epoch": 8.942028985507246, "grad_norm": 0.900603711605072, "learning_rate": 0.001, "loss": 2.2008, "step": 103656 }, { "epoch": 8.946859903381643, "grad_norm": 1.6056673526763916, "learning_rate": 0.001, "loss": 2.2029, "step": 103712 }, { "epoch": 8.951690821256038, "grad_norm": 0.666178822517395, "learning_rate": 0.001, "loss": 2.204, "step": 103768 }, { "epoch": 8.956521739130435, "grad_norm": 1.124280333518982, "learning_rate": 0.001, "loss": 2.1965, "step": 103824 }, { "epoch": 8.96135265700483, "grad_norm": 1.827895164489746, "learning_rate": 0.001, "loss": 2.1878, "step": 103880 }, { "epoch": 8.966183574879228, "grad_norm": 1.0022568702697754, "learning_rate": 0.001, "loss": 2.1889, "step": 103936 }, { "epoch": 8.971014492753623, "grad_norm": 1.6951721906661987, "learning_rate": 0.001, "loss": 2.1928, "step": 103992 }, { "epoch": 8.97584541062802, "grad_norm": 1.7363156080245972, "learning_rate": 0.001, "loss": 2.1912, "step": 104048 }, { "epoch": 8.980676328502415, "grad_norm": 2.5289146900177, "learning_rate": 0.001, "loss": 2.1939, "step": 104104 }, { "epoch": 8.985507246376812, "grad_norm": 1.0142520666122437, "learning_rate": 0.001, "loss": 2.1959, "step": 104160 }, { "epoch": 8.990338164251208, "grad_norm": 0.6294025182723999, "learning_rate": 0.001, "loss": 2.1821, "step": 104216 }, { "epoch": 8.995169082125603, "grad_norm": 0.784690797328949, "learning_rate": 0.001, "loss": 2.185, "step": 104272 }, { "epoch": 9.0, "grad_norm": 2.744249105453491, "learning_rate": 0.001, "loss": 2.1917, "step": 104328 }, { "epoch": 9.004830917874395, "grad_norm": 1.0873427391052246, "learning_rate": 0.001, "loss": 2.1707, "step": 104384 }, { "epoch": 9.009661835748792, "grad_norm": 1.0031739473342896, "learning_rate": 0.001, "loss": 2.1732, "step": 104440 }, { "epoch": 9.014492753623188, "grad_norm": 1.0339198112487793, "learning_rate": 0.001, "loss": 2.1602, "step": 104496 }, { "epoch": 9.019323671497585, "grad_norm": 1.4131979942321777, "learning_rate": 0.001, "loss": 2.1643, "step": 104552 }, { "epoch": 9.02415458937198, "grad_norm": 2.60891056060791, "learning_rate": 0.001, "loss": 2.1774, "step": 104608 }, { "epoch": 9.028985507246377, "grad_norm": 3.1860134601593018, "learning_rate": 0.001, "loss": 2.1834, "step": 104664 }, { "epoch": 9.033816425120772, "grad_norm": 1.3319181203842163, "learning_rate": 0.001, "loss": 2.2006, "step": 104720 }, { "epoch": 9.03864734299517, "grad_norm": 1.3886021375656128, "learning_rate": 0.001, "loss": 2.2042, "step": 104776 }, { "epoch": 9.043478260869565, "grad_norm": 1.2281749248504639, "learning_rate": 0.001, "loss": 2.2066, "step": 104832 }, { "epoch": 9.048309178743962, "grad_norm": 1.7456295490264893, "learning_rate": 0.001, "loss": 2.1788, "step": 104888 }, { "epoch": 9.053140096618357, "grad_norm": 1.5728164911270142, "learning_rate": 0.001, "loss": 2.154, "step": 104944 }, { "epoch": 9.057971014492754, "grad_norm": 0.6722880005836487, "learning_rate": 0.001, "loss": 2.1564, "step": 105000 }, { "epoch": 9.06280193236715, "grad_norm": 1.349203109741211, "learning_rate": 0.001, "loss": 2.1661, "step": 105056 }, { "epoch": 9.067632850241546, "grad_norm": 1.1271167993545532, "learning_rate": 0.001, "loss": 2.1715, "step": 105112 }, { "epoch": 9.072463768115941, "grad_norm": 0.9470292329788208, "learning_rate": 0.001, "loss": 2.1697, "step": 105168 }, { "epoch": 9.077294685990339, "grad_norm": 2.252506971359253, "learning_rate": 0.001, "loss": 2.1741, "step": 105224 }, { "epoch": 9.082125603864734, "grad_norm": 0.9157953858375549, "learning_rate": 0.001, "loss": 2.1664, "step": 105280 }, { "epoch": 9.08695652173913, "grad_norm": 3.8057448863983154, "learning_rate": 0.001, "loss": 2.1784, "step": 105336 }, { "epoch": 9.091787439613526, "grad_norm": 1.052957534790039, "learning_rate": 0.001, "loss": 2.1677, "step": 105392 }, { "epoch": 9.096618357487923, "grad_norm": 1.5191885232925415, "learning_rate": 0.001, "loss": 2.1721, "step": 105448 }, { "epoch": 9.101449275362318, "grad_norm": 2.828864336013794, "learning_rate": 0.001, "loss": 2.1662, "step": 105504 }, { "epoch": 9.106280193236715, "grad_norm": 3.506580114364624, "learning_rate": 0.001, "loss": 2.1694, "step": 105560 }, { "epoch": 9.11111111111111, "grad_norm": 1.8464301824569702, "learning_rate": 0.001, "loss": 2.1664, "step": 105616 }, { "epoch": 9.115942028985508, "grad_norm": 2.55763578414917, "learning_rate": 0.001, "loss": 2.1833, "step": 105672 }, { "epoch": 9.120772946859903, "grad_norm": 1.7007839679718018, "learning_rate": 0.001, "loss": 2.1982, "step": 105728 }, { "epoch": 9.1256038647343, "grad_norm": 1.8573815822601318, "learning_rate": 0.001, "loss": 2.1733, "step": 105784 }, { "epoch": 9.130434782608695, "grad_norm": 0.9144348502159119, "learning_rate": 0.001, "loss": 2.1735, "step": 105840 }, { "epoch": 9.135265700483092, "grad_norm": 0.9272240400314331, "learning_rate": 0.001, "loss": 2.1523, "step": 105896 }, { "epoch": 9.140096618357488, "grad_norm": 1.7727491855621338, "learning_rate": 0.001, "loss": 2.1569, "step": 105952 }, { "epoch": 9.144927536231885, "grad_norm": 1.354276418685913, "learning_rate": 0.001, "loss": 2.1621, "step": 106008 }, { "epoch": 9.14975845410628, "grad_norm": 1.5559078454971313, "learning_rate": 0.001, "loss": 2.1794, "step": 106064 }, { "epoch": 9.154589371980677, "grad_norm": 2.2065834999084473, "learning_rate": 0.001, "loss": 2.1581, "step": 106120 }, { "epoch": 9.159420289855072, "grad_norm": 1.2813655138015747, "learning_rate": 0.001, "loss": 2.1528, "step": 106176 }, { "epoch": 9.16425120772947, "grad_norm": 4.577296733856201, "learning_rate": 0.001, "loss": 2.1515, "step": 106232 }, { "epoch": 9.169082125603865, "grad_norm": 2.1098666191101074, "learning_rate": 0.001, "loss": 2.1502, "step": 106288 }, { "epoch": 9.173913043478262, "grad_norm": 1.1017571687698364, "learning_rate": 0.001, "loss": 2.1512, "step": 106344 }, { "epoch": 9.178743961352657, "grad_norm": 2.2283856868743896, "learning_rate": 0.001, "loss": 2.147, "step": 106400 }, { "epoch": 9.183574879227054, "grad_norm": 3.9506516456604004, "learning_rate": 0.001, "loss": 2.1782, "step": 106456 }, { "epoch": 9.18840579710145, "grad_norm": 1.1358847618103027, "learning_rate": 0.001, "loss": 2.1711, "step": 106512 }, { "epoch": 9.193236714975846, "grad_norm": 0.7863360047340393, "learning_rate": 0.001, "loss": 2.1594, "step": 106568 }, { "epoch": 9.198067632850242, "grad_norm": 1.1799285411834717, "learning_rate": 0.001, "loss": 2.153, "step": 106624 }, { "epoch": 9.202898550724637, "grad_norm": 12.447437286376953, "learning_rate": 0.001, "loss": 2.1776, "step": 106680 }, { "epoch": 9.207729468599034, "grad_norm": 1.1490237712860107, "learning_rate": 0.001, "loss": 2.1833, "step": 106736 }, { "epoch": 9.21256038647343, "grad_norm": 0.7120616436004639, "learning_rate": 0.001, "loss": 2.1889, "step": 106792 }, { "epoch": 9.217391304347826, "grad_norm": 1.934424638748169, "learning_rate": 0.001, "loss": 2.2002, "step": 106848 }, { "epoch": 9.222222222222221, "grad_norm": 1.3106374740600586, "learning_rate": 0.001, "loss": 2.1844, "step": 106904 }, { "epoch": 9.227053140096618, "grad_norm": 1.4730052947998047, "learning_rate": 0.001, "loss": 2.1683, "step": 106960 }, { "epoch": 9.231884057971014, "grad_norm": 1.317531943321228, "learning_rate": 0.001, "loss": 2.166, "step": 107016 }, { "epoch": 9.23671497584541, "grad_norm": 0.8591775894165039, "learning_rate": 0.001, "loss": 2.1578, "step": 107072 }, { "epoch": 9.241545893719806, "grad_norm": 1.2508798837661743, "learning_rate": 0.001, "loss": 2.1471, "step": 107128 }, { "epoch": 9.246376811594203, "grad_norm": 2.97257924079895, "learning_rate": 0.001, "loss": 2.1496, "step": 107184 }, { "epoch": 9.251207729468598, "grad_norm": 1.3477306365966797, "learning_rate": 0.001, "loss": 2.1568, "step": 107240 }, { "epoch": 9.256038647342995, "grad_norm": 2.1898815631866455, "learning_rate": 0.001, "loss": 2.1492, "step": 107296 }, { "epoch": 9.26086956521739, "grad_norm": 4.246581554412842, "learning_rate": 0.001, "loss": 2.1609, "step": 107352 }, { "epoch": 9.265700483091788, "grad_norm": 0.9123812913894653, "learning_rate": 0.001, "loss": 2.1547, "step": 107408 }, { "epoch": 9.270531400966183, "grad_norm": 1.096420407295227, "learning_rate": 0.001, "loss": 2.1662, "step": 107464 }, { "epoch": 9.27536231884058, "grad_norm": 1.0242642164230347, "learning_rate": 0.001, "loss": 2.1561, "step": 107520 }, { "epoch": 9.280193236714975, "grad_norm": 1.5103753805160522, "learning_rate": 0.001, "loss": 2.149, "step": 107576 }, { "epoch": 9.285024154589372, "grad_norm": 1.3984957933425903, "learning_rate": 0.001, "loss": 2.1554, "step": 107632 }, { "epoch": 9.289855072463768, "grad_norm": 1.3554457426071167, "learning_rate": 0.001, "loss": 2.1482, "step": 107688 }, { "epoch": 9.294685990338165, "grad_norm": 0.9875816702842712, "learning_rate": 0.001, "loss": 2.1525, "step": 107744 }, { "epoch": 9.29951690821256, "grad_norm": 3.760477304458618, "learning_rate": 0.001, "loss": 2.1418, "step": 107800 }, { "epoch": 9.304347826086957, "grad_norm": 1.089552640914917, "learning_rate": 0.001, "loss": 2.1552, "step": 107856 }, { "epoch": 9.309178743961352, "grad_norm": 2.134943723678589, "learning_rate": 0.001, "loss": 2.1519, "step": 107912 }, { "epoch": 9.31400966183575, "grad_norm": 1.717183232307434, "learning_rate": 0.001, "loss": 2.1462, "step": 107968 }, { "epoch": 9.318840579710145, "grad_norm": 0.8495106101036072, "learning_rate": 0.001, "loss": 2.1372, "step": 108024 }, { "epoch": 9.323671497584542, "grad_norm": 2.423074245452881, "learning_rate": 0.001, "loss": 2.1359, "step": 108080 }, { "epoch": 9.328502415458937, "grad_norm": 5.244233131408691, "learning_rate": 0.001, "loss": 2.1487, "step": 108136 }, { "epoch": 9.333333333333334, "grad_norm": 1.1853563785552979, "learning_rate": 0.001, "loss": 2.1417, "step": 108192 }, { "epoch": 9.33816425120773, "grad_norm": 1.2336734533309937, "learning_rate": 0.001, "loss": 2.1455, "step": 108248 }, { "epoch": 9.342995169082126, "grad_norm": 1.2554857730865479, "learning_rate": 0.001, "loss": 2.1532, "step": 108304 }, { "epoch": 9.347826086956522, "grad_norm": 0.6140924096107483, "learning_rate": 0.001, "loss": 2.1555, "step": 108360 }, { "epoch": 9.352657004830919, "grad_norm": 1.9439496994018555, "learning_rate": 0.001, "loss": 2.176, "step": 108416 }, { "epoch": 9.357487922705314, "grad_norm": 6.822696685791016, "learning_rate": 0.001, "loss": 2.164, "step": 108472 }, { "epoch": 9.36231884057971, "grad_norm": 13.39431095123291, "learning_rate": 0.001, "loss": 2.1556, "step": 108528 }, { "epoch": 9.367149758454106, "grad_norm": 2.1246016025543213, "learning_rate": 0.001, "loss": 2.15, "step": 108584 }, { "epoch": 9.371980676328503, "grad_norm": 5.193103313446045, "learning_rate": 0.001, "loss": 2.1737, "step": 108640 }, { "epoch": 9.376811594202898, "grad_norm": 1.4407577514648438, "learning_rate": 0.001, "loss": 2.1622, "step": 108696 }, { "epoch": 9.381642512077295, "grad_norm": 1.016109585762024, "learning_rate": 0.001, "loss": 2.177, "step": 108752 }, { "epoch": 9.38647342995169, "grad_norm": 4.421100616455078, "learning_rate": 0.001, "loss": 2.1711, "step": 108808 }, { "epoch": 9.391304347826088, "grad_norm": 1.0442662239074707, "learning_rate": 0.001, "loss": 2.1762, "step": 108864 }, { "epoch": 9.396135265700483, "grad_norm": 1.4544677734375, "learning_rate": 0.001, "loss": 2.1649, "step": 108920 }, { "epoch": 9.40096618357488, "grad_norm": 1.0817885398864746, "learning_rate": 0.001, "loss": 2.1552, "step": 108976 }, { "epoch": 9.405797101449275, "grad_norm": 0.9749849438667297, "learning_rate": 0.001, "loss": 2.1529, "step": 109032 }, { "epoch": 9.41062801932367, "grad_norm": 60.81449890136719, "learning_rate": 0.001, "loss": 2.1571, "step": 109088 }, { "epoch": 9.415458937198068, "grad_norm": 1.931065320968628, "learning_rate": 0.001, "loss": 2.1533, "step": 109144 }, { "epoch": 9.420289855072463, "grad_norm": 0.7593839764595032, "learning_rate": 0.001, "loss": 2.1659, "step": 109200 }, { "epoch": 9.42512077294686, "grad_norm": 1.9358320236206055, "learning_rate": 0.001, "loss": 2.1591, "step": 109256 }, { "epoch": 9.429951690821255, "grad_norm": 1.0968706607818604, "learning_rate": 0.001, "loss": 2.1709, "step": 109312 }, { "epoch": 9.434782608695652, "grad_norm": 3.0567328929901123, "learning_rate": 0.001, "loss": 2.1692, "step": 109368 }, { "epoch": 9.439613526570048, "grad_norm": 2.960401773452759, "learning_rate": 0.001, "loss": 2.1677, "step": 109424 }, { "epoch": 9.444444444444445, "grad_norm": 0.9102815985679626, "learning_rate": 0.001, "loss": 2.1849, "step": 109480 }, { "epoch": 9.44927536231884, "grad_norm": 1.1373631954193115, "learning_rate": 0.001, "loss": 2.1796, "step": 109536 }, { "epoch": 9.454106280193237, "grad_norm": 0.7736930847167969, "learning_rate": 0.001, "loss": 2.1781, "step": 109592 }, { "epoch": 9.458937198067632, "grad_norm": 1.9168815612792969, "learning_rate": 0.001, "loss": 2.1783, "step": 109648 }, { "epoch": 9.46376811594203, "grad_norm": 1.665312647819519, "learning_rate": 0.001, "loss": 2.1805, "step": 109704 }, { "epoch": 9.468599033816425, "grad_norm": 0.9396242499351501, "learning_rate": 0.001, "loss": 2.1662, "step": 109760 }, { "epoch": 9.473429951690822, "grad_norm": 0.7805250883102417, "learning_rate": 0.001, "loss": 2.1605, "step": 109816 }, { "epoch": 9.478260869565217, "grad_norm": 0.7406161427497864, "learning_rate": 0.001, "loss": 2.1566, "step": 109872 }, { "epoch": 9.483091787439614, "grad_norm": 1.0757853984832764, "learning_rate": 0.001, "loss": 2.1488, "step": 109928 }, { "epoch": 9.48792270531401, "grad_norm": 0.8147990107536316, "learning_rate": 0.001, "loss": 2.156, "step": 109984 }, { "epoch": 9.492753623188406, "grad_norm": 1.850212574005127, "learning_rate": 0.001, "loss": 2.1587, "step": 110040 }, { "epoch": 9.497584541062801, "grad_norm": 2.406399965286255, "learning_rate": 0.001, "loss": 2.1521, "step": 110096 }, { "epoch": 9.502415458937199, "grad_norm": 1.3661991357803345, "learning_rate": 0.001, "loss": 2.1549, "step": 110152 }, { "epoch": 9.507246376811594, "grad_norm": 53.109100341796875, "learning_rate": 0.001, "loss": 2.1556, "step": 110208 }, { "epoch": 9.51207729468599, "grad_norm": 1.399226427078247, "learning_rate": 0.001, "loss": 2.1653, "step": 110264 }, { "epoch": 9.516908212560386, "grad_norm": 0.9833760857582092, "learning_rate": 0.001, "loss": 2.1491, "step": 110320 }, { "epoch": 9.521739130434783, "grad_norm": 3.0959413051605225, "learning_rate": 0.001, "loss": 2.1497, "step": 110376 }, { "epoch": 9.526570048309178, "grad_norm": 1.8578912019729614, "learning_rate": 0.001, "loss": 2.1451, "step": 110432 }, { "epoch": 9.531400966183575, "grad_norm": 3.6915457248687744, "learning_rate": 0.001, "loss": 2.1632, "step": 110488 }, { "epoch": 9.53623188405797, "grad_norm": 0.5958594679832458, "learning_rate": 0.001, "loss": 2.1623, "step": 110544 }, { "epoch": 9.541062801932368, "grad_norm": 1.164693832397461, "learning_rate": 0.001, "loss": 2.1522, "step": 110600 }, { "epoch": 9.545893719806763, "grad_norm": 1.7256300449371338, "learning_rate": 0.001, "loss": 2.1501, "step": 110656 }, { "epoch": 9.55072463768116, "grad_norm": 1.6894583702087402, "learning_rate": 0.001, "loss": 2.1586, "step": 110712 }, { "epoch": 9.555555555555555, "grad_norm": 17.32471466064453, "learning_rate": 0.001, "loss": 2.1679, "step": 110768 }, { "epoch": 9.560386473429952, "grad_norm": 1.196189522743225, "learning_rate": 0.001, "loss": 2.17, "step": 110824 }, { "epoch": 9.565217391304348, "grad_norm": 1.9024502038955688, "learning_rate": 0.001, "loss": 2.152, "step": 110880 }, { "epoch": 9.570048309178745, "grad_norm": 1.2744781970977783, "learning_rate": 0.001, "loss": 2.1484, "step": 110936 }, { "epoch": 9.57487922705314, "grad_norm": 1.4014837741851807, "learning_rate": 0.001, "loss": 2.1663, "step": 110992 }, { "epoch": 9.579710144927537, "grad_norm": 1.0443902015686035, "learning_rate": 0.001, "loss": 2.1792, "step": 111048 }, { "epoch": 9.584541062801932, "grad_norm": 1.8221991062164307, "learning_rate": 0.001, "loss": 2.1664, "step": 111104 }, { "epoch": 9.58937198067633, "grad_norm": 0.8239999413490295, "learning_rate": 0.001, "loss": 2.1849, "step": 111160 }, { "epoch": 9.594202898550725, "grad_norm": 3.60684871673584, "learning_rate": 0.001, "loss": 2.1722, "step": 111216 }, { "epoch": 9.59903381642512, "grad_norm": 1.3424363136291504, "learning_rate": 0.001, "loss": 2.1686, "step": 111272 }, { "epoch": 9.603864734299517, "grad_norm": 0.8992599844932556, "learning_rate": 0.001, "loss": 2.1637, "step": 111328 }, { "epoch": 9.608695652173914, "grad_norm": 1.7839155197143555, "learning_rate": 0.001, "loss": 2.1564, "step": 111384 }, { "epoch": 9.61352657004831, "grad_norm": 2.079591751098633, "learning_rate": 0.001, "loss": 2.1632, "step": 111440 }, { "epoch": 9.618357487922705, "grad_norm": 1.8810837268829346, "learning_rate": 0.001, "loss": 2.1653, "step": 111496 }, { "epoch": 9.623188405797102, "grad_norm": 3.459331512451172, "learning_rate": 0.001, "loss": 2.1547, "step": 111552 }, { "epoch": 9.628019323671497, "grad_norm": 1.6049835681915283, "learning_rate": 0.001, "loss": 2.1696, "step": 111608 }, { "epoch": 9.632850241545894, "grad_norm": 1.325453758239746, "learning_rate": 0.001, "loss": 2.1868, "step": 111664 }, { "epoch": 9.63768115942029, "grad_norm": 4.009735107421875, "learning_rate": 0.001, "loss": 2.1699, "step": 111720 }, { "epoch": 9.642512077294686, "grad_norm": 1.6072872877120972, "learning_rate": 0.001, "loss": 2.166, "step": 111776 }, { "epoch": 9.647342995169081, "grad_norm": 1.1499840021133423, "learning_rate": 0.001, "loss": 2.1646, "step": 111832 }, { "epoch": 9.652173913043478, "grad_norm": 0.8730283975601196, "learning_rate": 0.001, "loss": 2.1686, "step": 111888 }, { "epoch": 9.657004830917874, "grad_norm": 1.9930415153503418, "learning_rate": 0.001, "loss": 2.1654, "step": 111944 }, { "epoch": 9.66183574879227, "grad_norm": 1.2306714057922363, "learning_rate": 0.001, "loss": 2.1788, "step": 112000 }, { "epoch": 9.666666666666666, "grad_norm": 2.412792444229126, "learning_rate": 0.001, "loss": 2.1812, "step": 112056 }, { "epoch": 9.671497584541063, "grad_norm": 1.0699572563171387, "learning_rate": 0.001, "loss": 2.1845, "step": 112112 }, { "epoch": 9.676328502415458, "grad_norm": 5.60946798324585, "learning_rate": 0.001, "loss": 2.1791, "step": 112168 }, { "epoch": 9.681159420289855, "grad_norm": 2.9559884071350098, "learning_rate": 0.001, "loss": 2.1961, "step": 112224 }, { "epoch": 9.68599033816425, "grad_norm": 1.124289631843567, "learning_rate": 0.001, "loss": 2.1916, "step": 112280 }, { "epoch": 9.690821256038648, "grad_norm": 1.33521568775177, "learning_rate": 0.001, "loss": 2.1817, "step": 112336 }, { "epoch": 9.695652173913043, "grad_norm": 1.4196842908859253, "learning_rate": 0.001, "loss": 2.1772, "step": 112392 }, { "epoch": 9.70048309178744, "grad_norm": 1.963876724243164, "learning_rate": 0.001, "loss": 2.1909, "step": 112448 }, { "epoch": 9.705314009661835, "grad_norm": 2.2582218647003174, "learning_rate": 0.001, "loss": 2.1939, "step": 112504 }, { "epoch": 9.710144927536232, "grad_norm": 2.133075714111328, "learning_rate": 0.001, "loss": 2.1901, "step": 112560 }, { "epoch": 9.714975845410628, "grad_norm": 0.8689550161361694, "learning_rate": 0.001, "loss": 2.1928, "step": 112616 }, { "epoch": 9.719806763285025, "grad_norm": 1.3315383195877075, "learning_rate": 0.001, "loss": 2.1973, "step": 112672 }, { "epoch": 9.72463768115942, "grad_norm": 1.694207787513733, "learning_rate": 0.001, "loss": 2.1817, "step": 112728 }, { "epoch": 9.729468599033817, "grad_norm": 0.88382488489151, "learning_rate": 0.001, "loss": 2.1899, "step": 112784 }, { "epoch": 9.734299516908212, "grad_norm": 1.7167845964431763, "learning_rate": 0.001, "loss": 2.2142, "step": 112840 }, { "epoch": 9.73913043478261, "grad_norm": 3.2816264629364014, "learning_rate": 0.001, "loss": 2.2034, "step": 112896 }, { "epoch": 9.743961352657005, "grad_norm": 9.500353813171387, "learning_rate": 0.001, "loss": 2.2049, "step": 112952 }, { "epoch": 9.748792270531402, "grad_norm": 14.029086112976074, "learning_rate": 0.001, "loss": 2.2004, "step": 113008 }, { "epoch": 9.753623188405797, "grad_norm": 3.5523064136505127, "learning_rate": 0.001, "loss": 2.1968, "step": 113064 }, { "epoch": 9.758454106280194, "grad_norm": 2.552189350128174, "learning_rate": 0.001, "loss": 2.1881, "step": 113120 }, { "epoch": 9.76328502415459, "grad_norm": 1.7633849382400513, "learning_rate": 0.001, "loss": 2.1924, "step": 113176 }, { "epoch": 9.768115942028986, "grad_norm": 1.5622775554656982, "learning_rate": 0.001, "loss": 2.1879, "step": 113232 }, { "epoch": 9.772946859903382, "grad_norm": 1.186415672302246, "learning_rate": 0.001, "loss": 2.1956, "step": 113288 }, { "epoch": 9.777777777777779, "grad_norm": 1.710774302482605, "learning_rate": 0.001, "loss": 2.1866, "step": 113344 }, { "epoch": 9.782608695652174, "grad_norm": 1.7137800455093384, "learning_rate": 0.001, "loss": 2.1871, "step": 113400 }, { "epoch": 9.78743961352657, "grad_norm": 1.5322470664978027, "learning_rate": 0.001, "loss": 2.1924, "step": 113456 }, { "epoch": 9.792270531400966, "grad_norm": 1.3550103902816772, "learning_rate": 0.001, "loss": 2.1964, "step": 113512 }, { "epoch": 9.797101449275363, "grad_norm": 1.6661609411239624, "learning_rate": 0.001, "loss": 2.1969, "step": 113568 }, { "epoch": 9.801932367149758, "grad_norm": 1.63509202003479, "learning_rate": 0.001, "loss": 2.1916, "step": 113624 }, { "epoch": 9.806763285024154, "grad_norm": 1.312102198600769, "learning_rate": 0.001, "loss": 2.2097, "step": 113680 }, { "epoch": 9.81159420289855, "grad_norm": 1.6421797275543213, "learning_rate": 0.001, "loss": 2.1935, "step": 113736 }, { "epoch": 9.816425120772946, "grad_norm": 1.615949273109436, "learning_rate": 0.001, "loss": 2.1851, "step": 113792 }, { "epoch": 9.821256038647343, "grad_norm": 3.91939115524292, "learning_rate": 0.001, "loss": 2.1883, "step": 113848 }, { "epoch": 9.826086956521738, "grad_norm": 1.369583249092102, "learning_rate": 0.001, "loss": 2.2042, "step": 113904 }, { "epoch": 9.830917874396135, "grad_norm": 3.2655386924743652, "learning_rate": 0.001, "loss": 2.1877, "step": 113960 }, { "epoch": 9.83574879227053, "grad_norm": 0.9581074714660645, "learning_rate": 0.001, "loss": 2.2027, "step": 114016 }, { "epoch": 9.840579710144928, "grad_norm": 3.2936856746673584, "learning_rate": 0.001, "loss": 2.1996, "step": 114072 }, { "epoch": 9.845410628019323, "grad_norm": 1.1268800497055054, "learning_rate": 0.001, "loss": 2.1959, "step": 114128 }, { "epoch": 9.85024154589372, "grad_norm": 1.1536705493927002, "learning_rate": 0.001, "loss": 2.1947, "step": 114184 }, { "epoch": 9.855072463768115, "grad_norm": 1.005270004272461, "learning_rate": 0.001, "loss": 2.1832, "step": 114240 }, { "epoch": 9.859903381642512, "grad_norm": 4.5433783531188965, "learning_rate": 0.001, "loss": 2.182, "step": 114296 }, { "epoch": 9.864734299516908, "grad_norm": 1.4915143251419067, "learning_rate": 0.001, "loss": 2.1697, "step": 114352 }, { "epoch": 9.869565217391305, "grad_norm": 1.7086212635040283, "learning_rate": 0.001, "loss": 2.1653, "step": 114408 }, { "epoch": 9.8743961352657, "grad_norm": 2.2827277183532715, "learning_rate": 0.001, "loss": 2.1818, "step": 114464 }, { "epoch": 9.879227053140097, "grad_norm": 2.942509412765503, "learning_rate": 0.001, "loss": 2.1755, "step": 114520 }, { "epoch": 9.884057971014492, "grad_norm": 3.590705633163452, "learning_rate": 0.001, "loss": 2.1699, "step": 114576 }, { "epoch": 9.88888888888889, "grad_norm": 9.507326126098633, "learning_rate": 0.001, "loss": 2.1685, "step": 114632 }, { "epoch": 9.893719806763285, "grad_norm": 1.8674633502960205, "learning_rate": 0.001, "loss": 2.1755, "step": 114688 }, { "epoch": 9.898550724637682, "grad_norm": 1.510111689567566, "learning_rate": 0.001, "loss": 2.1737, "step": 114744 }, { "epoch": 9.903381642512077, "grad_norm": 1.132345199584961, "learning_rate": 0.001, "loss": 2.1684, "step": 114800 }, { "epoch": 9.908212560386474, "grad_norm": 2.128943920135498, "learning_rate": 0.001, "loss": 2.1788, "step": 114856 }, { "epoch": 9.91304347826087, "grad_norm": 0.9413778781890869, "learning_rate": 0.001, "loss": 2.1952, "step": 114912 }, { "epoch": 9.917874396135266, "grad_norm": 1.1830946207046509, "learning_rate": 0.001, "loss": 2.1836, "step": 114968 }, { "epoch": 9.922705314009661, "grad_norm": 2.6084482669830322, "learning_rate": 0.001, "loss": 2.1742, "step": 115024 }, { "epoch": 9.927536231884059, "grad_norm": 2.902265787124634, "learning_rate": 0.001, "loss": 2.1742, "step": 115080 }, { "epoch": 9.932367149758454, "grad_norm": 0.7828874588012695, "learning_rate": 0.001, "loss": 2.1884, "step": 115136 }, { "epoch": 9.93719806763285, "grad_norm": 1.3071789741516113, "learning_rate": 0.001, "loss": 2.1708, "step": 115192 }, { "epoch": 9.942028985507246, "grad_norm": 2.4230844974517822, "learning_rate": 0.001, "loss": 2.1827, "step": 115248 }, { "epoch": 9.946859903381643, "grad_norm": 1.1827428340911865, "learning_rate": 0.001, "loss": 2.1866, "step": 115304 }, { "epoch": 9.951690821256038, "grad_norm": 0.6334431171417236, "learning_rate": 0.001, "loss": 2.1995, "step": 115360 }, { "epoch": 9.956521739130435, "grad_norm": 1.0589326620101929, "learning_rate": 0.001, "loss": 2.1958, "step": 115416 }, { "epoch": 9.96135265700483, "grad_norm": 0.9200308322906494, "learning_rate": 0.001, "loss": 2.1966, "step": 115472 }, { "epoch": 9.966183574879228, "grad_norm": 4.167257308959961, "learning_rate": 0.001, "loss": 2.2287, "step": 115528 }, { "epoch": 9.971014492753623, "grad_norm": 1.3077119588851929, "learning_rate": 0.001, "loss": 2.2365, "step": 115584 }, { "epoch": 9.97584541062802, "grad_norm": 1.1540191173553467, "learning_rate": 0.001, "loss": 2.2286, "step": 115640 }, { "epoch": 9.980676328502415, "grad_norm": 0.6693556308746338, "learning_rate": 0.001, "loss": 2.233, "step": 115696 }, { "epoch": 9.985507246376812, "grad_norm": 0.5801311135292053, "learning_rate": 0.001, "loss": 2.2333, "step": 115752 }, { "epoch": 9.990338164251208, "grad_norm": 2.2549819946289062, "learning_rate": 0.001, "loss": 2.2139, "step": 115808 }, { "epoch": 9.995169082125603, "grad_norm": 1.105181097984314, "learning_rate": 0.001, "loss": 2.1973, "step": 115864 }, { "epoch": 10.0, "grad_norm": 1.5114330053329468, "learning_rate": 0.001, "loss": 2.2032, "step": 115920 }, { "epoch": 10.004830917874395, "grad_norm": 5.178586483001709, "learning_rate": 0.001, "loss": 2.1612, "step": 115976 }, { "epoch": 10.009661835748792, "grad_norm": 1.6251518726348877, "learning_rate": 0.001, "loss": 2.1602, "step": 116032 }, { "epoch": 10.014492753623188, "grad_norm": 2.286675453186035, "learning_rate": 0.001, "loss": 2.1547, "step": 116088 }, { "epoch": 10.019323671497585, "grad_norm": 1.1216520071029663, "learning_rate": 0.001, "loss": 2.1655, "step": 116144 }, { "epoch": 10.02415458937198, "grad_norm": 2.1495821475982666, "learning_rate": 0.001, "loss": 2.1414, "step": 116200 }, { "epoch": 10.028985507246377, "grad_norm": 1.5574885606765747, "learning_rate": 0.001, "loss": 2.1341, "step": 116256 }, { "epoch": 10.033816425120772, "grad_norm": 1.5737286806106567, "learning_rate": 0.001, "loss": 2.1399, "step": 116312 }, { "epoch": 10.03864734299517, "grad_norm": 6.003429412841797, "learning_rate": 0.001, "loss": 2.135, "step": 116368 }, { "epoch": 10.043478260869565, "grad_norm": 1.8048330545425415, "learning_rate": 0.001, "loss": 2.1464, "step": 116424 }, { "epoch": 10.048309178743962, "grad_norm": 1.5536140203475952, "learning_rate": 0.001, "loss": 2.1513, "step": 116480 }, { "epoch": 10.053140096618357, "grad_norm": 7.6224775314331055, "learning_rate": 0.001, "loss": 2.1605, "step": 116536 }, { "epoch": 10.057971014492754, "grad_norm": 1.0521200895309448, "learning_rate": 0.001, "loss": 2.1657, "step": 116592 }, { "epoch": 10.06280193236715, "grad_norm": 1.350377082824707, "learning_rate": 0.001, "loss": 2.1852, "step": 116648 }, { "epoch": 10.067632850241546, "grad_norm": 1.0037842988967896, "learning_rate": 0.001, "loss": 2.178, "step": 116704 }, { "epoch": 10.072463768115941, "grad_norm": 0.8875685930252075, "learning_rate": 0.001, "loss": 2.172, "step": 116760 }, { "epoch": 10.077294685990339, "grad_norm": 4.938282012939453, "learning_rate": 0.001, "loss": 2.1573, "step": 116816 }, { "epoch": 10.082125603864734, "grad_norm": 0.5736998915672302, "learning_rate": 0.001, "loss": 2.1408, "step": 116872 }, { "epoch": 10.08695652173913, "grad_norm": 1.6928740739822388, "learning_rate": 0.001, "loss": 2.1426, "step": 116928 }, { "epoch": 10.091787439613526, "grad_norm": 0.7179147005081177, "learning_rate": 0.001, "loss": 2.1476, "step": 116984 }, { "epoch": 10.096618357487923, "grad_norm": 1.4036375284194946, "learning_rate": 0.001, "loss": 2.1505, "step": 117040 }, { "epoch": 10.101449275362318, "grad_norm": 1.5228289365768433, "learning_rate": 0.001, "loss": 2.1659, "step": 117096 }, { "epoch": 10.106280193236715, "grad_norm": 1.519679307937622, "learning_rate": 0.001, "loss": 2.1831, "step": 117152 }, { "epoch": 10.11111111111111, "grad_norm": 1.7719697952270508, "learning_rate": 0.001, "loss": 2.1725, "step": 117208 }, { "epoch": 10.115942028985508, "grad_norm": 1.4096169471740723, "learning_rate": 0.001, "loss": 2.1852, "step": 117264 }, { "epoch": 10.120772946859903, "grad_norm": 1.4333758354187012, "learning_rate": 0.001, "loss": 2.1574, "step": 117320 }, { "epoch": 10.1256038647343, "grad_norm": 2.2329275608062744, "learning_rate": 0.001, "loss": 2.1499, "step": 117376 }, { "epoch": 10.130434782608695, "grad_norm": 1.9216878414154053, "learning_rate": 0.001, "loss": 2.1551, "step": 117432 }, { "epoch": 10.135265700483092, "grad_norm": 1.291159987449646, "learning_rate": 0.001, "loss": 2.1671, "step": 117488 }, { "epoch": 10.140096618357488, "grad_norm": 3.5178771018981934, "learning_rate": 0.001, "loss": 2.1656, "step": 117544 }, { "epoch": 10.144927536231885, "grad_norm": 0.962126612663269, "learning_rate": 0.001, "loss": 2.1668, "step": 117600 }, { "epoch": 10.14975845410628, "grad_norm": 0.7964538335800171, "learning_rate": 0.001, "loss": 2.1575, "step": 117656 }, { "epoch": 10.154589371980677, "grad_norm": 0.5703120231628418, "learning_rate": 0.001, "loss": 2.1566, "step": 117712 }, { "epoch": 10.159420289855072, "grad_norm": 6.9353461265563965, "learning_rate": 0.001, "loss": 2.1644, "step": 117768 }, { "epoch": 10.16425120772947, "grad_norm": 2.117323875427246, "learning_rate": 0.001, "loss": 2.1638, "step": 117824 }, { "epoch": 10.169082125603865, "grad_norm": 1.7028167247772217, "learning_rate": 0.001, "loss": 2.1622, "step": 117880 }, { "epoch": 10.173913043478262, "grad_norm": 2.015174150466919, "learning_rate": 0.001, "loss": 2.1675, "step": 117936 }, { "epoch": 10.178743961352657, "grad_norm": 1.4474927186965942, "learning_rate": 0.001, "loss": 2.1773, "step": 117992 }, { "epoch": 10.183574879227054, "grad_norm": 1.9509433507919312, "learning_rate": 0.001, "loss": 2.1792, "step": 118048 }, { "epoch": 10.18840579710145, "grad_norm": 3.8931899070739746, "learning_rate": 0.001, "loss": 2.1739, "step": 118104 }, { "epoch": 10.193236714975846, "grad_norm": 11.294742584228516, "learning_rate": 0.001, "loss": 2.1798, "step": 118160 }, { "epoch": 10.198067632850242, "grad_norm": 1.359959363937378, "learning_rate": 0.001, "loss": 2.1741, "step": 118216 }, { "epoch": 10.202898550724637, "grad_norm": 1.4701234102249146, "learning_rate": 0.001, "loss": 2.1738, "step": 118272 }, { "epoch": 10.207729468599034, "grad_norm": 5.7130632400512695, "learning_rate": 0.001, "loss": 2.1667, "step": 118328 }, { "epoch": 10.21256038647343, "grad_norm": 11.105052947998047, "learning_rate": 0.001, "loss": 2.1774, "step": 118384 }, { "epoch": 10.217391304347826, "grad_norm": 1.362142562866211, "learning_rate": 0.001, "loss": 2.1781, "step": 118440 }, { "epoch": 10.222222222222221, "grad_norm": 1.7012754678726196, "learning_rate": 0.001, "loss": 2.1578, "step": 118496 }, { "epoch": 10.227053140096618, "grad_norm": 2.72023606300354, "learning_rate": 0.001, "loss": 2.1584, "step": 118552 }, { "epoch": 10.231884057971014, "grad_norm": 2.6579036712646484, "learning_rate": 0.001, "loss": 2.1582, "step": 118608 }, { "epoch": 10.23671497584541, "grad_norm": 3.486313581466675, "learning_rate": 0.001, "loss": 2.1577, "step": 118664 }, { "epoch": 10.241545893719806, "grad_norm": 4.33111572265625, "learning_rate": 0.001, "loss": 2.1522, "step": 118720 }, { "epoch": 10.246376811594203, "grad_norm": 0.9709329009056091, "learning_rate": 0.001, "loss": 2.158, "step": 118776 }, { "epoch": 10.251207729468598, "grad_norm": 1.9588534832000732, "learning_rate": 0.001, "loss": 2.1596, "step": 118832 }, { "epoch": 10.256038647342995, "grad_norm": 1.4229439496994019, "learning_rate": 0.001, "loss": 2.1604, "step": 118888 }, { "epoch": 10.26086956521739, "grad_norm": 1.3870952129364014, "learning_rate": 0.001, "loss": 2.1672, "step": 118944 }, { "epoch": 10.265700483091788, "grad_norm": 2.0013091564178467, "learning_rate": 0.001, "loss": 2.1833, "step": 119000 }, { "epoch": 10.270531400966183, "grad_norm": 1.0226377248764038, "learning_rate": 0.001, "loss": 2.1621, "step": 119056 }, { "epoch": 10.27536231884058, "grad_norm": 3.3339829444885254, "learning_rate": 0.001, "loss": 2.1581, "step": 119112 }, { "epoch": 10.280193236714975, "grad_norm": 1.468506097793579, "learning_rate": 0.001, "loss": 2.1621, "step": 119168 }, { "epoch": 10.285024154589372, "grad_norm": 1.8375539779663086, "learning_rate": 0.001, "loss": 2.1681, "step": 119224 }, { "epoch": 10.289855072463768, "grad_norm": 2.368908643722534, "learning_rate": 0.001, "loss": 2.1732, "step": 119280 }, { "epoch": 10.294685990338165, "grad_norm": 0.7699496746063232, "learning_rate": 0.001, "loss": 2.1618, "step": 119336 }, { "epoch": 10.29951690821256, "grad_norm": 5.445796012878418, "learning_rate": 0.001, "loss": 2.1587, "step": 119392 }, { "epoch": 10.304347826086957, "grad_norm": 1.6550136804580688, "learning_rate": 0.001, "loss": 2.1526, "step": 119448 }, { "epoch": 10.309178743961352, "grad_norm": 0.967065155506134, "learning_rate": 0.001, "loss": 2.1569, "step": 119504 }, { "epoch": 10.31400966183575, "grad_norm": 0.7379729151725769, "learning_rate": 0.001, "loss": 2.1524, "step": 119560 }, { "epoch": 10.318840579710145, "grad_norm": 1.800350308418274, "learning_rate": 0.001, "loss": 2.1544, "step": 119616 }, { "epoch": 10.323671497584542, "grad_norm": 1.2661592960357666, "learning_rate": 0.001, "loss": 2.1591, "step": 119672 }, { "epoch": 10.328502415458937, "grad_norm": 4.901576519012451, "learning_rate": 0.001, "loss": 2.1428, "step": 119728 }, { "epoch": 10.333333333333334, "grad_norm": 2.654900312423706, "learning_rate": 0.001, "loss": 2.145, "step": 119784 }, { "epoch": 10.33816425120773, "grad_norm": 2.5500903129577637, "learning_rate": 0.001, "loss": 2.1315, "step": 119840 }, { "epoch": 10.342995169082126, "grad_norm": 1.9111007452011108, "learning_rate": 0.001, "loss": 2.1436, "step": 119896 }, { "epoch": 10.347826086956522, "grad_norm": 1.2700903415679932, "learning_rate": 0.001, "loss": 2.1443, "step": 119952 }, { "epoch": 10.352657004830919, "grad_norm": 1.0220659971237183, "learning_rate": 0.001, "loss": 2.142, "step": 120008 }, { "epoch": 10.357487922705314, "grad_norm": 1.5118297338485718, "learning_rate": 0.001, "loss": 2.1379, "step": 120064 }, { "epoch": 10.36231884057971, "grad_norm": 0.7818297147750854, "learning_rate": 0.001, "loss": 2.1425, "step": 120120 }, { "epoch": 10.367149758454106, "grad_norm": 2.0427939891815186, "learning_rate": 0.001, "loss": 2.1497, "step": 120176 }, { "epoch": 10.371980676328503, "grad_norm": 3.5443224906921387, "learning_rate": 0.001, "loss": 2.1559, "step": 120232 }, { "epoch": 10.376811594202898, "grad_norm": 2.7562294006347656, "learning_rate": 0.001, "loss": 2.1672, "step": 120288 }, { "epoch": 10.381642512077295, "grad_norm": 1.0163748264312744, "learning_rate": 0.001, "loss": 2.1703, "step": 120344 }, { "epoch": 10.38647342995169, "grad_norm": 0.8418638706207275, "learning_rate": 0.001, "loss": 2.162, "step": 120400 }, { "epoch": 10.391304347826088, "grad_norm": 0.8190461993217468, "learning_rate": 0.001, "loss": 2.1607, "step": 120456 }, { "epoch": 10.396135265700483, "grad_norm": 1.17571222782135, "learning_rate": 0.001, "loss": 2.1501, "step": 120512 }, { "epoch": 10.40096618357488, "grad_norm": 1.0930935144424438, "learning_rate": 0.001, "loss": 2.1556, "step": 120568 }, { "epoch": 10.405797101449275, "grad_norm": 0.7186071276664734, "learning_rate": 0.001, "loss": 2.147, "step": 120624 }, { "epoch": 10.41062801932367, "grad_norm": 0.8140950798988342, "learning_rate": 0.001, "loss": 2.1433, "step": 120680 }, { "epoch": 10.415458937198068, "grad_norm": 1.202014684677124, "learning_rate": 0.001, "loss": 2.1453, "step": 120736 }, { "epoch": 10.420289855072463, "grad_norm": 0.7525999546051025, "learning_rate": 0.001, "loss": 2.1478, "step": 120792 }, { "epoch": 10.42512077294686, "grad_norm": 0.7609923481941223, "learning_rate": 0.001, "loss": 2.1499, "step": 120848 }, { "epoch": 10.429951690821255, "grad_norm": 5.396042823791504, "learning_rate": 0.001, "loss": 2.1476, "step": 120904 }, { "epoch": 10.434782608695652, "grad_norm": 1.032334804534912, "learning_rate": 0.001, "loss": 2.1488, "step": 120960 }, { "epoch": 10.439613526570048, "grad_norm": 1.0725252628326416, "learning_rate": 0.001, "loss": 2.1625, "step": 121016 }, { "epoch": 10.444444444444445, "grad_norm": 0.7636986374855042, "learning_rate": 0.001, "loss": 2.1707, "step": 121072 }, { "epoch": 10.44927536231884, "grad_norm": 1.3219259977340698, "learning_rate": 0.001, "loss": 2.1801, "step": 121128 }, { "epoch": 10.454106280193237, "grad_norm": 1.2190625667572021, "learning_rate": 0.001, "loss": 2.1805, "step": 121184 }, { "epoch": 10.458937198067632, "grad_norm": 2.133542776107788, "learning_rate": 0.001, "loss": 2.1935, "step": 121240 }, { "epoch": 10.46376811594203, "grad_norm": 6.187461853027344, "learning_rate": 0.001, "loss": 2.1962, "step": 121296 }, { "epoch": 10.468599033816425, "grad_norm": 1.1827586889266968, "learning_rate": 0.001, "loss": 2.1904, "step": 121352 }, { "epoch": 10.473429951690822, "grad_norm": 1.6734812259674072, "learning_rate": 0.001, "loss": 2.1576, "step": 121408 }, { "epoch": 10.478260869565217, "grad_norm": 1.8922410011291504, "learning_rate": 0.001, "loss": 2.1535, "step": 121464 }, { "epoch": 10.483091787439614, "grad_norm": 0.827451765537262, "learning_rate": 0.001, "loss": 2.1394, "step": 121520 }, { "epoch": 10.48792270531401, "grad_norm": 2.311612844467163, "learning_rate": 0.001, "loss": 2.1376, "step": 121576 }, { "epoch": 10.492753623188406, "grad_norm": 1.1984610557556152, "learning_rate": 0.001, "loss": 2.1345, "step": 121632 }, { "epoch": 10.497584541062801, "grad_norm": 1.046425223350525, "learning_rate": 0.001, "loss": 2.1359, "step": 121688 }, { "epoch": 10.502415458937199, "grad_norm": 3.223189115524292, "learning_rate": 0.001, "loss": 2.1565, "step": 121744 }, { "epoch": 10.507246376811594, "grad_norm": 1.6878868341445923, "learning_rate": 0.001, "loss": 2.1459, "step": 121800 }, { "epoch": 10.51207729468599, "grad_norm": 0.9725037813186646, "learning_rate": 0.001, "loss": 2.1424, "step": 121856 }, { "epoch": 10.516908212560386, "grad_norm": 0.5949786305427551, "learning_rate": 0.001, "loss": 2.1485, "step": 121912 }, { "epoch": 10.521739130434783, "grad_norm": 1.2117705345153809, "learning_rate": 0.001, "loss": 2.1444, "step": 121968 }, { "epoch": 10.526570048309178, "grad_norm": 1.1697850227355957, "learning_rate": 0.001, "loss": 2.1415, "step": 122024 }, { "epoch": 10.531400966183575, "grad_norm": 2.1525418758392334, "learning_rate": 0.001, "loss": 2.1492, "step": 122080 }, { "epoch": 10.53623188405797, "grad_norm": 5.725024700164795, "learning_rate": 0.001, "loss": 2.1523, "step": 122136 }, { "epoch": 10.541062801932368, "grad_norm": 2.6298274993896484, "learning_rate": 0.001, "loss": 2.1799, "step": 122192 }, { "epoch": 10.545893719806763, "grad_norm": 2.074086904525757, "learning_rate": 0.001, "loss": 2.1718, "step": 122248 }, { "epoch": 10.55072463768116, "grad_norm": 2.927433967590332, "learning_rate": 0.001, "loss": 2.1838, "step": 122304 }, { "epoch": 10.555555555555555, "grad_norm": 2.6468324661254883, "learning_rate": 0.001, "loss": 2.1839, "step": 122360 }, { "epoch": 10.560386473429952, "grad_norm": 3.347214698791504, "learning_rate": 0.001, "loss": 2.1812, "step": 122416 }, { "epoch": 10.565217391304348, "grad_norm": 1.0131034851074219, "learning_rate": 0.001, "loss": 2.1923, "step": 122472 }, { "epoch": 10.570048309178745, "grad_norm": 1.5336928367614746, "learning_rate": 0.001, "loss": 2.1805, "step": 122528 }, { "epoch": 10.57487922705314, "grad_norm": 5.579852104187012, "learning_rate": 0.001, "loss": 2.1784, "step": 122584 }, { "epoch": 10.579710144927537, "grad_norm": 3.902099609375, "learning_rate": 0.001, "loss": 2.1826, "step": 122640 }, { "epoch": 10.584541062801932, "grad_norm": 1.4280476570129395, "learning_rate": 0.001, "loss": 2.179, "step": 122696 }, { "epoch": 10.58937198067633, "grad_norm": 2.199371814727783, "learning_rate": 0.001, "loss": 2.1884, "step": 122752 }, { "epoch": 10.594202898550725, "grad_norm": 1.8500422239303589, "learning_rate": 0.001, "loss": 2.1975, "step": 122808 }, { "epoch": 10.59903381642512, "grad_norm": 1.2162278890609741, "learning_rate": 0.001, "loss": 2.1831, "step": 122864 }, { "epoch": 10.603864734299517, "grad_norm": 1.2840019464492798, "learning_rate": 0.001, "loss": 2.1973, "step": 122920 }, { "epoch": 10.608695652173914, "grad_norm": 1.730076789855957, "learning_rate": 0.001, "loss": 2.2062, "step": 122976 }, { "epoch": 10.61352657004831, "grad_norm": 1.1242142915725708, "learning_rate": 0.001, "loss": 2.1959, "step": 123032 }, { "epoch": 10.618357487922705, "grad_norm": 3.820575714111328, "learning_rate": 0.001, "loss": 2.1967, "step": 123088 }, { "epoch": 10.623188405797102, "grad_norm": 2.3786606788635254, "learning_rate": 0.001, "loss": 2.1959, "step": 123144 }, { "epoch": 10.628019323671497, "grad_norm": 4.595869541168213, "learning_rate": 0.001, "loss": 2.1948, "step": 123200 }, { "epoch": 10.632850241545894, "grad_norm": 1.2908978462219238, "learning_rate": 0.001, "loss": 2.166, "step": 123256 }, { "epoch": 10.63768115942029, "grad_norm": 2.2055788040161133, "learning_rate": 0.001, "loss": 2.1674, "step": 123312 }, { "epoch": 10.642512077294686, "grad_norm": 1.5530894994735718, "learning_rate": 0.001, "loss": 2.1765, "step": 123368 }, { "epoch": 10.647342995169081, "grad_norm": 2.4489030838012695, "learning_rate": 0.001, "loss": 2.1696, "step": 123424 }, { "epoch": 10.652173913043478, "grad_norm": 1.9906662702560425, "learning_rate": 0.001, "loss": 2.1919, "step": 123480 }, { "epoch": 10.657004830917874, "grad_norm": 2.92899489402771, "learning_rate": 0.001, "loss": 2.1816, "step": 123536 }, { "epoch": 10.66183574879227, "grad_norm": 1.9007189273834229, "learning_rate": 0.001, "loss": 2.2027, "step": 123592 }, { "epoch": 10.666666666666666, "grad_norm": 1.301443338394165, "learning_rate": 0.001, "loss": 2.2062, "step": 123648 }, { "epoch": 10.671497584541063, "grad_norm": 1.3572661876678467, "learning_rate": 0.001, "loss": 2.2066, "step": 123704 }, { "epoch": 10.676328502415458, "grad_norm": 3.151829242706299, "learning_rate": 0.001, "loss": 2.2135, "step": 123760 }, { "epoch": 10.681159420289855, "grad_norm": 6.817243576049805, "learning_rate": 0.001, "loss": 2.2025, "step": 123816 }, { "epoch": 10.68599033816425, "grad_norm": 2.553074598312378, "learning_rate": 0.001, "loss": 2.1885, "step": 123872 }, { "epoch": 10.690821256038648, "grad_norm": 7.715940952301025, "learning_rate": 0.001, "loss": 2.1763, "step": 123928 }, { "epoch": 10.695652173913043, "grad_norm": 1.651444911956787, "learning_rate": 0.001, "loss": 2.173, "step": 123984 }, { "epoch": 10.70048309178744, "grad_norm": 1.3205150365829468, "learning_rate": 0.001, "loss": 2.1886, "step": 124040 }, { "epoch": 10.705314009661835, "grad_norm": 1.0851335525512695, "learning_rate": 0.001, "loss": 2.1901, "step": 124096 }, { "epoch": 10.710144927536232, "grad_norm": 1.4675530195236206, "learning_rate": 0.001, "loss": 2.1846, "step": 124152 }, { "epoch": 10.714975845410628, "grad_norm": 1.7252501249313354, "learning_rate": 0.001, "loss": 2.1878, "step": 124208 }, { "epoch": 10.719806763285025, "grad_norm": 2.2222039699554443, "learning_rate": 0.001, "loss": 2.1849, "step": 124264 }, { "epoch": 10.72463768115942, "grad_norm": 4.243847370147705, "learning_rate": 0.001, "loss": 2.1787, "step": 124320 }, { "epoch": 10.729468599033817, "grad_norm": 146.34423828125, "learning_rate": 0.001, "loss": 2.1765, "step": 124376 }, { "epoch": 10.734299516908212, "grad_norm": 6.666025638580322, "learning_rate": 0.001, "loss": 2.1898, "step": 124432 }, { "epoch": 10.73913043478261, "grad_norm": 1.2949142456054688, "learning_rate": 0.001, "loss": 2.1871, "step": 124488 }, { "epoch": 10.743961352657005, "grad_norm": 2.8519201278686523, "learning_rate": 0.001, "loss": 2.1809, "step": 124544 }, { "epoch": 10.748792270531402, "grad_norm": 2.52215576171875, "learning_rate": 0.001, "loss": 2.1885, "step": 124600 }, { "epoch": 10.753623188405797, "grad_norm": 1.3800928592681885, "learning_rate": 0.001, "loss": 2.1792, "step": 124656 }, { "epoch": 10.758454106280194, "grad_norm": 1.5619258880615234, "learning_rate": 0.001, "loss": 2.1761, "step": 124712 }, { "epoch": 10.76328502415459, "grad_norm": 5.771429061889648, "learning_rate": 0.001, "loss": 2.1715, "step": 124768 }, { "epoch": 10.768115942028986, "grad_norm": 2.991969108581543, "learning_rate": 0.001, "loss": 2.1681, "step": 124824 }, { "epoch": 10.772946859903382, "grad_norm": 7.448109149932861, "learning_rate": 0.001, "loss": 2.1793, "step": 124880 }, { "epoch": 10.777777777777779, "grad_norm": 2.432189464569092, "learning_rate": 0.001, "loss": 2.2094, "step": 124936 }, { "epoch": 10.782608695652174, "grad_norm": 1.4482777118682861, "learning_rate": 0.001, "loss": 2.1856, "step": 124992 }, { "epoch": 10.78743961352657, "grad_norm": 1.8974546194076538, "learning_rate": 0.001, "loss": 2.1959, "step": 125048 }, { "epoch": 10.792270531400966, "grad_norm": 3.711042881011963, "learning_rate": 0.001, "loss": 2.1835, "step": 125104 }, { "epoch": 10.797101449275363, "grad_norm": 0.7546063661575317, "learning_rate": 0.001, "loss": 2.1694, "step": 125160 }, { "epoch": 10.801932367149758, "grad_norm": 1.4827537536621094, "learning_rate": 0.001, "loss": 2.163, "step": 125216 }, { "epoch": 10.806763285024154, "grad_norm": 1.6204676628112793, "learning_rate": 0.001, "loss": 2.1715, "step": 125272 }, { "epoch": 10.81159420289855, "grad_norm": 3.52702260017395, "learning_rate": 0.001, "loss": 2.1776, "step": 125328 }, { "epoch": 10.816425120772946, "grad_norm": 1.9293352365493774, "learning_rate": 0.001, "loss": 2.1799, "step": 125384 }, { "epoch": 10.821256038647343, "grad_norm": 3.903444528579712, "learning_rate": 0.001, "loss": 2.1913, "step": 125440 }, { "epoch": 10.826086956521738, "grad_norm": 1.6531853675842285, "learning_rate": 0.001, "loss": 2.1973, "step": 125496 }, { "epoch": 10.830917874396135, "grad_norm": 1.8802529573440552, "learning_rate": 0.001, "loss": 2.2041, "step": 125552 }, { "epoch": 10.83574879227053, "grad_norm": 4.121514797210693, "learning_rate": 0.001, "loss": 2.18, "step": 125608 }, { "epoch": 10.840579710144928, "grad_norm": 1.433053731918335, "learning_rate": 0.001, "loss": 2.1811, "step": 125664 }, { "epoch": 10.845410628019323, "grad_norm": 1.7132673263549805, "learning_rate": 0.001, "loss": 2.1901, "step": 125720 }, { "epoch": 10.85024154589372, "grad_norm": 3.892742395401001, "learning_rate": 0.001, "loss": 2.1978, "step": 125776 }, { "epoch": 10.855072463768115, "grad_norm": 1.0258229970932007, "learning_rate": 0.001, "loss": 2.1856, "step": 125832 }, { "epoch": 10.859903381642512, "grad_norm": 1.0017156600952148, "learning_rate": 0.001, "loss": 2.1848, "step": 125888 }, { "epoch": 10.864734299516908, "grad_norm": 4.105360507965088, "learning_rate": 0.001, "loss": 2.1838, "step": 125944 }, { "epoch": 10.869565217391305, "grad_norm": 1.692204236984253, "learning_rate": 0.001, "loss": 2.1758, "step": 126000 }, { "epoch": 10.8743961352657, "grad_norm": 1.0897178649902344, "learning_rate": 0.001, "loss": 2.175, "step": 126056 }, { "epoch": 10.879227053140097, "grad_norm": 1.420247197151184, "learning_rate": 0.001, "loss": 2.1745, "step": 126112 }, { "epoch": 10.884057971014492, "grad_norm": 0.9701505303382874, "learning_rate": 0.001, "loss": 2.1654, "step": 126168 }, { "epoch": 10.88888888888889, "grad_norm": 1.5234839916229248, "learning_rate": 0.001, "loss": 2.1707, "step": 126224 }, { "epoch": 10.893719806763285, "grad_norm": 1.4400521516799927, "learning_rate": 0.001, "loss": 2.1791, "step": 126280 }, { "epoch": 10.898550724637682, "grad_norm": 1.0738338232040405, "learning_rate": 0.001, "loss": 2.1646, "step": 126336 }, { "epoch": 10.903381642512077, "grad_norm": 2.5842297077178955, "learning_rate": 0.001, "loss": 2.1558, "step": 126392 }, { "epoch": 10.908212560386474, "grad_norm": 1.2197184562683105, "learning_rate": 0.001, "loss": 2.151, "step": 126448 }, { "epoch": 10.91304347826087, "grad_norm": 5.161417484283447, "learning_rate": 0.001, "loss": 2.1601, "step": 126504 }, { "epoch": 10.917874396135266, "grad_norm": 3.266233205795288, "learning_rate": 0.001, "loss": 2.1568, "step": 126560 }, { "epoch": 10.922705314009661, "grad_norm": 1.3476234674453735, "learning_rate": 0.001, "loss": 2.1645, "step": 126616 }, { "epoch": 10.927536231884059, "grad_norm": 2.2833454608917236, "learning_rate": 0.001, "loss": 2.1716, "step": 126672 }, { "epoch": 10.932367149758454, "grad_norm": 1.0055052042007446, "learning_rate": 0.001, "loss": 2.1674, "step": 126728 }, { "epoch": 10.93719806763285, "grad_norm": 1.4048173427581787, "learning_rate": 0.001, "loss": 2.164, "step": 126784 }, { "epoch": 10.942028985507246, "grad_norm": 1.6197589635849, "learning_rate": 0.001, "loss": 2.1719, "step": 126840 }, { "epoch": 10.946859903381643, "grad_norm": 1.6318053007125854, "learning_rate": 0.001, "loss": 2.1755, "step": 126896 }, { "epoch": 10.951690821256038, "grad_norm": 3.1759567260742188, "learning_rate": 0.001, "loss": 2.1653, "step": 126952 }, { "epoch": 10.956521739130435, "grad_norm": 2.743807554244995, "learning_rate": 0.001, "loss": 2.165, "step": 127008 }, { "epoch": 10.96135265700483, "grad_norm": 1.2281099557876587, "learning_rate": 0.001, "loss": 2.1625, "step": 127064 }, { "epoch": 10.966183574879228, "grad_norm": 1.0311485528945923, "learning_rate": 0.001, "loss": 2.1712, "step": 127120 }, { "epoch": 10.971014492753623, "grad_norm": 7.506829261779785, "learning_rate": 0.001, "loss": 2.1828, "step": 127176 }, { "epoch": 10.97584541062802, "grad_norm": 1.3829762935638428, "learning_rate": 0.001, "loss": 2.1875, "step": 127232 }, { "epoch": 10.980676328502415, "grad_norm": 1.8979687690734863, "learning_rate": 0.001, "loss": 2.1774, "step": 127288 }, { "epoch": 10.985507246376812, "grad_norm": 6.267608165740967, "learning_rate": 0.001, "loss": 2.1829, "step": 127344 }, { "epoch": 10.990338164251208, "grad_norm": 1.2983322143554688, "learning_rate": 0.001, "loss": 2.1832, "step": 127400 }, { "epoch": 10.995169082125603, "grad_norm": 1.2553904056549072, "learning_rate": 0.001, "loss": 2.1818, "step": 127456 }, { "epoch": 11.0, "grad_norm": 2.4120144844055176, "learning_rate": 0.001, "loss": 2.182, "step": 127512 }, { "epoch": 11.004830917874395, "grad_norm": 1.2111910581588745, "learning_rate": 0.001, "loss": 2.1361, "step": 127568 }, { "epoch": 11.009661835748792, "grad_norm": 1.063620686531067, "learning_rate": 0.001, "loss": 2.1304, "step": 127624 }, { "epoch": 11.014492753623188, "grad_norm": 1.7111425399780273, "learning_rate": 0.001, "loss": 2.1328, "step": 127680 }, { "epoch": 11.019323671497585, "grad_norm": 1.431949257850647, "learning_rate": 0.001, "loss": 2.139, "step": 127736 }, { "epoch": 11.02415458937198, "grad_norm": 1.9574198722839355, "learning_rate": 0.001, "loss": 2.1329, "step": 127792 }, { "epoch": 11.028985507246377, "grad_norm": 1.8330541849136353, "learning_rate": 0.001, "loss": 2.1608, "step": 127848 }, { "epoch": 11.033816425120772, "grad_norm": 1.199495792388916, "learning_rate": 0.001, "loss": 2.1735, "step": 127904 }, { "epoch": 11.03864734299517, "grad_norm": 5.455221652984619, "learning_rate": 0.001, "loss": 2.1544, "step": 127960 }, { "epoch": 11.043478260869565, "grad_norm": 1.5906717777252197, "learning_rate": 0.001, "loss": 2.1561, "step": 128016 }, { "epoch": 11.048309178743962, "grad_norm": 4.229872703552246, "learning_rate": 0.001, "loss": 2.158, "step": 128072 }, { "epoch": 11.053140096618357, "grad_norm": 2.4469618797302246, "learning_rate": 0.001, "loss": 2.1599, "step": 128128 }, { "epoch": 11.057971014492754, "grad_norm": 1.4909590482711792, "learning_rate": 0.001, "loss": 2.1614, "step": 128184 }, { "epoch": 11.06280193236715, "grad_norm": 1.8766353130340576, "learning_rate": 0.001, "loss": 2.1615, "step": 128240 }, { "epoch": 11.067632850241546, "grad_norm": 2.002718687057495, "learning_rate": 0.001, "loss": 2.1544, "step": 128296 }, { "epoch": 11.072463768115941, "grad_norm": 2.9536755084991455, "learning_rate": 0.001, "loss": 2.1572, "step": 128352 }, { "epoch": 11.077294685990339, "grad_norm": 7.31076192855835, "learning_rate": 0.001, "loss": 2.1772, "step": 128408 }, { "epoch": 11.082125603864734, "grad_norm": 3.5101513862609863, "learning_rate": 0.001, "loss": 2.1876, "step": 128464 }, { "epoch": 11.08695652173913, "grad_norm": 7.660124778747559, "learning_rate": 0.001, "loss": 2.1807, "step": 128520 }, { "epoch": 11.091787439613526, "grad_norm": 2.4206292629241943, "learning_rate": 0.001, "loss": 2.1796, "step": 128576 }, { "epoch": 11.096618357487923, "grad_norm": 2.630587100982666, "learning_rate": 0.001, "loss": 2.1876, "step": 128632 }, { "epoch": 11.101449275362318, "grad_norm": 6.899549961090088, "learning_rate": 0.001, "loss": 2.2005, "step": 128688 }, { "epoch": 11.106280193236715, "grad_norm": 13.317012786865234, "learning_rate": 0.001, "loss": 2.2191, "step": 128744 }, { "epoch": 11.11111111111111, "grad_norm": 2.4693360328674316, "learning_rate": 0.001, "loss": 2.2013, "step": 128800 }, { "epoch": 11.115942028985508, "grad_norm": 1.7507514953613281, "learning_rate": 0.001, "loss": 2.1827, "step": 128856 }, { "epoch": 11.120772946859903, "grad_norm": 25.85240364074707, "learning_rate": 0.001, "loss": 2.1829, "step": 128912 }, { "epoch": 11.1256038647343, "grad_norm": 4.992105960845947, "learning_rate": 0.001, "loss": 2.1848, "step": 128968 }, { "epoch": 11.130434782608695, "grad_norm": 1.643103003501892, "learning_rate": 0.001, "loss": 2.1838, "step": 129024 }, { "epoch": 11.135265700483092, "grad_norm": 1.097631812095642, "learning_rate": 0.001, "loss": 2.1954, "step": 129080 }, { "epoch": 11.140096618357488, "grad_norm": 2.4652206897735596, "learning_rate": 0.001, "loss": 2.1855, "step": 129136 }, { "epoch": 11.144927536231885, "grad_norm": 1.1412129402160645, "learning_rate": 0.001, "loss": 2.1792, "step": 129192 }, { "epoch": 11.14975845410628, "grad_norm": 1.0345125198364258, "learning_rate": 0.001, "loss": 2.1869, "step": 129248 }, { "epoch": 11.154589371980677, "grad_norm": 1.363893747329712, "learning_rate": 0.001, "loss": 2.18, "step": 129304 }, { "epoch": 11.159420289855072, "grad_norm": 0.6385408043861389, "learning_rate": 0.001, "loss": 2.1717, "step": 129360 }, { "epoch": 11.16425120772947, "grad_norm": 2.4874866008758545, "learning_rate": 0.001, "loss": 2.1684, "step": 129416 }, { "epoch": 11.169082125603865, "grad_norm": 4.056140422821045, "learning_rate": 0.001, "loss": 2.1568, "step": 129472 }, { "epoch": 11.173913043478262, "grad_norm": 0.8279305696487427, "learning_rate": 0.001, "loss": 2.1596, "step": 129528 }, { "epoch": 11.178743961352657, "grad_norm": 1.3949216604232788, "learning_rate": 0.001, "loss": 2.1551, "step": 129584 }, { "epoch": 11.183574879227054, "grad_norm": 1.515504002571106, "learning_rate": 0.001, "loss": 2.1448, "step": 129640 }, { "epoch": 11.18840579710145, "grad_norm": 1.1438316106796265, "learning_rate": 0.001, "loss": 2.1436, "step": 129696 }, { "epoch": 11.193236714975846, "grad_norm": 0.8959769606590271, "learning_rate": 0.001, "loss": 2.1386, "step": 129752 }, { "epoch": 11.198067632850242, "grad_norm": 1.9335591793060303, "learning_rate": 0.001, "loss": 2.131, "step": 129808 }, { "epoch": 11.202898550724637, "grad_norm": 1.6763603687286377, "learning_rate": 0.001, "loss": 2.1403, "step": 129864 }, { "epoch": 11.207729468599034, "grad_norm": 0.9397940635681152, "learning_rate": 0.001, "loss": 2.1603, "step": 129920 }, { "epoch": 11.21256038647343, "grad_norm": 2.0035507678985596, "learning_rate": 0.001, "loss": 2.159, "step": 129976 }, { "epoch": 11.217391304347826, "grad_norm": 4.153164863586426, "learning_rate": 0.001, "loss": 2.1718, "step": 130032 }, { "epoch": 11.222222222222221, "grad_norm": 2.4249260425567627, "learning_rate": 0.001, "loss": 2.1789, "step": 130088 }, { "epoch": 11.227053140096618, "grad_norm": 1.4497575759887695, "learning_rate": 0.001, "loss": 2.1619, "step": 130144 }, { "epoch": 11.231884057971014, "grad_norm": 1.4982216358184814, "learning_rate": 0.001, "loss": 2.1423, "step": 130200 }, { "epoch": 11.23671497584541, "grad_norm": 1.3686857223510742, "learning_rate": 0.001, "loss": 2.1365, "step": 130256 }, { "epoch": 11.241545893719806, "grad_norm": 2.336411952972412, "learning_rate": 0.001, "loss": 2.1395, "step": 130312 }, { "epoch": 11.246376811594203, "grad_norm": 2.833037853240967, "learning_rate": 0.001, "loss": 2.144, "step": 130368 }, { "epoch": 11.251207729468598, "grad_norm": 1.3380255699157715, "learning_rate": 0.001, "loss": 2.1444, "step": 130424 }, { "epoch": 11.256038647342995, "grad_norm": 6.243875980377197, "learning_rate": 0.001, "loss": 2.1557, "step": 130480 }, { "epoch": 11.26086956521739, "grad_norm": 0.9205509424209595, "learning_rate": 0.001, "loss": 2.1409, "step": 130536 }, { "epoch": 11.265700483091788, "grad_norm": 1.081005573272705, "learning_rate": 0.001, "loss": 2.1498, "step": 130592 }, { "epoch": 11.270531400966183, "grad_norm": 2.0074262619018555, "learning_rate": 0.001, "loss": 2.1494, "step": 130648 }, { "epoch": 11.27536231884058, "grad_norm": 6.902637004852295, "learning_rate": 0.001, "loss": 2.1601, "step": 130704 }, { "epoch": 11.280193236714975, "grad_norm": 1.7552622556686401, "learning_rate": 0.001, "loss": 2.1705, "step": 130760 }, { "epoch": 11.285024154589372, "grad_norm": 1.0672425031661987, "learning_rate": 0.001, "loss": 2.1923, "step": 130816 }, { "epoch": 11.289855072463768, "grad_norm": 1.2315629720687866, "learning_rate": 0.001, "loss": 2.1814, "step": 130872 }, { "epoch": 11.294685990338165, "grad_norm": 2.650214195251465, "learning_rate": 0.001, "loss": 2.166, "step": 130928 }, { "epoch": 11.29951690821256, "grad_norm": 1.0113489627838135, "learning_rate": 0.001, "loss": 2.1577, "step": 130984 }, { "epoch": 11.304347826086957, "grad_norm": 0.7434856295585632, "learning_rate": 0.001, "loss": 2.1523, "step": 131040 }, { "epoch": 11.309178743961352, "grad_norm": 2.6903069019317627, "learning_rate": 0.001, "loss": 2.145, "step": 131096 }, { "epoch": 11.31400966183575, "grad_norm": 0.7639374136924744, "learning_rate": 0.001, "loss": 2.1603, "step": 131152 }, { "epoch": 11.318840579710145, "grad_norm": 7.009921073913574, "learning_rate": 0.001, "loss": 2.1613, "step": 131208 }, { "epoch": 11.323671497584542, "grad_norm": 1.8917431831359863, "learning_rate": 0.001, "loss": 2.1763, "step": 131264 }, { "epoch": 11.328502415458937, "grad_norm": 2.3466415405273438, "learning_rate": 0.001, "loss": 2.1955, "step": 131320 }, { "epoch": 11.333333333333334, "grad_norm": 1.9311261177062988, "learning_rate": 0.001, "loss": 2.192, "step": 131376 }, { "epoch": 11.33816425120773, "grad_norm": 3.4147510528564453, "learning_rate": 0.001, "loss": 2.1842, "step": 131432 }, { "epoch": 11.342995169082126, "grad_norm": 2.3967483043670654, "learning_rate": 0.001, "loss": 2.1629, "step": 131488 }, { "epoch": 11.347826086956522, "grad_norm": 1.3805783987045288, "learning_rate": 0.001, "loss": 2.1735, "step": 131544 }, { "epoch": 11.352657004830919, "grad_norm": 1.4286952018737793, "learning_rate": 0.001, "loss": 2.166, "step": 131600 }, { "epoch": 11.357487922705314, "grad_norm": 3.0122897624969482, "learning_rate": 0.001, "loss": 2.1708, "step": 131656 }, { "epoch": 11.36231884057971, "grad_norm": 2.378324031829834, "learning_rate": 0.001, "loss": 2.165, "step": 131712 }, { "epoch": 11.367149758454106, "grad_norm": 1.1295639276504517, "learning_rate": 0.001, "loss": 2.1554, "step": 131768 }, { "epoch": 11.371980676328503, "grad_norm": 7.065188884735107, "learning_rate": 0.001, "loss": 2.1571, "step": 131824 }, { "epoch": 11.376811594202898, "grad_norm": 3.6286349296569824, "learning_rate": 0.001, "loss": 2.163, "step": 131880 }, { "epoch": 11.381642512077295, "grad_norm": 0.7777979373931885, "learning_rate": 0.001, "loss": 2.1649, "step": 131936 }, { "epoch": 11.38647342995169, "grad_norm": 1.3991354703903198, "learning_rate": 0.001, "loss": 2.1506, "step": 131992 }, { "epoch": 11.391304347826088, "grad_norm": 2.2683894634246826, "learning_rate": 0.001, "loss": 2.1498, "step": 132048 }, { "epoch": 11.396135265700483, "grad_norm": 2.3293395042419434, "learning_rate": 0.001, "loss": 2.1575, "step": 132104 }, { "epoch": 11.40096618357488, "grad_norm": 0.8002650737762451, "learning_rate": 0.001, "loss": 2.1556, "step": 132160 }, { "epoch": 11.405797101449275, "grad_norm": 1.4381272792816162, "learning_rate": 0.001, "loss": 2.1537, "step": 132216 }, { "epoch": 11.41062801932367, "grad_norm": 5.938973426818848, "learning_rate": 0.001, "loss": 2.1626, "step": 132272 }, { "epoch": 11.415458937198068, "grad_norm": 24.3090763092041, "learning_rate": 0.001, "loss": 2.1774, "step": 132328 }, { "epoch": 11.420289855072463, "grad_norm": 2.8144443035125732, "learning_rate": 0.001, "loss": 2.1803, "step": 132384 }, { "epoch": 11.42512077294686, "grad_norm": 1.408156394958496, "learning_rate": 0.001, "loss": 2.1812, "step": 132440 }, { "epoch": 11.429951690821255, "grad_norm": 1.4469841718673706, "learning_rate": 0.001, "loss": 2.1893, "step": 132496 }, { "epoch": 11.434782608695652, "grad_norm": 1.8647303581237793, "learning_rate": 0.001, "loss": 2.1821, "step": 132552 }, { "epoch": 11.439613526570048, "grad_norm": 1.9917547702789307, "learning_rate": 0.001, "loss": 2.1693, "step": 132608 }, { "epoch": 11.444444444444445, "grad_norm": 1.5972574949264526, "learning_rate": 0.001, "loss": 2.166, "step": 132664 }, { "epoch": 11.44927536231884, "grad_norm": 1.7513078451156616, "learning_rate": 0.001, "loss": 2.1541, "step": 132720 }, { "epoch": 11.454106280193237, "grad_norm": 2.1618237495422363, "learning_rate": 0.001, "loss": 2.1541, "step": 132776 }, { "epoch": 11.458937198067632, "grad_norm": 1.2123358249664307, "learning_rate": 0.001, "loss": 2.1688, "step": 132832 }, { "epoch": 11.46376811594203, "grad_norm": 3.722198486328125, "learning_rate": 0.001, "loss": 2.176, "step": 132888 }, { "epoch": 11.468599033816425, "grad_norm": 2.611532688140869, "learning_rate": 0.001, "loss": 2.1826, "step": 132944 }, { "epoch": 11.473429951690822, "grad_norm": 1.7881027460098267, "learning_rate": 0.001, "loss": 2.1585, "step": 133000 }, { "epoch": 11.478260869565217, "grad_norm": 1.67707097530365, "learning_rate": 0.001, "loss": 2.153, "step": 133056 }, { "epoch": 11.483091787439614, "grad_norm": 1.211462378501892, "learning_rate": 0.001, "loss": 2.1596, "step": 133112 }, { "epoch": 11.48792270531401, "grad_norm": 3.2668378353118896, "learning_rate": 0.001, "loss": 2.1522, "step": 133168 }, { "epoch": 11.492753623188406, "grad_norm": 2.447831630706787, "learning_rate": 0.001, "loss": 2.1499, "step": 133224 }, { "epoch": 11.497584541062801, "grad_norm": 1.2445319890975952, "learning_rate": 0.001, "loss": 2.1625, "step": 133280 }, { "epoch": 11.502415458937199, "grad_norm": 0.9578527808189392, "learning_rate": 0.001, "loss": 2.1486, "step": 133336 }, { "epoch": 11.507246376811594, "grad_norm": 0.7497963309288025, "learning_rate": 0.001, "loss": 2.1518, "step": 133392 }, { "epoch": 11.51207729468599, "grad_norm": 1.878481388092041, "learning_rate": 0.001, "loss": 2.1487, "step": 133448 }, { "epoch": 11.516908212560386, "grad_norm": 1.6610082387924194, "learning_rate": 0.001, "loss": 2.1461, "step": 133504 }, { "epoch": 11.521739130434783, "grad_norm": 4.432641983032227, "learning_rate": 0.001, "loss": 2.1457, "step": 133560 }, { "epoch": 11.526570048309178, "grad_norm": 1.100925087928772, "learning_rate": 0.001, "loss": 2.1462, "step": 133616 }, { "epoch": 11.531400966183575, "grad_norm": 3.5141429901123047, "learning_rate": 0.001, "loss": 2.1405, "step": 133672 }, { "epoch": 11.53623188405797, "grad_norm": 1.9338464736938477, "learning_rate": 0.001, "loss": 2.151, "step": 133728 }, { "epoch": 11.541062801932368, "grad_norm": 1.7894530296325684, "learning_rate": 0.001, "loss": 2.1449, "step": 133784 }, { "epoch": 11.545893719806763, "grad_norm": 0.7991990447044373, "learning_rate": 0.001, "loss": 2.1443, "step": 133840 }, { "epoch": 11.55072463768116, "grad_norm": 2.088209390640259, "learning_rate": 0.001, "loss": 2.1564, "step": 133896 }, { "epoch": 11.555555555555555, "grad_norm": 2.1392955780029297, "learning_rate": 0.001, "loss": 2.1456, "step": 133952 }, { "epoch": 11.560386473429952, "grad_norm": 4.288720607757568, "learning_rate": 0.001, "loss": 2.1507, "step": 134008 }, { "epoch": 11.565217391304348, "grad_norm": 1.2219886779785156, "learning_rate": 0.001, "loss": 2.155, "step": 134064 }, { "epoch": 11.570048309178745, "grad_norm": 1.0434324741363525, "learning_rate": 0.001, "loss": 2.1599, "step": 134120 }, { "epoch": 11.57487922705314, "grad_norm": 3.612537384033203, "learning_rate": 0.001, "loss": 2.169, "step": 134176 }, { "epoch": 11.579710144927537, "grad_norm": 1.1100342273712158, "learning_rate": 0.001, "loss": 2.16, "step": 134232 }, { "epoch": 11.584541062801932, "grad_norm": 43.956790924072266, "learning_rate": 0.001, "loss": 2.1619, "step": 134288 }, { "epoch": 11.58937198067633, "grad_norm": 2.0098605155944824, "learning_rate": 0.001, "loss": 2.1593, "step": 134344 }, { "epoch": 11.594202898550725, "grad_norm": 1.0257587432861328, "learning_rate": 0.001, "loss": 2.1598, "step": 134400 }, { "epoch": 11.59903381642512, "grad_norm": 2.1896414756774902, "learning_rate": 0.001, "loss": 2.1667, "step": 134456 }, { "epoch": 11.603864734299517, "grad_norm": 1.5707670450210571, "learning_rate": 0.001, "loss": 2.1578, "step": 134512 }, { "epoch": 11.608695652173914, "grad_norm": 3.058683156967163, "learning_rate": 0.001, "loss": 2.1476, "step": 134568 }, { "epoch": 11.61352657004831, "grad_norm": 90.78943634033203, "learning_rate": 0.001, "loss": 2.1577, "step": 134624 }, { "epoch": 11.618357487922705, "grad_norm": 0.8176459074020386, "learning_rate": 0.001, "loss": 2.1488, "step": 134680 }, { "epoch": 11.623188405797102, "grad_norm": 1.7974375486373901, "learning_rate": 0.001, "loss": 2.1629, "step": 134736 }, { "epoch": 11.628019323671497, "grad_norm": 0.8804119825363159, "learning_rate": 0.001, "loss": 2.1623, "step": 134792 }, { "epoch": 11.632850241545894, "grad_norm": 0.8612844347953796, "learning_rate": 0.001, "loss": 2.1794, "step": 134848 }, { "epoch": 11.63768115942029, "grad_norm": 1.7096879482269287, "learning_rate": 0.001, "loss": 2.1721, "step": 134904 }, { "epoch": 11.642512077294686, "grad_norm": 1.6035691499710083, "learning_rate": 0.001, "loss": 2.1554, "step": 134960 }, { "epoch": 11.647342995169081, "grad_norm": 2.2473886013031006, "learning_rate": 0.001, "loss": 2.1641, "step": 135016 }, { "epoch": 11.652173913043478, "grad_norm": 2.8684732913970947, "learning_rate": 0.001, "loss": 2.1551, "step": 135072 }, { "epoch": 11.657004830917874, "grad_norm": 1.1871248483657837, "learning_rate": 0.001, "loss": 2.1589, "step": 135128 }, { "epoch": 11.66183574879227, "grad_norm": 2.1372108459472656, "learning_rate": 0.001, "loss": 2.1786, "step": 135184 }, { "epoch": 11.666666666666666, "grad_norm": 2.134202718734741, "learning_rate": 0.001, "loss": 2.1682, "step": 135240 }, { "epoch": 11.671497584541063, "grad_norm": 1.339689016342163, "learning_rate": 0.001, "loss": 2.1536, "step": 135296 }, { "epoch": 11.676328502415458, "grad_norm": 2.0542263984680176, "learning_rate": 0.001, "loss": 2.1496, "step": 135352 }, { "epoch": 11.681159420289855, "grad_norm": 1.365377426147461, "learning_rate": 0.001, "loss": 2.1402, "step": 135408 }, { "epoch": 11.68599033816425, "grad_norm": 1.2262927293777466, "learning_rate": 0.001, "loss": 2.1284, "step": 135464 }, { "epoch": 11.690821256038648, "grad_norm": 1.1472821235656738, "learning_rate": 0.001, "loss": 2.1397, "step": 135520 }, { "epoch": 11.695652173913043, "grad_norm": 1.2871700525283813, "learning_rate": 0.001, "loss": 2.135, "step": 135576 }, { "epoch": 11.70048309178744, "grad_norm": 0.8797616362571716, "learning_rate": 0.001, "loss": 2.1341, "step": 135632 }, { "epoch": 11.705314009661835, "grad_norm": 1.3007557392120361, "learning_rate": 0.001, "loss": 2.1409, "step": 135688 }, { "epoch": 11.710144927536232, "grad_norm": 2.126065969467163, "learning_rate": 0.001, "loss": 2.1421, "step": 135744 }, { "epoch": 11.714975845410628, "grad_norm": 2.382359266281128, "learning_rate": 0.001, "loss": 2.1439, "step": 135800 }, { "epoch": 11.719806763285025, "grad_norm": 3.3054733276367188, "learning_rate": 0.001, "loss": 2.1553, "step": 135856 }, { "epoch": 11.72463768115942, "grad_norm": 1.831474781036377, "learning_rate": 0.001, "loss": 2.1671, "step": 135912 }, { "epoch": 11.729468599033817, "grad_norm": 1.4319506883621216, "learning_rate": 0.001, "loss": 2.1632, "step": 135968 }, { "epoch": 11.734299516908212, "grad_norm": 1.1082159280776978, "learning_rate": 0.001, "loss": 2.162, "step": 136024 }, { "epoch": 11.73913043478261, "grad_norm": 1.0175840854644775, "learning_rate": 0.001, "loss": 2.1566, "step": 136080 }, { "epoch": 11.743961352657005, "grad_norm": 0.8453747034072876, "learning_rate": 0.001, "loss": 2.1437, "step": 136136 }, { "epoch": 11.748792270531402, "grad_norm": 1.7449556589126587, "learning_rate": 0.001, "loss": 2.1546, "step": 136192 }, { "epoch": 11.753623188405797, "grad_norm": 0.975506067276001, "learning_rate": 0.001, "loss": 2.1587, "step": 136248 }, { "epoch": 11.758454106280194, "grad_norm": 1.2254399061203003, "learning_rate": 0.001, "loss": 2.1486, "step": 136304 }, { "epoch": 11.76328502415459, "grad_norm": 1.4558777809143066, "learning_rate": 0.001, "loss": 2.1614, "step": 136360 }, { "epoch": 11.768115942028986, "grad_norm": 3.111281394958496, "learning_rate": 0.001, "loss": 2.1693, "step": 136416 }, { "epoch": 11.772946859903382, "grad_norm": 3.4066739082336426, "learning_rate": 0.001, "loss": 2.1833, "step": 136472 }, { "epoch": 11.777777777777779, "grad_norm": 3.0420312881469727, "learning_rate": 0.001, "loss": 2.192, "step": 136528 }, { "epoch": 11.782608695652174, "grad_norm": 3.7077364921569824, "learning_rate": 0.001, "loss": 2.1885, "step": 136584 }, { "epoch": 11.78743961352657, "grad_norm": 2.0996952056884766, "learning_rate": 0.001, "loss": 2.1795, "step": 136640 }, { "epoch": 11.792270531400966, "grad_norm": 1.9807239770889282, "learning_rate": 0.001, "loss": 2.1722, "step": 136696 }, { "epoch": 11.797101449275363, "grad_norm": 2.2017979621887207, "learning_rate": 0.001, "loss": 2.1667, "step": 136752 }, { "epoch": 11.801932367149758, "grad_norm": 1.8431072235107422, "learning_rate": 0.001, "loss": 2.1539, "step": 136808 }, { "epoch": 11.806763285024154, "grad_norm": 1.7913908958435059, "learning_rate": 0.001, "loss": 2.1653, "step": 136864 }, { "epoch": 11.81159420289855, "grad_norm": 2.1519887447357178, "learning_rate": 0.001, "loss": 2.1749, "step": 136920 }, { "epoch": 11.816425120772946, "grad_norm": 0.7495535612106323, "learning_rate": 0.001, "loss": 2.1789, "step": 136976 }, { "epoch": 11.821256038647343, "grad_norm": 0.7424222230911255, "learning_rate": 0.001, "loss": 2.1691, "step": 137032 }, { "epoch": 11.826086956521738, "grad_norm": 2.66715931892395, "learning_rate": 0.001, "loss": 2.1772, "step": 137088 }, { "epoch": 11.830917874396135, "grad_norm": 1.4277801513671875, "learning_rate": 0.001, "loss": 2.165, "step": 137144 }, { "epoch": 11.83574879227053, "grad_norm": 2.4702582359313965, "learning_rate": 0.001, "loss": 2.1683, "step": 137200 }, { "epoch": 11.840579710144928, "grad_norm": 2.1138689517974854, "learning_rate": 0.001, "loss": 2.1941, "step": 137256 }, { "epoch": 11.845410628019323, "grad_norm": 1.0289188623428345, "learning_rate": 0.001, "loss": 2.1854, "step": 137312 }, { "epoch": 11.85024154589372, "grad_norm": 6.840153217315674, "learning_rate": 0.001, "loss": 2.1753, "step": 137368 }, { "epoch": 11.855072463768115, "grad_norm": 1.8663305044174194, "learning_rate": 0.001, "loss": 2.1778, "step": 137424 }, { "epoch": 11.859903381642512, "grad_norm": 11.929183959960938, "learning_rate": 0.001, "loss": 2.1917, "step": 137480 }, { "epoch": 11.864734299516908, "grad_norm": 6.134036064147949, "learning_rate": 0.001, "loss": 2.1867, "step": 137536 }, { "epoch": 11.869565217391305, "grad_norm": 8.179972648620605, "learning_rate": 0.001, "loss": 2.1902, "step": 137592 }, { "epoch": 11.8743961352657, "grad_norm": 1.656633973121643, "learning_rate": 0.001, "loss": 2.169, "step": 137648 }, { "epoch": 11.879227053140097, "grad_norm": 1.4404828548431396, "learning_rate": 0.001, "loss": 2.1699, "step": 137704 }, { "epoch": 11.884057971014492, "grad_norm": 4.035676956176758, "learning_rate": 0.001, "loss": 2.1627, "step": 137760 }, { "epoch": 11.88888888888889, "grad_norm": 3.11236572265625, "learning_rate": 0.001, "loss": 2.1563, "step": 137816 }, { "epoch": 11.893719806763285, "grad_norm": 6.423616886138916, "learning_rate": 0.001, "loss": 2.161, "step": 137872 }, { "epoch": 11.898550724637682, "grad_norm": 1.2624446153640747, "learning_rate": 0.001, "loss": 2.1547, "step": 137928 }, { "epoch": 11.903381642512077, "grad_norm": 1.8141759634017944, "learning_rate": 0.001, "loss": 2.1653, "step": 137984 }, { "epoch": 11.908212560386474, "grad_norm": 3.13934063911438, "learning_rate": 0.001, "loss": 2.169, "step": 138040 }, { "epoch": 11.91304347826087, "grad_norm": 0.7363304495811462, "learning_rate": 0.001, "loss": 2.1718, "step": 138096 }, { "epoch": 11.917874396135266, "grad_norm": 7.272291660308838, "learning_rate": 0.001, "loss": 2.1721, "step": 138152 }, { "epoch": 11.922705314009661, "grad_norm": 2.5603482723236084, "learning_rate": 0.001, "loss": 2.1707, "step": 138208 }, { "epoch": 11.927536231884059, "grad_norm": 6.911461353302002, "learning_rate": 0.001, "loss": 2.1828, "step": 138264 }, { "epoch": 11.932367149758454, "grad_norm": 2.6392252445220947, "learning_rate": 0.001, "loss": 2.1714, "step": 138320 }, { "epoch": 11.93719806763285, "grad_norm": 0.8566041588783264, "learning_rate": 0.001, "loss": 2.1745, "step": 138376 }, { "epoch": 11.942028985507246, "grad_norm": 3.405165672302246, "learning_rate": 0.001, "loss": 2.1859, "step": 138432 }, { "epoch": 11.946859903381643, "grad_norm": 0.9229844808578491, "learning_rate": 0.001, "loss": 2.1723, "step": 138488 }, { "epoch": 11.951690821256038, "grad_norm": 0.9978861212730408, "learning_rate": 0.001, "loss": 2.1812, "step": 138544 }, { "epoch": 11.956521739130435, "grad_norm": 2.7527482509613037, "learning_rate": 0.001, "loss": 2.1809, "step": 138600 }, { "epoch": 11.96135265700483, "grad_norm": 3.1028177738189697, "learning_rate": 0.001, "loss": 2.1768, "step": 138656 }, { "epoch": 11.966183574879228, "grad_norm": 1.0113414525985718, "learning_rate": 0.001, "loss": 2.1804, "step": 138712 }, { "epoch": 11.971014492753623, "grad_norm": 4.682823181152344, "learning_rate": 0.001, "loss": 2.1734, "step": 138768 }, { "epoch": 11.97584541062802, "grad_norm": 0.6051453948020935, "learning_rate": 0.001, "loss": 2.1691, "step": 138824 }, { "epoch": 11.980676328502415, "grad_norm": 2.108956813812256, "learning_rate": 0.001, "loss": 2.165, "step": 138880 }, { "epoch": 11.985507246376812, "grad_norm": 0.39985644817352295, "learning_rate": 0.001, "loss": 2.1514, "step": 138936 }, { "epoch": 11.990338164251208, "grad_norm": 0.8410494923591614, "learning_rate": 0.001, "loss": 2.1606, "step": 138992 }, { "epoch": 11.995169082125603, "grad_norm": 1.4523165225982666, "learning_rate": 0.001, "loss": 2.1601, "step": 139048 }, { "epoch": 12.0, "grad_norm": 3.06986141204834, "learning_rate": 0.001, "loss": 2.1758, "step": 139104 }, { "epoch": 12.004830917874395, "grad_norm": 1.7074397802352905, "learning_rate": 0.001, "loss": 2.1382, "step": 139160 }, { "epoch": 12.009661835748792, "grad_norm": 1.0722459554672241, "learning_rate": 0.001, "loss": 2.1318, "step": 139216 }, { "epoch": 12.014492753623188, "grad_norm": 2.7510175704956055, "learning_rate": 0.001, "loss": 2.1191, "step": 139272 }, { "epoch": 12.019323671497585, "grad_norm": 1.4080379009246826, "learning_rate": 0.001, "loss": 2.1216, "step": 139328 }, { "epoch": 12.02415458937198, "grad_norm": 1.3800874948501587, "learning_rate": 0.001, "loss": 2.1238, "step": 139384 }, { "epoch": 12.028985507246377, "grad_norm": 6.7165679931640625, "learning_rate": 0.001, "loss": 2.1396, "step": 139440 }, { "epoch": 12.033816425120772, "grad_norm": 1.1638861894607544, "learning_rate": 0.001, "loss": 2.1195, "step": 139496 }, { "epoch": 12.03864734299517, "grad_norm": 1.0675334930419922, "learning_rate": 0.001, "loss": 2.1187, "step": 139552 }, { "epoch": 12.043478260869565, "grad_norm": 2.1167635917663574, "learning_rate": 0.001, "loss": 2.1139, "step": 139608 }, { "epoch": 12.048309178743962, "grad_norm": 1.0412802696228027, "learning_rate": 0.001, "loss": 2.1215, "step": 139664 }, { "epoch": 12.053140096618357, "grad_norm": 1.233525037765503, "learning_rate": 0.001, "loss": 2.1031, "step": 139720 }, { "epoch": 12.057971014492754, "grad_norm": 1.584393858909607, "learning_rate": 0.001, "loss": 2.1119, "step": 139776 }, { "epoch": 12.06280193236715, "grad_norm": 1.3282197713851929, "learning_rate": 0.001, "loss": 2.1215, "step": 139832 }, { "epoch": 12.067632850241546, "grad_norm": 1.5350204706192017, "learning_rate": 0.001, "loss": 2.1312, "step": 139888 }, { "epoch": 12.072463768115941, "grad_norm": 1.5638731718063354, "learning_rate": 0.001, "loss": 2.1227, "step": 139944 }, { "epoch": 12.077294685990339, "grad_norm": 1.862272024154663, "learning_rate": 0.001, "loss": 2.1254, "step": 140000 }, { "epoch": 12.082125603864734, "grad_norm": 11.566842079162598, "learning_rate": 0.001, "loss": 2.1347, "step": 140056 }, { "epoch": 12.08695652173913, "grad_norm": 13.141790390014648, "learning_rate": 0.001, "loss": 2.1386, "step": 140112 }, { "epoch": 12.091787439613526, "grad_norm": 42.42540740966797, "learning_rate": 0.001, "loss": 2.1418, "step": 140168 }, { "epoch": 12.096618357487923, "grad_norm": 1.0227177143096924, "learning_rate": 0.001, "loss": 2.1627, "step": 140224 }, { "epoch": 12.101449275362318, "grad_norm": 1.3916877508163452, "learning_rate": 0.001, "loss": 2.1645, "step": 140280 }, { "epoch": 12.106280193236715, "grad_norm": 2.8892128467559814, "learning_rate": 0.001, "loss": 2.1544, "step": 140336 }, { "epoch": 12.11111111111111, "grad_norm": 1.1164380311965942, "learning_rate": 0.001, "loss": 2.1603, "step": 140392 }, { "epoch": 12.115942028985508, "grad_norm": 1.561012625694275, "learning_rate": 0.001, "loss": 2.1503, "step": 140448 }, { "epoch": 12.120772946859903, "grad_norm": 2.6087722778320312, "learning_rate": 0.001, "loss": 2.1337, "step": 140504 }, { "epoch": 12.1256038647343, "grad_norm": 19.377309799194336, "learning_rate": 0.001, "loss": 2.1279, "step": 140560 }, { "epoch": 12.130434782608695, "grad_norm": 1.271406888961792, "learning_rate": 0.001, "loss": 2.1202, "step": 140616 }, { "epoch": 12.135265700483092, "grad_norm": 1.743664264678955, "learning_rate": 0.001, "loss": 2.1267, "step": 140672 }, { "epoch": 12.140096618357488, "grad_norm": 1.625611662864685, "learning_rate": 0.001, "loss": 2.1208, "step": 140728 }, { "epoch": 12.144927536231885, "grad_norm": 2.3711163997650146, "learning_rate": 0.001, "loss": 2.1316, "step": 140784 }, { "epoch": 12.14975845410628, "grad_norm": 4.0641608238220215, "learning_rate": 0.001, "loss": 2.1425, "step": 140840 }, { "epoch": 12.154589371980677, "grad_norm": 2.2252283096313477, "learning_rate": 0.001, "loss": 2.1538, "step": 140896 }, { "epoch": 12.159420289855072, "grad_norm": 28.733789443969727, "learning_rate": 0.001, "loss": 2.1415, "step": 140952 }, { "epoch": 12.16425120772947, "grad_norm": 1.5870418548583984, "learning_rate": 0.001, "loss": 2.1433, "step": 141008 }, { "epoch": 12.169082125603865, "grad_norm": 2.2028937339782715, "learning_rate": 0.001, "loss": 2.1799, "step": 141064 }, { "epoch": 12.173913043478262, "grad_norm": 4.300158500671387, "learning_rate": 0.001, "loss": 2.1864, "step": 141120 }, { "epoch": 12.178743961352657, "grad_norm": 16.284698486328125, "learning_rate": 0.001, "loss": 2.1829, "step": 141176 }, { "epoch": 12.183574879227054, "grad_norm": 5.202531814575195, "learning_rate": 0.001, "loss": 2.1746, "step": 141232 }, { "epoch": 12.18840579710145, "grad_norm": 2.1948845386505127, "learning_rate": 0.001, "loss": 2.177, "step": 141288 }, { "epoch": 12.193236714975846, "grad_norm": 2.1246914863586426, "learning_rate": 0.001, "loss": 2.1818, "step": 141344 }, { "epoch": 12.198067632850242, "grad_norm": 2.7810511589050293, "learning_rate": 0.001, "loss": 2.1812, "step": 141400 }, { "epoch": 12.202898550724637, "grad_norm": 1.7156684398651123, "learning_rate": 0.001, "loss": 2.1628, "step": 141456 }, { "epoch": 12.207729468599034, "grad_norm": 1.5086328983306885, "learning_rate": 0.001, "loss": 2.17, "step": 141512 }, { "epoch": 12.21256038647343, "grad_norm": 2.2895312309265137, "learning_rate": 0.001, "loss": 2.1491, "step": 141568 }, { "epoch": 12.217391304347826, "grad_norm": 3.1194708347320557, "learning_rate": 0.001, "loss": 2.1551, "step": 141624 }, { "epoch": 12.222222222222221, "grad_norm": 1.0032291412353516, "learning_rate": 0.001, "loss": 2.1492, "step": 141680 }, { "epoch": 12.227053140096618, "grad_norm": 4.8116960525512695, "learning_rate": 0.001, "loss": 2.1408, "step": 141736 }, { "epoch": 12.231884057971014, "grad_norm": 6.912002086639404, "learning_rate": 0.001, "loss": 2.1415, "step": 141792 }, { "epoch": 12.23671497584541, "grad_norm": 1.4610812664031982, "learning_rate": 0.001, "loss": 2.1448, "step": 141848 }, { "epoch": 12.241545893719806, "grad_norm": 4.043923377990723, "learning_rate": 0.001, "loss": 2.1564, "step": 141904 }, { "epoch": 12.246376811594203, "grad_norm": 1.829960823059082, "learning_rate": 0.001, "loss": 2.161, "step": 141960 }, { "epoch": 12.251207729468598, "grad_norm": 3.8297719955444336, "learning_rate": 0.001, "loss": 2.154, "step": 142016 }, { "epoch": 12.256038647342995, "grad_norm": 2.3305394649505615, "learning_rate": 0.001, "loss": 2.1602, "step": 142072 }, { "epoch": 12.26086956521739, "grad_norm": 2.1256890296936035, "learning_rate": 0.001, "loss": 2.1574, "step": 142128 }, { "epoch": 12.265700483091788, "grad_norm": 1.3211880922317505, "learning_rate": 0.001, "loss": 2.1559, "step": 142184 }, { "epoch": 12.270531400966183, "grad_norm": 3.230886936187744, "learning_rate": 0.001, "loss": 2.1701, "step": 142240 }, { "epoch": 12.27536231884058, "grad_norm": 1.9857741594314575, "learning_rate": 0.001, "loss": 2.1872, "step": 142296 }, { "epoch": 12.280193236714975, "grad_norm": 1.8681553602218628, "learning_rate": 0.001, "loss": 2.1851, "step": 142352 }, { "epoch": 12.285024154589372, "grad_norm": 4.764926910400391, "learning_rate": 0.001, "loss": 2.1832, "step": 142408 }, { "epoch": 12.289855072463768, "grad_norm": 2.012326240539551, "learning_rate": 0.001, "loss": 2.1847, "step": 142464 }, { "epoch": 12.294685990338165, "grad_norm": 16.375873565673828, "learning_rate": 0.001, "loss": 2.1858, "step": 142520 }, { "epoch": 12.29951690821256, "grad_norm": 1.235822081565857, "learning_rate": 0.001, "loss": 2.1956, "step": 142576 }, { "epoch": 12.304347826086957, "grad_norm": 5.588258743286133, "learning_rate": 0.001, "loss": 2.203, "step": 142632 }, { "epoch": 12.309178743961352, "grad_norm": 5.072676181793213, "learning_rate": 0.001, "loss": 2.1697, "step": 142688 }, { "epoch": 12.31400966183575, "grad_norm": 2.5074424743652344, "learning_rate": 0.001, "loss": 2.1717, "step": 142744 }, { "epoch": 12.318840579710145, "grad_norm": 2.709120512008667, "learning_rate": 0.001, "loss": 2.1665, "step": 142800 }, { "epoch": 12.323671497584542, "grad_norm": 3.240938186645508, "learning_rate": 0.001, "loss": 2.155, "step": 142856 }, { "epoch": 12.328502415458937, "grad_norm": 1.7258764505386353, "learning_rate": 0.001, "loss": 2.169, "step": 142912 }, { "epoch": 12.333333333333334, "grad_norm": 1.3388620615005493, "learning_rate": 0.001, "loss": 2.154, "step": 142968 }, { "epoch": 12.33816425120773, "grad_norm": 8.502300262451172, "learning_rate": 0.001, "loss": 2.1571, "step": 143024 }, { "epoch": 12.342995169082126, "grad_norm": 0.8782457709312439, "learning_rate": 0.001, "loss": 2.1672, "step": 143080 }, { "epoch": 12.347826086956522, "grad_norm": 1.126036524772644, "learning_rate": 0.001, "loss": 2.1621, "step": 143136 }, { "epoch": 12.352657004830919, "grad_norm": 0.6641754508018494, "learning_rate": 0.001, "loss": 2.1583, "step": 143192 }, { "epoch": 12.357487922705314, "grad_norm": 0.7980713844299316, "learning_rate": 0.001, "loss": 2.1414, "step": 143248 }, { "epoch": 12.36231884057971, "grad_norm": 1.9634695053100586, "learning_rate": 0.001, "loss": 2.1471, "step": 143304 }, { "epoch": 12.367149758454106, "grad_norm": 1.4545438289642334, "learning_rate": 0.001, "loss": 2.1398, "step": 143360 }, { "epoch": 12.371980676328503, "grad_norm": 1.6943532228469849, "learning_rate": 0.001, "loss": 2.1414, "step": 143416 }, { "epoch": 12.376811594202898, "grad_norm": 0.664146363735199, "learning_rate": 0.001, "loss": 2.1331, "step": 143472 }, { "epoch": 12.381642512077295, "grad_norm": 0.9882853031158447, "learning_rate": 0.001, "loss": 2.1202, "step": 143528 }, { "epoch": 12.38647342995169, "grad_norm": 2.566378116607666, "learning_rate": 0.001, "loss": 2.1315, "step": 143584 }, { "epoch": 12.391304347826088, "grad_norm": 2.5037691593170166, "learning_rate": 0.001, "loss": 2.1423, "step": 143640 }, { "epoch": 12.396135265700483, "grad_norm": 5.4365105628967285, "learning_rate": 0.001, "loss": 2.1253, "step": 143696 }, { "epoch": 12.40096618357488, "grad_norm": 0.8732381463050842, "learning_rate": 0.001, "loss": 2.1363, "step": 143752 }, { "epoch": 12.405797101449275, "grad_norm": 3.01924204826355, "learning_rate": 0.001, "loss": 2.1362, "step": 143808 }, { "epoch": 12.41062801932367, "grad_norm": 0.6369227766990662, "learning_rate": 0.001, "loss": 2.1306, "step": 143864 }, { "epoch": 12.415458937198068, "grad_norm": 1.3475645780563354, "learning_rate": 0.001, "loss": 2.1441, "step": 143920 }, { "epoch": 12.420289855072463, "grad_norm": 16.77593994140625, "learning_rate": 0.001, "loss": 2.134, "step": 143976 }, { "epoch": 12.42512077294686, "grad_norm": 1.5985064506530762, "learning_rate": 0.001, "loss": 2.1316, "step": 144032 }, { "epoch": 12.429951690821255, "grad_norm": 1.7535443305969238, "learning_rate": 0.001, "loss": 2.1401, "step": 144088 }, { "epoch": 12.434782608695652, "grad_norm": 0.8143284916877747, "learning_rate": 0.001, "loss": 2.1407, "step": 144144 }, { "epoch": 12.439613526570048, "grad_norm": 0.7159766554832458, "learning_rate": 0.001, "loss": 2.1432, "step": 144200 }, { "epoch": 12.444444444444445, "grad_norm": 3.6786153316497803, "learning_rate": 0.001, "loss": 2.153, "step": 144256 }, { "epoch": 12.44927536231884, "grad_norm": 0.9263442754745483, "learning_rate": 0.001, "loss": 2.143, "step": 144312 }, { "epoch": 12.454106280193237, "grad_norm": 1.5495977401733398, "learning_rate": 0.001, "loss": 2.1377, "step": 144368 }, { "epoch": 12.458937198067632, "grad_norm": 1.5098943710327148, "learning_rate": 0.001, "loss": 2.1353, "step": 144424 }, { "epoch": 12.46376811594203, "grad_norm": 0.6181824207305908, "learning_rate": 0.001, "loss": 2.1272, "step": 144480 }, { "epoch": 12.468599033816425, "grad_norm": 2.3013219833374023, "learning_rate": 0.001, "loss": 2.127, "step": 144536 }, { "epoch": 12.473429951690822, "grad_norm": 2.7798948287963867, "learning_rate": 0.001, "loss": 2.1483, "step": 144592 }, { "epoch": 12.478260869565217, "grad_norm": 1.0423344373703003, "learning_rate": 0.001, "loss": 2.1516, "step": 144648 }, { "epoch": 12.483091787439614, "grad_norm": 1.4084854125976562, "learning_rate": 0.001, "loss": 2.146, "step": 144704 }, { "epoch": 12.48792270531401, "grad_norm": 1.077108383178711, "learning_rate": 0.001, "loss": 2.139, "step": 144760 }, { "epoch": 12.492753623188406, "grad_norm": 1.4833850860595703, "learning_rate": 0.001, "loss": 2.1199, "step": 144816 }, { "epoch": 12.497584541062801, "grad_norm": 3.583406448364258, "learning_rate": 0.001, "loss": 2.1206, "step": 144872 }, { "epoch": 12.502415458937199, "grad_norm": 3.388789415359497, "learning_rate": 0.001, "loss": 2.1259, "step": 144928 }, { "epoch": 12.507246376811594, "grad_norm": 1.5271341800689697, "learning_rate": 0.001, "loss": 2.136, "step": 144984 }, { "epoch": 12.51207729468599, "grad_norm": 0.9995395541191101, "learning_rate": 0.001, "loss": 2.1438, "step": 145040 }, { "epoch": 12.516908212560386, "grad_norm": 1.144813895225525, "learning_rate": 0.001, "loss": 2.1434, "step": 145096 }, { "epoch": 12.521739130434783, "grad_norm": 18.142234802246094, "learning_rate": 0.001, "loss": 2.136, "step": 145152 }, { "epoch": 12.526570048309178, "grad_norm": 1.2944560050964355, "learning_rate": 0.001, "loss": 2.1269, "step": 145208 }, { "epoch": 12.531400966183575, "grad_norm": 1.6908440589904785, "learning_rate": 0.001, "loss": 2.1338, "step": 145264 }, { "epoch": 12.53623188405797, "grad_norm": 1.6211026906967163, "learning_rate": 0.001, "loss": 2.1375, "step": 145320 }, { "epoch": 12.541062801932368, "grad_norm": 0.6614188551902771, "learning_rate": 0.001, "loss": 2.1364, "step": 145376 }, { "epoch": 12.545893719806763, "grad_norm": 1.1642796993255615, "learning_rate": 0.001, "loss": 2.1338, "step": 145432 }, { "epoch": 12.55072463768116, "grad_norm": 1.3793939352035522, "learning_rate": 0.001, "loss": 2.1303, "step": 145488 }, { "epoch": 12.555555555555555, "grad_norm": 0.9628016352653503, "learning_rate": 0.001, "loss": 2.1281, "step": 145544 }, { "epoch": 12.560386473429952, "grad_norm": 4.906208515167236, "learning_rate": 0.001, "loss": 2.1197, "step": 145600 }, { "epoch": 12.565217391304348, "grad_norm": 1.371993064880371, "learning_rate": 0.001, "loss": 2.1226, "step": 145656 }, { "epoch": 12.570048309178745, "grad_norm": 0.5131025314331055, "learning_rate": 0.001, "loss": 2.1155, "step": 145712 }, { "epoch": 12.57487922705314, "grad_norm": 0.8918805718421936, "learning_rate": 0.001, "loss": 2.1084, "step": 145768 }, { "epoch": 12.579710144927537, "grad_norm": 1.0736923217773438, "learning_rate": 0.001, "loss": 2.1246, "step": 145824 }, { "epoch": 12.584541062801932, "grad_norm": 0.9937901496887207, "learning_rate": 0.001, "loss": 2.114, "step": 145880 }, { "epoch": 12.58937198067633, "grad_norm": 2.28429913520813, "learning_rate": 0.001, "loss": 2.1217, "step": 145936 }, { "epoch": 12.594202898550725, "grad_norm": 1.0660549402236938, "learning_rate": 0.001, "loss": 2.1258, "step": 145992 }, { "epoch": 12.59903381642512, "grad_norm": 1.0357987880706787, "learning_rate": 0.001, "loss": 2.1279, "step": 146048 }, { "epoch": 12.603864734299517, "grad_norm": 1.8048168420791626, "learning_rate": 0.001, "loss": 2.1272, "step": 146104 }, { "epoch": 12.608695652173914, "grad_norm": 1.326817512512207, "learning_rate": 0.001, "loss": 2.1131, "step": 146160 }, { "epoch": 12.61352657004831, "grad_norm": 0.8317714929580688, "learning_rate": 0.001, "loss": 2.1251, "step": 146216 }, { "epoch": 12.618357487922705, "grad_norm": 0.592576265335083, "learning_rate": 0.001, "loss": 2.118, "step": 146272 }, { "epoch": 12.623188405797102, "grad_norm": 3.277266263961792, "learning_rate": 0.001, "loss": 2.1097, "step": 146328 }, { "epoch": 12.628019323671497, "grad_norm": 0.5582002997398376, "learning_rate": 0.001, "loss": 2.1347, "step": 146384 }, { "epoch": 12.632850241545894, "grad_norm": 0.9426195025444031, "learning_rate": 0.001, "loss": 2.1392, "step": 146440 }, { "epoch": 12.63768115942029, "grad_norm": 0.9630420804023743, "learning_rate": 0.001, "loss": 2.1327, "step": 146496 }, { "epoch": 12.642512077294686, "grad_norm": 0.8565064668655396, "learning_rate": 0.001, "loss": 2.1326, "step": 146552 }, { "epoch": 12.647342995169081, "grad_norm": 1.0423953533172607, "learning_rate": 0.001, "loss": 2.1316, "step": 146608 }, { "epoch": 12.652173913043478, "grad_norm": 1.3161524534225464, "learning_rate": 0.001, "loss": 2.1291, "step": 146664 }, { "epoch": 12.657004830917874, "grad_norm": 2.927644729614258, "learning_rate": 0.001, "loss": 2.1383, "step": 146720 }, { "epoch": 12.66183574879227, "grad_norm": 1.5749315023422241, "learning_rate": 0.001, "loss": 2.1354, "step": 146776 }, { "epoch": 12.666666666666666, "grad_norm": 2.4168426990509033, "learning_rate": 0.001, "loss": 2.1367, "step": 146832 }, { "epoch": 12.671497584541063, "grad_norm": 1.2343653440475464, "learning_rate": 0.001, "loss": 2.1447, "step": 146888 }, { "epoch": 12.676328502415458, "grad_norm": 2.962412118911743, "learning_rate": 0.001, "loss": 2.1426, "step": 146944 }, { "epoch": 12.681159420289855, "grad_norm": 1.166462779045105, "learning_rate": 0.001, "loss": 2.1246, "step": 147000 }, { "epoch": 12.68599033816425, "grad_norm": 1.6188726425170898, "learning_rate": 0.001, "loss": 2.1282, "step": 147056 }, { "epoch": 12.690821256038648, "grad_norm": 1.0293705463409424, "learning_rate": 0.001, "loss": 2.1255, "step": 147112 }, { "epoch": 12.695652173913043, "grad_norm": 1.152613878250122, "learning_rate": 0.001, "loss": 2.1325, "step": 147168 }, { "epoch": 12.70048309178744, "grad_norm": 4.03063440322876, "learning_rate": 0.001, "loss": 2.1168, "step": 147224 }, { "epoch": 12.705314009661835, "grad_norm": 1.128021240234375, "learning_rate": 0.001, "loss": 2.1261, "step": 147280 }, { "epoch": 12.710144927536232, "grad_norm": 0.9907158613204956, "learning_rate": 0.001, "loss": 2.1198, "step": 147336 }, { "epoch": 12.714975845410628, "grad_norm": 0.7401301860809326, "learning_rate": 0.001, "loss": 2.1198, "step": 147392 }, { "epoch": 12.719806763285025, "grad_norm": 0.8615275025367737, "learning_rate": 0.001, "loss": 2.129, "step": 147448 }, { "epoch": 12.72463768115942, "grad_norm": 0.9050840139389038, "learning_rate": 0.001, "loss": 2.1328, "step": 147504 }, { "epoch": 12.729468599033817, "grad_norm": 2.3885610103607178, "learning_rate": 0.001, "loss": 2.135, "step": 147560 }, { "epoch": 12.734299516908212, "grad_norm": 0.40776655077934265, "learning_rate": 0.001, "loss": 2.1237, "step": 147616 }, { "epoch": 12.73913043478261, "grad_norm": 0.9876236915588379, "learning_rate": 0.001, "loss": 2.1265, "step": 147672 }, { "epoch": 12.743961352657005, "grad_norm": 1.3310588598251343, "learning_rate": 0.001, "loss": 2.1201, "step": 147728 }, { "epoch": 12.748792270531402, "grad_norm": 1.1427407264709473, "learning_rate": 0.001, "loss": 2.1285, "step": 147784 }, { "epoch": 12.753623188405797, "grad_norm": 1.1546401977539062, "learning_rate": 0.001, "loss": 2.1301, "step": 147840 }, { "epoch": 12.758454106280194, "grad_norm": 1.5894087553024292, "learning_rate": 0.001, "loss": 2.1401, "step": 147896 }, { "epoch": 12.76328502415459, "grad_norm": 1.8177663087844849, "learning_rate": 0.001, "loss": 2.1254, "step": 147952 }, { "epoch": 12.768115942028986, "grad_norm": 2.881303071975708, "learning_rate": 0.001, "loss": 2.1438, "step": 148008 }, { "epoch": 12.772946859903382, "grad_norm": 1.6366645097732544, "learning_rate": 0.001, "loss": 2.139, "step": 148064 }, { "epoch": 12.777777777777779, "grad_norm": 1.1959857940673828, "learning_rate": 0.001, "loss": 2.1541, "step": 148120 }, { "epoch": 12.782608695652174, "grad_norm": 1.8994476795196533, "learning_rate": 0.001, "loss": 2.1476, "step": 148176 }, { "epoch": 12.78743961352657, "grad_norm": 0.5184637308120728, "learning_rate": 0.001, "loss": 2.1332, "step": 148232 }, { "epoch": 12.792270531400966, "grad_norm": 2.3582370281219482, "learning_rate": 0.001, "loss": 2.1453, "step": 148288 }, { "epoch": 12.797101449275363, "grad_norm": 1.3353848457336426, "learning_rate": 0.001, "loss": 2.144, "step": 148344 }, { "epoch": 12.801932367149758, "grad_norm": 2.4771721363067627, "learning_rate": 0.001, "loss": 2.1426, "step": 148400 }, { "epoch": 12.806763285024154, "grad_norm": 0.7120132446289062, "learning_rate": 0.001, "loss": 2.1413, "step": 148456 }, { "epoch": 12.81159420289855, "grad_norm": 1.5477908849716187, "learning_rate": 0.001, "loss": 2.1396, "step": 148512 }, { "epoch": 12.816425120772946, "grad_norm": 1.6299446821212769, "learning_rate": 0.001, "loss": 2.1559, "step": 148568 }, { "epoch": 12.821256038647343, "grad_norm": 1.9400720596313477, "learning_rate": 0.001, "loss": 2.1467, "step": 148624 }, { "epoch": 12.826086956521738, "grad_norm": 1.8135931491851807, "learning_rate": 0.001, "loss": 2.1203, "step": 148680 }, { "epoch": 12.830917874396135, "grad_norm": 1.7615090608596802, "learning_rate": 0.001, "loss": 2.1302, "step": 148736 }, { "epoch": 12.83574879227053, "grad_norm": 4.531754493713379, "learning_rate": 0.001, "loss": 2.157, "step": 148792 }, { "epoch": 12.840579710144928, "grad_norm": 1.1010205745697021, "learning_rate": 0.001, "loss": 2.1384, "step": 148848 }, { "epoch": 12.845410628019323, "grad_norm": 1.0222194194793701, "learning_rate": 0.001, "loss": 2.1342, "step": 148904 }, { "epoch": 12.85024154589372, "grad_norm": 1.8879467248916626, "learning_rate": 0.001, "loss": 2.1438, "step": 148960 }, { "epoch": 12.855072463768115, "grad_norm": 2.7745132446289062, "learning_rate": 0.001, "loss": 2.144, "step": 149016 }, { "epoch": 12.859903381642512, "grad_norm": 2.1177420616149902, "learning_rate": 0.001, "loss": 2.1389, "step": 149072 }, { "epoch": 12.864734299516908, "grad_norm": 2.9606330394744873, "learning_rate": 0.001, "loss": 2.1424, "step": 149128 }, { "epoch": 12.869565217391305, "grad_norm": 1.2833441495895386, "learning_rate": 0.001, "loss": 2.1473, "step": 149184 }, { "epoch": 12.8743961352657, "grad_norm": 1.89826500415802, "learning_rate": 0.001, "loss": 2.1428, "step": 149240 }, { "epoch": 12.879227053140097, "grad_norm": 1.7878497838974, "learning_rate": 0.001, "loss": 2.1366, "step": 149296 }, { "epoch": 12.884057971014492, "grad_norm": 1.2785511016845703, "learning_rate": 0.001, "loss": 2.1302, "step": 149352 }, { "epoch": 12.88888888888889, "grad_norm": 1.3714414834976196, "learning_rate": 0.001, "loss": 2.1327, "step": 149408 }, { "epoch": 12.893719806763285, "grad_norm": 2.212644338607788, "learning_rate": 0.001, "loss": 2.1332, "step": 149464 }, { "epoch": 12.898550724637682, "grad_norm": 1.8334863185882568, "learning_rate": 0.001, "loss": 2.1387, "step": 149520 }, { "epoch": 12.903381642512077, "grad_norm": 1.0166122913360596, "learning_rate": 0.001, "loss": 2.1418, "step": 149576 }, { "epoch": 12.908212560386474, "grad_norm": 6.022477149963379, "learning_rate": 0.001, "loss": 2.1487, "step": 149632 }, { "epoch": 12.91304347826087, "grad_norm": 2.2472314834594727, "learning_rate": 0.001, "loss": 2.144, "step": 149688 }, { "epoch": 12.917874396135266, "grad_norm": 5.197749614715576, "learning_rate": 0.001, "loss": 2.1533, "step": 149744 }, { "epoch": 12.922705314009661, "grad_norm": 15.629669189453125, "learning_rate": 0.001, "loss": 2.1827, "step": 149800 }, { "epoch": 12.927536231884059, "grad_norm": 1.5433567762374878, "learning_rate": 0.001, "loss": 2.1723, "step": 149856 }, { "epoch": 12.932367149758454, "grad_norm": 3.1883909702301025, "learning_rate": 0.001, "loss": 2.178, "step": 149912 }, { "epoch": 12.93719806763285, "grad_norm": 3.1321969032287598, "learning_rate": 0.001, "loss": 2.1627, "step": 149968 }, { "epoch": 12.942028985507246, "grad_norm": 10.574917793273926, "learning_rate": 0.001, "loss": 2.1501, "step": 150024 }, { "epoch": 12.946859903381643, "grad_norm": 1.0977131128311157, "learning_rate": 0.001, "loss": 2.1635, "step": 150080 }, { "epoch": 12.951690821256038, "grad_norm": 2.353797435760498, "learning_rate": 0.001, "loss": 2.189, "step": 150136 }, { "epoch": 12.956521739130435, "grad_norm": 177.41091918945312, "learning_rate": 0.001, "loss": 2.2147, "step": 150192 }, { "epoch": 12.96135265700483, "grad_norm": 2.0154869556427, "learning_rate": 0.001, "loss": 2.2232, "step": 150248 }, { "epoch": 12.966183574879228, "grad_norm": 2.557548761367798, "learning_rate": 0.001, "loss": 2.2052, "step": 150304 }, { "epoch": 12.971014492753623, "grad_norm": 4.553218841552734, "learning_rate": 0.001, "loss": 2.2037, "step": 150360 }, { "epoch": 12.97584541062802, "grad_norm": 6.355138778686523, "learning_rate": 0.001, "loss": 2.2178, "step": 150416 }, { "epoch": 12.980676328502415, "grad_norm": 3.4625821113586426, "learning_rate": 0.001, "loss": 2.2096, "step": 150472 }, { "epoch": 12.985507246376812, "grad_norm": 2.4670097827911377, "learning_rate": 0.001, "loss": 2.1882, "step": 150528 }, { "epoch": 12.990338164251208, "grad_norm": 1.7348414659500122, "learning_rate": 0.001, "loss": 2.199, "step": 150584 }, { "epoch": 12.995169082125603, "grad_norm": 3.511028528213501, "learning_rate": 0.001, "loss": 2.2021, "step": 150640 }, { "epoch": 13.0, "grad_norm": 7.550862789154053, "learning_rate": 0.001, "loss": 2.2044, "step": 150696 }, { "epoch": 13.004830917874395, "grad_norm": 1.4284648895263672, "learning_rate": 0.001, "loss": 2.1599, "step": 150752 }, { "epoch": 13.009661835748792, "grad_norm": 1.9730991125106812, "learning_rate": 0.001, "loss": 2.1659, "step": 150808 }, { "epoch": 13.014492753623188, "grad_norm": 0.8409374356269836, "learning_rate": 0.001, "loss": 2.1646, "step": 150864 }, { "epoch": 13.019323671497585, "grad_norm": 1.1762151718139648, "learning_rate": 0.001, "loss": 2.1676, "step": 150920 }, { "epoch": 13.02415458937198, "grad_norm": 4.034763336181641, "learning_rate": 0.001, "loss": 2.1587, "step": 150976 }, { "epoch": 13.028985507246377, "grad_norm": 1.5850826501846313, "learning_rate": 0.001, "loss": 2.141, "step": 151032 }, { "epoch": 13.033816425120772, "grad_norm": 1.5828746557235718, "learning_rate": 0.001, "loss": 2.1312, "step": 151088 }, { "epoch": 13.03864734299517, "grad_norm": 12.526509284973145, "learning_rate": 0.001, "loss": 2.132, "step": 151144 }, { "epoch": 13.043478260869565, "grad_norm": 13.837462425231934, "learning_rate": 0.001, "loss": 2.1342, "step": 151200 }, { "epoch": 13.048309178743962, "grad_norm": 15.72929859161377, "learning_rate": 0.001, "loss": 2.1442, "step": 151256 }, { "epoch": 13.053140096618357, "grad_norm": 2.5083811283111572, "learning_rate": 0.001, "loss": 2.1526, "step": 151312 }, { "epoch": 13.057971014492754, "grad_norm": 91.62779998779297, "learning_rate": 0.001, "loss": 2.1549, "step": 151368 }, { "epoch": 13.06280193236715, "grad_norm": 1.9386768341064453, "learning_rate": 0.001, "loss": 2.1421, "step": 151424 }, { "epoch": 13.067632850241546, "grad_norm": 2.060494899749756, "learning_rate": 0.001, "loss": 2.1397, "step": 151480 }, { "epoch": 13.072463768115941, "grad_norm": 1.8773630857467651, "learning_rate": 0.001, "loss": 2.123, "step": 151536 }, { "epoch": 13.077294685990339, "grad_norm": 3.6586945056915283, "learning_rate": 0.001, "loss": 2.1259, "step": 151592 }, { "epoch": 13.082125603864734, "grad_norm": 1.596848964691162, "learning_rate": 0.001, "loss": 2.1413, "step": 151648 }, { "epoch": 13.08695652173913, "grad_norm": 1.0399446487426758, "learning_rate": 0.001, "loss": 2.1193, "step": 151704 }, { "epoch": 13.091787439613526, "grad_norm": 0.9175421595573425, "learning_rate": 0.001, "loss": 2.1149, "step": 151760 }, { "epoch": 13.096618357487923, "grad_norm": 2.4593353271484375, "learning_rate": 0.001, "loss": 2.1208, "step": 151816 }, { "epoch": 13.101449275362318, "grad_norm": 2.027021884918213, "learning_rate": 0.001, "loss": 2.1212, "step": 151872 }, { "epoch": 13.106280193236715, "grad_norm": 0.9635437726974487, "learning_rate": 0.001, "loss": 2.1147, "step": 151928 }, { "epoch": 13.11111111111111, "grad_norm": 1.103538990020752, "learning_rate": 0.001, "loss": 2.1168, "step": 151984 }, { "epoch": 13.115942028985508, "grad_norm": 2.2070937156677246, "learning_rate": 0.001, "loss": 2.1023, "step": 152040 }, { "epoch": 13.120772946859903, "grad_norm": 1.435902714729309, "learning_rate": 0.001, "loss": 2.1016, "step": 152096 }, { "epoch": 13.1256038647343, "grad_norm": 1.9959419965744019, "learning_rate": 0.001, "loss": 2.1293, "step": 152152 }, { "epoch": 13.130434782608695, "grad_norm": 2.2775208950042725, "learning_rate": 0.001, "loss": 2.1338, "step": 152208 }, { "epoch": 13.135265700483092, "grad_norm": 0.8768938183784485, "learning_rate": 0.001, "loss": 2.1206, "step": 152264 }, { "epoch": 13.140096618357488, "grad_norm": 1.1370960474014282, "learning_rate": 0.001, "loss": 2.106, "step": 152320 }, { "epoch": 13.144927536231885, "grad_norm": 2.0057201385498047, "learning_rate": 0.001, "loss": 2.1204, "step": 152376 }, { "epoch": 13.14975845410628, "grad_norm": 2.1446590423583984, "learning_rate": 0.001, "loss": 2.139, "step": 152432 }, { "epoch": 13.154589371980677, "grad_norm": 1.389958381652832, "learning_rate": 0.001, "loss": 2.1428, "step": 152488 }, { "epoch": 13.159420289855072, "grad_norm": 1.2191355228424072, "learning_rate": 0.001, "loss": 2.1402, "step": 152544 }, { "epoch": 13.16425120772947, "grad_norm": 7.4249444007873535, "learning_rate": 0.001, "loss": 2.1488, "step": 152600 }, { "epoch": 13.169082125603865, "grad_norm": 1.4022554159164429, "learning_rate": 0.001, "loss": 2.1363, "step": 152656 }, { "epoch": 13.173913043478262, "grad_norm": 1.2038792371749878, "learning_rate": 0.001, "loss": 2.1532, "step": 152712 }, { "epoch": 13.178743961352657, "grad_norm": 1.5878230333328247, "learning_rate": 0.001, "loss": 2.1483, "step": 152768 }, { "epoch": 13.183574879227054, "grad_norm": 1.1976397037506104, "learning_rate": 0.001, "loss": 2.1433, "step": 152824 }, { "epoch": 13.18840579710145, "grad_norm": 1.1416399478912354, "learning_rate": 0.001, "loss": 2.1581, "step": 152880 }, { "epoch": 13.193236714975846, "grad_norm": 0.850429892539978, "learning_rate": 0.001, "loss": 2.1529, "step": 152936 }, { "epoch": 13.198067632850242, "grad_norm": 2.028083086013794, "learning_rate": 0.001, "loss": 2.1474, "step": 152992 }, { "epoch": 13.202898550724637, "grad_norm": 3.1031394004821777, "learning_rate": 0.001, "loss": 2.1317, "step": 153048 }, { "epoch": 13.207729468599034, "grad_norm": 1.853583574295044, "learning_rate": 0.001, "loss": 2.112, "step": 153104 }, { "epoch": 13.21256038647343, "grad_norm": 1.3505733013153076, "learning_rate": 0.001, "loss": 2.1158, "step": 153160 }, { "epoch": 13.217391304347826, "grad_norm": 0.6872023344039917, "learning_rate": 0.001, "loss": 2.1535, "step": 153216 }, { "epoch": 13.222222222222221, "grad_norm": 0.7635059356689453, "learning_rate": 0.001, "loss": 2.1599, "step": 153272 }, { "epoch": 13.227053140096618, "grad_norm": 2.9427006244659424, "learning_rate": 0.001, "loss": 2.1642, "step": 153328 }, { "epoch": 13.231884057971014, "grad_norm": 2.549839496612549, "learning_rate": 0.001, "loss": 2.1473, "step": 153384 }, { "epoch": 13.23671497584541, "grad_norm": 0.8575700521469116, "learning_rate": 0.001, "loss": 2.1302, "step": 153440 }, { "epoch": 13.241545893719806, "grad_norm": 3.396904706954956, "learning_rate": 0.001, "loss": 2.111, "step": 153496 }, { "epoch": 13.246376811594203, "grad_norm": 1.2614824771881104, "learning_rate": 0.001, "loss": 2.1113, "step": 153552 }, { "epoch": 13.251207729468598, "grad_norm": 1.2675766944885254, "learning_rate": 0.001, "loss": 2.1043, "step": 153608 }, { "epoch": 13.256038647342995, "grad_norm": 1.7872023582458496, "learning_rate": 0.001, "loss": 2.1061, "step": 153664 }, { "epoch": 13.26086956521739, "grad_norm": 1.1799516677856445, "learning_rate": 0.001, "loss": 2.105, "step": 153720 }, { "epoch": 13.265700483091788, "grad_norm": 1.4408347606658936, "learning_rate": 0.001, "loss": 2.1114, "step": 153776 }, { "epoch": 13.270531400966183, "grad_norm": 0.4788872301578522, "learning_rate": 0.001, "loss": 2.0996, "step": 153832 }, { "epoch": 13.27536231884058, "grad_norm": 0.8686665892601013, "learning_rate": 0.001, "loss": 2.0938, "step": 153888 }, { "epoch": 13.280193236714975, "grad_norm": 2.249607801437378, "learning_rate": 0.001, "loss": 2.108, "step": 153944 }, { "epoch": 13.285024154589372, "grad_norm": 2.659045696258545, "learning_rate": 0.001, "loss": 2.1034, "step": 154000 }, { "epoch": 13.289855072463768, "grad_norm": 2.5369389057159424, "learning_rate": 0.001, "loss": 2.0995, "step": 154056 }, { "epoch": 13.294685990338165, "grad_norm": 2.343069553375244, "learning_rate": 0.001, "loss": 2.1065, "step": 154112 }, { "epoch": 13.29951690821256, "grad_norm": 0.6565819978713989, "learning_rate": 0.001, "loss": 2.1048, "step": 154168 }, { "epoch": 13.304347826086957, "grad_norm": 0.7682580351829529, "learning_rate": 0.001, "loss": 2.1073, "step": 154224 }, { "epoch": 13.309178743961352, "grad_norm": 0.6823811531066895, "learning_rate": 0.001, "loss": 2.1103, "step": 154280 }, { "epoch": 13.31400966183575, "grad_norm": 1.133642554283142, "learning_rate": 0.001, "loss": 2.1158, "step": 154336 }, { "epoch": 13.318840579710145, "grad_norm": 0.7315028309822083, "learning_rate": 0.001, "loss": 2.1124, "step": 154392 }, { "epoch": 13.323671497584542, "grad_norm": 1.7674360275268555, "learning_rate": 0.001, "loss": 2.1057, "step": 154448 }, { "epoch": 13.328502415458937, "grad_norm": 0.7310938239097595, "learning_rate": 0.001, "loss": 2.1181, "step": 154504 }, { "epoch": 13.333333333333334, "grad_norm": 0.9955859780311584, "learning_rate": 0.001, "loss": 2.1203, "step": 154560 }, { "epoch": 13.33816425120773, "grad_norm": 1.5533608198165894, "learning_rate": 0.001, "loss": 2.1115, "step": 154616 }, { "epoch": 13.342995169082126, "grad_norm": 1.0048167705535889, "learning_rate": 0.001, "loss": 2.1044, "step": 154672 }, { "epoch": 13.347826086956522, "grad_norm": 1.2621773481369019, "learning_rate": 0.001, "loss": 2.1072, "step": 154728 }, { "epoch": 13.352657004830919, "grad_norm": 1.8814021348953247, "learning_rate": 0.001, "loss": 2.1007, "step": 154784 }, { "epoch": 13.357487922705314, "grad_norm": 1.6272419691085815, "learning_rate": 0.001, "loss": 2.1043, "step": 154840 }, { "epoch": 13.36231884057971, "grad_norm": 12.52737808227539, "learning_rate": 0.001, "loss": 2.13, "step": 154896 }, { "epoch": 13.367149758454106, "grad_norm": 3.332087516784668, "learning_rate": 0.001, "loss": 2.1573, "step": 154952 }, { "epoch": 13.371980676328503, "grad_norm": 1.649582028388977, "learning_rate": 0.001, "loss": 2.1535, "step": 155008 }, { "epoch": 13.376811594202898, "grad_norm": 1.4080270528793335, "learning_rate": 0.001, "loss": 2.154, "step": 155064 }, { "epoch": 13.381642512077295, "grad_norm": 2.415435314178467, "learning_rate": 0.001, "loss": 2.1437, "step": 155120 }, { "epoch": 13.38647342995169, "grad_norm": 2.4696104526519775, "learning_rate": 0.001, "loss": 2.1283, "step": 155176 }, { "epoch": 13.391304347826088, "grad_norm": 2.0682575702667236, "learning_rate": 0.001, "loss": 2.1201, "step": 155232 }, { "epoch": 13.396135265700483, "grad_norm": 0.7371551990509033, "learning_rate": 0.001, "loss": 2.1356, "step": 155288 }, { "epoch": 13.40096618357488, "grad_norm": 0.8683264255523682, "learning_rate": 0.001, "loss": 2.1353, "step": 155344 }, { "epoch": 13.405797101449275, "grad_norm": 1.6585768461227417, "learning_rate": 0.001, "loss": 2.1448, "step": 155400 }, { "epoch": 13.41062801932367, "grad_norm": 1.6927316188812256, "learning_rate": 0.001, "loss": 2.1311, "step": 155456 }, { "epoch": 13.415458937198068, "grad_norm": 0.7282180190086365, "learning_rate": 0.001, "loss": 2.1277, "step": 155512 }, { "epoch": 13.420289855072463, "grad_norm": 0.9816649556159973, "learning_rate": 0.001, "loss": 2.1294, "step": 155568 }, { "epoch": 13.42512077294686, "grad_norm": 2.6126668453216553, "learning_rate": 0.001, "loss": 2.1448, "step": 155624 }, { "epoch": 13.429951690821255, "grad_norm": 1.0526080131530762, "learning_rate": 0.001, "loss": 2.1339, "step": 155680 }, { "epoch": 13.434782608695652, "grad_norm": 98.00154113769531, "learning_rate": 0.001, "loss": 2.145, "step": 155736 }, { "epoch": 13.439613526570048, "grad_norm": 0.7751989364624023, "learning_rate": 0.001, "loss": 2.1229, "step": 155792 }, { "epoch": 13.444444444444445, "grad_norm": 1.1652582883834839, "learning_rate": 0.001, "loss": 2.1228, "step": 155848 }, { "epoch": 13.44927536231884, "grad_norm": 2.596214532852173, "learning_rate": 0.001, "loss": 2.1247, "step": 155904 }, { "epoch": 13.454106280193237, "grad_norm": 1.4621740579605103, "learning_rate": 0.001, "loss": 2.1096, "step": 155960 }, { "epoch": 13.458937198067632, "grad_norm": 1.411218523979187, "learning_rate": 0.001, "loss": 2.1246, "step": 156016 }, { "epoch": 13.46376811594203, "grad_norm": 1.3023192882537842, "learning_rate": 0.001, "loss": 2.1185, "step": 156072 }, { "epoch": 13.468599033816425, "grad_norm": 0.6995598673820496, "learning_rate": 0.001, "loss": 2.1184, "step": 156128 }, { "epoch": 13.473429951690822, "grad_norm": 3.9683218002319336, "learning_rate": 0.001, "loss": 2.1181, "step": 156184 }, { "epoch": 13.478260869565217, "grad_norm": 0.8429069519042969, "learning_rate": 0.001, "loss": 2.1187, "step": 156240 }, { "epoch": 13.483091787439614, "grad_norm": 1.305495023727417, "learning_rate": 0.001, "loss": 2.1066, "step": 156296 }, { "epoch": 13.48792270531401, "grad_norm": 1.988076090812683, "learning_rate": 0.001, "loss": 2.1105, "step": 156352 }, { "epoch": 13.492753623188406, "grad_norm": 1.893061876296997, "learning_rate": 0.001, "loss": 2.1198, "step": 156408 }, { "epoch": 13.497584541062801, "grad_norm": 1.5898001194000244, "learning_rate": 0.001, "loss": 2.1353, "step": 156464 }, { "epoch": 13.502415458937199, "grad_norm": 2.0219902992248535, "learning_rate": 0.001, "loss": 2.1471, "step": 156520 }, { "epoch": 13.507246376811594, "grad_norm": 2.892352342605591, "learning_rate": 0.001, "loss": 2.1276, "step": 156576 }, { "epoch": 13.51207729468599, "grad_norm": 1.5758299827575684, "learning_rate": 0.001, "loss": 2.1275, "step": 156632 }, { "epoch": 13.516908212560386, "grad_norm": 1.6753467321395874, "learning_rate": 0.001, "loss": 2.1164, "step": 156688 }, { "epoch": 13.521739130434783, "grad_norm": 12.08422565460205, "learning_rate": 0.001, "loss": 2.1133, "step": 156744 }, { "epoch": 13.526570048309178, "grad_norm": 1.9895418882369995, "learning_rate": 0.001, "loss": 2.1286, "step": 156800 }, { "epoch": 13.531400966183575, "grad_norm": 0.9658872485160828, "learning_rate": 0.001, "loss": 2.1262, "step": 156856 }, { "epoch": 13.53623188405797, "grad_norm": 1.978919506072998, "learning_rate": 0.001, "loss": 2.1267, "step": 156912 }, { "epoch": 13.541062801932368, "grad_norm": 0.47410324215888977, "learning_rate": 0.001, "loss": 2.119, "step": 156968 }, { "epoch": 13.545893719806763, "grad_norm": 1.0026180744171143, "learning_rate": 0.001, "loss": 2.1295, "step": 157024 }, { "epoch": 13.55072463768116, "grad_norm": 0.7485067248344421, "learning_rate": 0.001, "loss": 2.1307, "step": 157080 }, { "epoch": 13.555555555555555, "grad_norm": 0.5274030566215515, "learning_rate": 0.001, "loss": 2.1214, "step": 157136 }, { "epoch": 13.560386473429952, "grad_norm": 0.8862578868865967, "learning_rate": 0.001, "loss": 2.116, "step": 157192 }, { "epoch": 13.565217391304348, "grad_norm": 1.3899259567260742, "learning_rate": 0.001, "loss": 2.1215, "step": 157248 }, { "epoch": 13.570048309178745, "grad_norm": 2.7743959426879883, "learning_rate": 0.001, "loss": 2.1156, "step": 157304 }, { "epoch": 13.57487922705314, "grad_norm": 1.5771849155426025, "learning_rate": 0.001, "loss": 2.1199, "step": 157360 }, { "epoch": 13.579710144927537, "grad_norm": 1.2374780178070068, "learning_rate": 0.001, "loss": 2.1087, "step": 157416 }, { "epoch": 13.584541062801932, "grad_norm": 3.2778754234313965, "learning_rate": 0.001, "loss": 2.1208, "step": 157472 }, { "epoch": 13.58937198067633, "grad_norm": 0.86004239320755, "learning_rate": 0.001, "loss": 2.1202, "step": 157528 }, { "epoch": 13.594202898550725, "grad_norm": 1.7727586030960083, "learning_rate": 0.001, "loss": 2.1231, "step": 157584 }, { "epoch": 13.59903381642512, "grad_norm": 3.322543144226074, "learning_rate": 0.001, "loss": 2.1219, "step": 157640 }, { "epoch": 13.603864734299517, "grad_norm": 2.1620090007781982, "learning_rate": 0.001, "loss": 2.112, "step": 157696 }, { "epoch": 13.608695652173914, "grad_norm": 0.5596914887428284, "learning_rate": 0.001, "loss": 2.1121, "step": 157752 }, { "epoch": 13.61352657004831, "grad_norm": 1.358747959136963, "learning_rate": 0.001, "loss": 2.1193, "step": 157808 }, { "epoch": 13.618357487922705, "grad_norm": 2.925055503845215, "learning_rate": 0.001, "loss": 2.1263, "step": 157864 }, { "epoch": 13.623188405797102, "grad_norm": 5.203530311584473, "learning_rate": 0.001, "loss": 2.1318, "step": 157920 }, { "epoch": 13.628019323671497, "grad_norm": 0.6694729924201965, "learning_rate": 0.001, "loss": 2.137, "step": 157976 }, { "epoch": 13.632850241545894, "grad_norm": 1.621677041053772, "learning_rate": 0.001, "loss": 2.129, "step": 158032 }, { "epoch": 13.63768115942029, "grad_norm": 1.223446249961853, "learning_rate": 0.001, "loss": 2.1205, "step": 158088 }, { "epoch": 13.642512077294686, "grad_norm": 1.7448548078536987, "learning_rate": 0.001, "loss": 2.1102, "step": 158144 }, { "epoch": 13.647342995169081, "grad_norm": 2.5689926147460938, "learning_rate": 0.001, "loss": 2.1081, "step": 158200 }, { "epoch": 13.652173913043478, "grad_norm": 1.0271503925323486, "learning_rate": 0.001, "loss": 2.1187, "step": 158256 }, { "epoch": 13.657004830917874, "grad_norm": 0.9414145350456238, "learning_rate": 0.001, "loss": 2.1146, "step": 158312 }, { "epoch": 13.66183574879227, "grad_norm": 1.4684207439422607, "learning_rate": 0.001, "loss": 2.1214, "step": 158368 }, { "epoch": 13.666666666666666, "grad_norm": 2.3854098320007324, "learning_rate": 0.001, "loss": 2.1141, "step": 158424 }, { "epoch": 13.671497584541063, "grad_norm": 1.349774718284607, "learning_rate": 0.001, "loss": 2.1136, "step": 158480 }, { "epoch": 13.676328502415458, "grad_norm": 1.9693864583969116, "learning_rate": 0.001, "loss": 2.1303, "step": 158536 }, { "epoch": 13.681159420289855, "grad_norm": 0.796342670917511, "learning_rate": 0.001, "loss": 2.118, "step": 158592 }, { "epoch": 13.68599033816425, "grad_norm": 1.3799043893814087, "learning_rate": 0.001, "loss": 2.1185, "step": 158648 }, { "epoch": 13.690821256038648, "grad_norm": 2.128614664077759, "learning_rate": 0.001, "loss": 2.1151, "step": 158704 }, { "epoch": 13.695652173913043, "grad_norm": 0.8599096536636353, "learning_rate": 0.001, "loss": 2.1014, "step": 158760 }, { "epoch": 13.70048309178744, "grad_norm": 1.0084664821624756, "learning_rate": 0.001, "loss": 2.0926, "step": 158816 }, { "epoch": 13.705314009661835, "grad_norm": 1.0774191617965698, "learning_rate": 0.001, "loss": 2.0974, "step": 158872 }, { "epoch": 13.710144927536232, "grad_norm": 1.2552127838134766, "learning_rate": 0.001, "loss": 2.1014, "step": 158928 }, { "epoch": 13.714975845410628, "grad_norm": 0.8322638273239136, "learning_rate": 0.001, "loss": 2.1208, "step": 158984 }, { "epoch": 13.719806763285025, "grad_norm": 0.8590813279151917, "learning_rate": 0.001, "loss": 2.1423, "step": 159040 }, { "epoch": 13.72463768115942, "grad_norm": 2.231987953186035, "learning_rate": 0.001, "loss": 2.115, "step": 159096 }, { "epoch": 13.729468599033817, "grad_norm": 0.6681892275810242, "learning_rate": 0.001, "loss": 2.1026, "step": 159152 }, { "epoch": 13.734299516908212, "grad_norm": 1.50006902217865, "learning_rate": 0.001, "loss": 2.1132, "step": 159208 }, { "epoch": 13.73913043478261, "grad_norm": 1.1472631692886353, "learning_rate": 0.001, "loss": 2.1195, "step": 159264 }, { "epoch": 13.743961352657005, "grad_norm": 1.0082118511199951, "learning_rate": 0.001, "loss": 2.1241, "step": 159320 }, { "epoch": 13.748792270531402, "grad_norm": 2.1933016777038574, "learning_rate": 0.001, "loss": 2.1254, "step": 159376 }, { "epoch": 13.753623188405797, "grad_norm": 1.2152806520462036, "learning_rate": 0.001, "loss": 2.1215, "step": 159432 }, { "epoch": 13.758454106280194, "grad_norm": 0.7440258264541626, "learning_rate": 0.001, "loss": 2.1263, "step": 159488 }, { "epoch": 13.76328502415459, "grad_norm": 1.1146398782730103, "learning_rate": 0.001, "loss": 2.1124, "step": 159544 }, { "epoch": 13.768115942028986, "grad_norm": 0.7433760762214661, "learning_rate": 0.001, "loss": 2.1123, "step": 159600 }, { "epoch": 13.772946859903382, "grad_norm": 1.4501004219055176, "learning_rate": 0.001, "loss": 2.115, "step": 159656 }, { "epoch": 13.777777777777779, "grad_norm": 1.6814368963241577, "learning_rate": 0.001, "loss": 2.1142, "step": 159712 }, { "epoch": 13.782608695652174, "grad_norm": 1.9064940214157104, "learning_rate": 0.001, "loss": 2.0953, "step": 159768 }, { "epoch": 13.78743961352657, "grad_norm": 1.6684178113937378, "learning_rate": 0.001, "loss": 2.091, "step": 159824 }, { "epoch": 13.792270531400966, "grad_norm": 0.7099637389183044, "learning_rate": 0.001, "loss": 2.0985, "step": 159880 }, { "epoch": 13.797101449275363, "grad_norm": 0.9087736010551453, "learning_rate": 0.001, "loss": 2.0967, "step": 159936 }, { "epoch": 13.801932367149758, "grad_norm": 0.7317041754722595, "learning_rate": 0.001, "loss": 2.0998, "step": 159992 }, { "epoch": 13.806763285024154, "grad_norm": 0.5762949585914612, "learning_rate": 0.001, "loss": 2.0929, "step": 160048 }, { "epoch": 13.81159420289855, "grad_norm": 0.5864407420158386, "learning_rate": 0.001, "loss": 2.0944, "step": 160104 }, { "epoch": 13.816425120772946, "grad_norm": 0.7257387042045593, "learning_rate": 0.001, "loss": 2.0959, "step": 160160 }, { "epoch": 13.821256038647343, "grad_norm": 1.3213415145874023, "learning_rate": 0.001, "loss": 2.0926, "step": 160216 }, { "epoch": 13.826086956521738, "grad_norm": 1.233480453491211, "learning_rate": 0.001, "loss": 2.0955, "step": 160272 }, { "epoch": 13.830917874396135, "grad_norm": 1.3564972877502441, "learning_rate": 0.001, "loss": 2.0941, "step": 160328 }, { "epoch": 13.83574879227053, "grad_norm": 1.0533579587936401, "learning_rate": 0.001, "loss": 2.0858, "step": 160384 }, { "epoch": 13.840579710144928, "grad_norm": 3.3225958347320557, "learning_rate": 0.001, "loss": 2.0924, "step": 160440 }, { "epoch": 13.845410628019323, "grad_norm": 0.7427592277526855, "learning_rate": 0.001, "loss": 2.0955, "step": 160496 }, { "epoch": 13.85024154589372, "grad_norm": 0.7221114039421082, "learning_rate": 0.001, "loss": 2.1161, "step": 160552 }, { "epoch": 13.855072463768115, "grad_norm": 1.6178414821624756, "learning_rate": 0.001, "loss": 2.0917, "step": 160608 }, { "epoch": 13.859903381642512, "grad_norm": 1.0564931631088257, "learning_rate": 0.001, "loss": 2.0871, "step": 160664 }, { "epoch": 13.864734299516908, "grad_norm": 2.0206096172332764, "learning_rate": 0.001, "loss": 2.0953, "step": 160720 }, { "epoch": 13.869565217391305, "grad_norm": 1.6121093034744263, "learning_rate": 0.001, "loss": 2.1018, "step": 160776 }, { "epoch": 13.8743961352657, "grad_norm": 1.6579445600509644, "learning_rate": 0.001, "loss": 2.0775, "step": 160832 }, { "epoch": 13.879227053140097, "grad_norm": 1.3393802642822266, "learning_rate": 0.001, "loss": 2.0904, "step": 160888 }, { "epoch": 13.884057971014492, "grad_norm": 0.931817889213562, "learning_rate": 0.001, "loss": 2.0982, "step": 160944 }, { "epoch": 13.88888888888889, "grad_norm": 3.4905459880828857, "learning_rate": 0.001, "loss": 2.0944, "step": 161000 }, { "epoch": 13.893719806763285, "grad_norm": 0.8015429973602295, "learning_rate": 0.001, "loss": 2.0962, "step": 161056 }, { "epoch": 13.898550724637682, "grad_norm": 1.297957181930542, "learning_rate": 0.001, "loss": 2.1112, "step": 161112 }, { "epoch": 13.903381642512077, "grad_norm": 0.9302045702934265, "learning_rate": 0.001, "loss": 2.1166, "step": 161168 }, { "epoch": 13.908212560386474, "grad_norm": 1.845407247543335, "learning_rate": 0.001, "loss": 2.1084, "step": 161224 }, { "epoch": 13.91304347826087, "grad_norm": 9.368751525878906, "learning_rate": 0.001, "loss": 2.1229, "step": 161280 }, { "epoch": 13.917874396135266, "grad_norm": 1.476915717124939, "learning_rate": 0.001, "loss": 2.1269, "step": 161336 }, { "epoch": 13.922705314009661, "grad_norm": 1.9280927181243896, "learning_rate": 0.001, "loss": 2.1397, "step": 161392 }, { "epoch": 13.927536231884059, "grad_norm": 1.451669454574585, "learning_rate": 0.001, "loss": 2.1369, "step": 161448 }, { "epoch": 13.932367149758454, "grad_norm": 3.7889719009399414, "learning_rate": 0.001, "loss": 2.1288, "step": 161504 }, { "epoch": 13.93719806763285, "grad_norm": 1.2046613693237305, "learning_rate": 0.001, "loss": 2.145, "step": 161560 }, { "epoch": 13.942028985507246, "grad_norm": 0.908202052116394, "learning_rate": 0.001, "loss": 2.1672, "step": 161616 }, { "epoch": 13.946859903381643, "grad_norm": 2.666133165359497, "learning_rate": 0.001, "loss": 2.1516, "step": 161672 }, { "epoch": 13.951690821256038, "grad_norm": 6.771801948547363, "learning_rate": 0.001, "loss": 2.154, "step": 161728 }, { "epoch": 13.956521739130435, "grad_norm": 4.490646839141846, "learning_rate": 0.001, "loss": 2.1557, "step": 161784 }, { "epoch": 13.96135265700483, "grad_norm": 2.0039854049682617, "learning_rate": 0.001, "loss": 2.1393, "step": 161840 }, { "epoch": 13.966183574879228, "grad_norm": 3.488013505935669, "learning_rate": 0.001, "loss": 2.1505, "step": 161896 }, { "epoch": 13.971014492753623, "grad_norm": 3.316882610321045, "learning_rate": 0.001, "loss": 2.1408, "step": 161952 }, { "epoch": 13.97584541062802, "grad_norm": 1.202447772026062, "learning_rate": 0.001, "loss": 2.1579, "step": 162008 }, { "epoch": 13.980676328502415, "grad_norm": 1.1322122812271118, "learning_rate": 0.001, "loss": 2.1522, "step": 162064 }, { "epoch": 13.985507246376812, "grad_norm": 1.975663423538208, "learning_rate": 0.001, "loss": 2.1347, "step": 162120 }, { "epoch": 13.990338164251208, "grad_norm": 0.9171027541160583, "learning_rate": 0.001, "loss": 2.1297, "step": 162176 }, { "epoch": 13.995169082125603, "grad_norm": 4.064940452575684, "learning_rate": 0.001, "loss": 2.1212, "step": 162232 }, { "epoch": 14.0, "grad_norm": 1.2812541723251343, "learning_rate": 0.001, "loss": 2.1161, "step": 162288 }, { "epoch": 14.004830917874395, "grad_norm": 2.015765428543091, "learning_rate": 0.001, "loss": 2.0894, "step": 162344 }, { "epoch": 14.009661835748792, "grad_norm": 3.5633678436279297, "learning_rate": 0.001, "loss": 2.0815, "step": 162400 }, { "epoch": 14.014492753623188, "grad_norm": 3.0259530544281006, "learning_rate": 0.001, "loss": 2.0797, "step": 162456 }, { "epoch": 14.019323671497585, "grad_norm": 4.439858913421631, "learning_rate": 0.001, "loss": 2.0767, "step": 162512 }, { "epoch": 14.02415458937198, "grad_norm": 2.9818506240844727, "learning_rate": 0.001, "loss": 2.0782, "step": 162568 }, { "epoch": 14.028985507246377, "grad_norm": 1.519334077835083, "learning_rate": 0.001, "loss": 2.0839, "step": 162624 }, { "epoch": 14.033816425120772, "grad_norm": 1.7865744829177856, "learning_rate": 0.001, "loss": 2.0762, "step": 162680 }, { "epoch": 14.03864734299517, "grad_norm": 1.9717751741409302, "learning_rate": 0.001, "loss": 2.0816, "step": 162736 }, { "epoch": 14.043478260869565, "grad_norm": 1.1875979900360107, "learning_rate": 0.001, "loss": 2.1026, "step": 162792 }, { "epoch": 14.048309178743962, "grad_norm": 1.6881129741668701, "learning_rate": 0.001, "loss": 2.1151, "step": 162848 }, { "epoch": 14.053140096618357, "grad_norm": 3.7378671169281006, "learning_rate": 0.001, "loss": 2.1004, "step": 162904 }, { "epoch": 14.057971014492754, "grad_norm": 1.7965749502182007, "learning_rate": 0.001, "loss": 2.1061, "step": 162960 }, { "epoch": 14.06280193236715, "grad_norm": 0.9295099973678589, "learning_rate": 0.001, "loss": 2.1229, "step": 163016 }, { "epoch": 14.067632850241546, "grad_norm": 2.1705219745635986, "learning_rate": 0.001, "loss": 2.1148, "step": 163072 }, { "epoch": 14.072463768115941, "grad_norm": 1.94368314743042, "learning_rate": 0.001, "loss": 2.1011, "step": 163128 }, { "epoch": 14.077294685990339, "grad_norm": 0.7417897582054138, "learning_rate": 0.001, "loss": 2.0961, "step": 163184 }, { "epoch": 14.082125603864734, "grad_norm": 4.0052289962768555, "learning_rate": 0.001, "loss": 2.0778, "step": 163240 }, { "epoch": 14.08695652173913, "grad_norm": 1.0168331861495972, "learning_rate": 0.001, "loss": 2.0787, "step": 163296 }, { "epoch": 14.091787439613526, "grad_norm": 1.8557634353637695, "learning_rate": 0.001, "loss": 2.0787, "step": 163352 }, { "epoch": 14.096618357487923, "grad_norm": 0.3780621290206909, "learning_rate": 0.001, "loss": 2.0791, "step": 163408 }, { "epoch": 14.101449275362318, "grad_norm": 0.5136595368385315, "learning_rate": 0.001, "loss": 2.0753, "step": 163464 }, { "epoch": 14.106280193236715, "grad_norm": 0.9920003414154053, "learning_rate": 0.001, "loss": 2.0765, "step": 163520 }, { "epoch": 14.11111111111111, "grad_norm": 0.8664082884788513, "learning_rate": 0.001, "loss": 2.0759, "step": 163576 }, { "epoch": 14.115942028985508, "grad_norm": 0.57419753074646, "learning_rate": 0.001, "loss": 2.0785, "step": 163632 }, { "epoch": 14.120772946859903, "grad_norm": 1.2664976119995117, "learning_rate": 0.001, "loss": 2.0739, "step": 163688 }, { "epoch": 14.1256038647343, "grad_norm": 0.5676619410514832, "learning_rate": 0.001, "loss": 2.0816, "step": 163744 }, { "epoch": 14.130434782608695, "grad_norm": 0.7228012084960938, "learning_rate": 0.001, "loss": 2.1026, "step": 163800 }, { "epoch": 14.135265700483092, "grad_norm": 2.3994898796081543, "learning_rate": 0.001, "loss": 2.0864, "step": 163856 }, { "epoch": 14.140096618357488, "grad_norm": 0.7544922828674316, "learning_rate": 0.001, "loss": 2.0908, "step": 163912 }, { "epoch": 14.144927536231885, "grad_norm": 0.5263815522193909, "learning_rate": 0.001, "loss": 2.0771, "step": 163968 }, { "epoch": 14.14975845410628, "grad_norm": 3.943847179412842, "learning_rate": 0.001, "loss": 2.0705, "step": 164024 }, { "epoch": 14.154589371980677, "grad_norm": 0.6299681067466736, "learning_rate": 0.001, "loss": 2.0649, "step": 164080 }, { "epoch": 14.159420289855072, "grad_norm": 0.7022569179534912, "learning_rate": 0.001, "loss": 2.0776, "step": 164136 }, { "epoch": 14.16425120772947, "grad_norm": 1.2700905799865723, "learning_rate": 0.001, "loss": 2.0971, "step": 164192 }, { "epoch": 14.169082125603865, "grad_norm": 3.574528217315674, "learning_rate": 0.001, "loss": 2.089, "step": 164248 }, { "epoch": 14.173913043478262, "grad_norm": 20.486120223999023, "learning_rate": 0.001, "loss": 2.0898, "step": 164304 }, { "epoch": 14.178743961352657, "grad_norm": 5.954614639282227, "learning_rate": 0.001, "loss": 2.0926, "step": 164360 }, { "epoch": 14.183574879227054, "grad_norm": 4.658265590667725, "learning_rate": 0.001, "loss": 2.0864, "step": 164416 }, { "epoch": 14.18840579710145, "grad_norm": 0.9387691617012024, "learning_rate": 0.001, "loss": 2.1033, "step": 164472 }, { "epoch": 14.193236714975846, "grad_norm": 0.806334376335144, "learning_rate": 0.001, "loss": 2.1112, "step": 164528 }, { "epoch": 14.198067632850242, "grad_norm": 1.323714017868042, "learning_rate": 0.001, "loss": 2.0923, "step": 164584 }, { "epoch": 14.202898550724637, "grad_norm": 0.9407135248184204, "learning_rate": 0.001, "loss": 2.0884, "step": 164640 }, { "epoch": 14.207729468599034, "grad_norm": 0.5886964201927185, "learning_rate": 0.001, "loss": 2.1017, "step": 164696 }, { "epoch": 14.21256038647343, "grad_norm": 3.506727933883667, "learning_rate": 0.001, "loss": 2.0947, "step": 164752 }, { "epoch": 14.217391304347826, "grad_norm": 1.226180911064148, "learning_rate": 0.001, "loss": 2.0922, "step": 164808 }, { "epoch": 14.222222222222221, "grad_norm": 1.6454286575317383, "learning_rate": 0.001, "loss": 2.0912, "step": 164864 }, { "epoch": 14.227053140096618, "grad_norm": 1.7647241353988647, "learning_rate": 0.001, "loss": 2.0857, "step": 164920 }, { "epoch": 14.231884057971014, "grad_norm": 0.6546169519424438, "learning_rate": 0.001, "loss": 2.0917, "step": 164976 }, { "epoch": 14.23671497584541, "grad_norm": 2.7022316455841064, "learning_rate": 0.001, "loss": 2.0928, "step": 165032 }, { "epoch": 14.241545893719806, "grad_norm": 0.9677340388298035, "learning_rate": 0.001, "loss": 2.0896, "step": 165088 }, { "epoch": 14.246376811594203, "grad_norm": 1.0026935338974, "learning_rate": 0.001, "loss": 2.0921, "step": 165144 }, { "epoch": 14.251207729468598, "grad_norm": 1.4015276432037354, "learning_rate": 0.001, "loss": 2.0867, "step": 165200 }, { "epoch": 14.256038647342995, "grad_norm": 0.4426155090332031, "learning_rate": 0.001, "loss": 2.0857, "step": 165256 }, { "epoch": 14.26086956521739, "grad_norm": 0.7532420754432678, "learning_rate": 0.001, "loss": 2.0797, "step": 165312 }, { "epoch": 14.265700483091788, "grad_norm": 0.9576014876365662, "learning_rate": 0.001, "loss": 2.098, "step": 165368 }, { "epoch": 14.270531400966183, "grad_norm": 1.3186644315719604, "learning_rate": 0.001, "loss": 2.0991, "step": 165424 }, { "epoch": 14.27536231884058, "grad_norm": 1.3279974460601807, "learning_rate": 0.001, "loss": 2.0847, "step": 165480 }, { "epoch": 14.280193236714975, "grad_norm": 1.023114800453186, "learning_rate": 0.001, "loss": 2.102, "step": 165536 }, { "epoch": 14.285024154589372, "grad_norm": 2.983393907546997, "learning_rate": 0.001, "loss": 2.0917, "step": 165592 }, { "epoch": 14.289855072463768, "grad_norm": 0.5952508449554443, "learning_rate": 0.001, "loss": 2.0753, "step": 165648 }, { "epoch": 14.294685990338165, "grad_norm": 1.6369107961654663, "learning_rate": 0.001, "loss": 2.0787, "step": 165704 }, { "epoch": 14.29951690821256, "grad_norm": 0.763378381729126, "learning_rate": 0.001, "loss": 2.0701, "step": 165760 }, { "epoch": 14.304347826086957, "grad_norm": 1.1152658462524414, "learning_rate": 0.001, "loss": 2.079, "step": 165816 }, { "epoch": 14.309178743961352, "grad_norm": 6.66530704498291, "learning_rate": 0.001, "loss": 2.0827, "step": 165872 }, { "epoch": 14.31400966183575, "grad_norm": 0.7748475074768066, "learning_rate": 0.001, "loss": 2.0775, "step": 165928 }, { "epoch": 14.318840579710145, "grad_norm": 1.5329574346542358, "learning_rate": 0.001, "loss": 2.0785, "step": 165984 }, { "epoch": 14.323671497584542, "grad_norm": 1.5411434173583984, "learning_rate": 0.001, "loss": 2.0697, "step": 166040 }, { "epoch": 14.328502415458937, "grad_norm": 0.6459455490112305, "learning_rate": 0.001, "loss": 2.0788, "step": 166096 }, { "epoch": 14.333333333333334, "grad_norm": 0.47820526361465454, "learning_rate": 0.001, "loss": 2.0717, "step": 166152 }, { "epoch": 14.33816425120773, "grad_norm": 0.44646474719047546, "learning_rate": 0.001, "loss": 2.0754, "step": 166208 }, { "epoch": 14.342995169082126, "grad_norm": 0.6516669392585754, "learning_rate": 0.001, "loss": 2.0827, "step": 166264 }, { "epoch": 14.347826086956522, "grad_norm": 0.6498791575431824, "learning_rate": 0.001, "loss": 2.0698, "step": 166320 }, { "epoch": 14.352657004830919, "grad_norm": 1.2011828422546387, "learning_rate": 0.001, "loss": 2.0725, "step": 166376 }, { "epoch": 14.357487922705314, "grad_norm": 0.6755707263946533, "learning_rate": 0.001, "loss": 2.0807, "step": 166432 }, { "epoch": 14.36231884057971, "grad_norm": 1.3804514408111572, "learning_rate": 0.001, "loss": 2.0726, "step": 166488 }, { "epoch": 14.367149758454106, "grad_norm": 1.330936312675476, "learning_rate": 0.001, "loss": 2.065, "step": 166544 }, { "epoch": 14.371980676328503, "grad_norm": 0.6674533486366272, "learning_rate": 0.001, "loss": 2.0773, "step": 166600 }, { "epoch": 14.376811594202898, "grad_norm": 0.43645772337913513, "learning_rate": 0.001, "loss": 2.072, "step": 166656 }, { "epoch": 14.381642512077295, "grad_norm": 0.5057602524757385, "learning_rate": 0.001, "loss": 2.065, "step": 166712 }, { "epoch": 14.38647342995169, "grad_norm": 0.56394362449646, "learning_rate": 0.001, "loss": 2.0707, "step": 166768 }, { "epoch": 14.391304347826088, "grad_norm": 0.6102176904678345, "learning_rate": 0.001, "loss": 2.0657, "step": 166824 }, { "epoch": 14.396135265700483, "grad_norm": 1.0363285541534424, "learning_rate": 0.001, "loss": 2.0723, "step": 166880 }, { "epoch": 14.40096618357488, "grad_norm": 0.7264270186424255, "learning_rate": 0.001, "loss": 2.0628, "step": 166936 }, { "epoch": 14.405797101449275, "grad_norm": 0.8421317338943481, "learning_rate": 0.001, "loss": 2.0742, "step": 166992 }, { "epoch": 14.41062801932367, "grad_norm": 1.6796785593032837, "learning_rate": 0.001, "loss": 2.0681, "step": 167048 }, { "epoch": 14.415458937198068, "grad_norm": 0.9827716946601868, "learning_rate": 0.001, "loss": 2.0591, "step": 167104 }, { "epoch": 14.420289855072463, "grad_norm": 0.8008168339729309, "learning_rate": 0.001, "loss": 2.0641, "step": 167160 }, { "epoch": 14.42512077294686, "grad_norm": 0.6461685299873352, "learning_rate": 0.001, "loss": 2.0627, "step": 167216 }, { "epoch": 14.429951690821255, "grad_norm": 0.5084369778633118, "learning_rate": 0.001, "loss": 2.0543, "step": 167272 }, { "epoch": 14.434782608695652, "grad_norm": 0.3755362033843994, "learning_rate": 0.001, "loss": 2.0592, "step": 167328 }, { "epoch": 14.439613526570048, "grad_norm": 0.9225021004676819, "learning_rate": 0.001, "loss": 2.0512, "step": 167384 }, { "epoch": 14.444444444444445, "grad_norm": 0.40125328302383423, "learning_rate": 0.001, "loss": 2.061, "step": 167440 }, { "epoch": 14.44927536231884, "grad_norm": 2.9441957473754883, "learning_rate": 0.001, "loss": 2.0944, "step": 167496 }, { "epoch": 14.454106280193237, "grad_norm": 3.9784598350524902, "learning_rate": 0.001, "loss": 2.0986, "step": 167552 }, { "epoch": 14.458937198067632, "grad_norm": 0.5904225707054138, "learning_rate": 0.001, "loss": 2.0942, "step": 167608 }, { "epoch": 14.46376811594203, "grad_norm": 0.4019748270511627, "learning_rate": 0.001, "loss": 2.0819, "step": 167664 }, { "epoch": 14.468599033816425, "grad_norm": 0.652597963809967, "learning_rate": 0.001, "loss": 2.0773, "step": 167720 }, { "epoch": 14.473429951690822, "grad_norm": 0.4811539053916931, "learning_rate": 0.001, "loss": 2.0706, "step": 167776 }, { "epoch": 14.478260869565217, "grad_norm": 0.4728718400001526, "learning_rate": 0.001, "loss": 2.0635, "step": 167832 }, { "epoch": 14.483091787439614, "grad_norm": 0.5088309049606323, "learning_rate": 0.001, "loss": 2.0614, "step": 167888 }, { "epoch": 14.48792270531401, "grad_norm": 0.5286533832550049, "learning_rate": 0.001, "loss": 2.0603, "step": 167944 }, { "epoch": 14.492753623188406, "grad_norm": 0.6797974705696106, "learning_rate": 0.001, "loss": 2.0521, "step": 168000 }, { "epoch": 14.497584541062801, "grad_norm": 0.6201916933059692, "learning_rate": 0.001, "loss": 2.075, "step": 168056 }, { "epoch": 14.502415458937199, "grad_norm": 0.552043616771698, "learning_rate": 0.001, "loss": 2.0652, "step": 168112 }, { "epoch": 14.507246376811594, "grad_norm": 0.7064962387084961, "learning_rate": 0.001, "loss": 2.078, "step": 168168 }, { "epoch": 14.51207729468599, "grad_norm": 7.383607864379883, "learning_rate": 0.001, "loss": 2.0809, "step": 168224 }, { "epoch": 14.516908212560386, "grad_norm": 2.657451868057251, "learning_rate": 0.001, "loss": 2.095, "step": 168280 }, { "epoch": 14.521739130434783, "grad_norm": 0.592877209186554, "learning_rate": 0.001, "loss": 2.0939, "step": 168336 }, { "epoch": 14.526570048309178, "grad_norm": 2.029914140701294, "learning_rate": 0.001, "loss": 2.0773, "step": 168392 }, { "epoch": 14.531400966183575, "grad_norm": 0.9684410095214844, "learning_rate": 0.001, "loss": 2.0831, "step": 168448 }, { "epoch": 14.53623188405797, "grad_norm": 1.4609384536743164, "learning_rate": 0.001, "loss": 2.0976, "step": 168504 }, { "epoch": 14.541062801932368, "grad_norm": 0.7181475162506104, "learning_rate": 0.001, "loss": 2.115, "step": 168560 }, { "epoch": 14.545893719806763, "grad_norm": 0.5920982956886292, "learning_rate": 0.001, "loss": 2.1118, "step": 168616 }, { "epoch": 14.55072463768116, "grad_norm": 6.049862861633301, "learning_rate": 0.001, "loss": 2.1096, "step": 168672 }, { "epoch": 14.555555555555555, "grad_norm": 2.6849656105041504, "learning_rate": 0.001, "loss": 2.0984, "step": 168728 }, { "epoch": 14.560386473429952, "grad_norm": 2.490931749343872, "learning_rate": 0.001, "loss": 2.0967, "step": 168784 }, { "epoch": 14.565217391304348, "grad_norm": 0.6823988556861877, "learning_rate": 0.001, "loss": 2.0987, "step": 168840 }, { "epoch": 14.570048309178745, "grad_norm": 1.7376644611358643, "learning_rate": 0.001, "loss": 2.1137, "step": 168896 }, { "epoch": 14.57487922705314, "grad_norm": 1.6851379871368408, "learning_rate": 0.001, "loss": 2.1016, "step": 168952 }, { "epoch": 14.579710144927537, "grad_norm": 1.1215546131134033, "learning_rate": 0.001, "loss": 2.1057, "step": 169008 }, { "epoch": 14.584541062801932, "grad_norm": 1.5103774070739746, "learning_rate": 0.001, "loss": 2.108, "step": 169064 }, { "epoch": 14.58937198067633, "grad_norm": 2.852341890335083, "learning_rate": 0.001, "loss": 2.1345, "step": 169120 }, { "epoch": 14.594202898550725, "grad_norm": 1.6364822387695312, "learning_rate": 0.001, "loss": 2.1329, "step": 169176 }, { "epoch": 14.59903381642512, "grad_norm": 3.4666504859924316, "learning_rate": 0.001, "loss": 2.1331, "step": 169232 }, { "epoch": 14.603864734299517, "grad_norm": 4.621094226837158, "learning_rate": 0.001, "loss": 2.1303, "step": 169288 }, { "epoch": 14.608695652173914, "grad_norm": 2.3316121101379395, "learning_rate": 0.001, "loss": 2.1165, "step": 169344 }, { "epoch": 14.61352657004831, "grad_norm": 2.956366539001465, "learning_rate": 0.001, "loss": 2.1103, "step": 169400 }, { "epoch": 14.618357487922705, "grad_norm": 1.5519510507583618, "learning_rate": 0.001, "loss": 2.1078, "step": 169456 }, { "epoch": 14.623188405797102, "grad_norm": 1.516550064086914, "learning_rate": 0.001, "loss": 2.1029, "step": 169512 }, { "epoch": 14.628019323671497, "grad_norm": 4.001878261566162, "learning_rate": 0.001, "loss": 2.0955, "step": 169568 }, { "epoch": 14.632850241545894, "grad_norm": 1.6552058458328247, "learning_rate": 0.001, "loss": 2.097, "step": 169624 }, { "epoch": 14.63768115942029, "grad_norm": 1.3740285634994507, "learning_rate": 0.001, "loss": 2.0998, "step": 169680 }, { "epoch": 14.642512077294686, "grad_norm": 0.5937606692314148, "learning_rate": 0.001, "loss": 2.0974, "step": 169736 }, { "epoch": 14.647342995169081, "grad_norm": 0.4707351624965668, "learning_rate": 0.001, "loss": 2.1263, "step": 169792 }, { "epoch": 14.652173913043478, "grad_norm": 0.47917288541793823, "learning_rate": 0.001, "loss": 2.1024, "step": 169848 }, { "epoch": 14.657004830917874, "grad_norm": 0.6686074137687683, "learning_rate": 0.001, "loss": 2.0939, "step": 169904 }, { "epoch": 14.66183574879227, "grad_norm": 0.9329943060874939, "learning_rate": 0.001, "loss": 2.089, "step": 169960 }, { "epoch": 14.666666666666666, "grad_norm": 1.7489795684814453, "learning_rate": 0.001, "loss": 2.0847, "step": 170016 }, { "epoch": 14.671497584541063, "grad_norm": 0.6932048201560974, "learning_rate": 0.001, "loss": 2.085, "step": 170072 }, { "epoch": 14.676328502415458, "grad_norm": 0.7575638294219971, "learning_rate": 0.001, "loss": 2.0761, "step": 170128 }, { "epoch": 14.681159420289855, "grad_norm": 0.5367351770401001, "learning_rate": 0.001, "loss": 2.0911, "step": 170184 }, { "epoch": 14.68599033816425, "grad_norm": 0.5035954713821411, "learning_rate": 0.001, "loss": 2.0885, "step": 170240 }, { "epoch": 14.690821256038648, "grad_norm": 0.5958396196365356, "learning_rate": 0.001, "loss": 2.0645, "step": 170296 }, { "epoch": 14.695652173913043, "grad_norm": 0.5565921068191528, "learning_rate": 0.001, "loss": 2.0695, "step": 170352 }, { "epoch": 14.70048309178744, "grad_norm": 0.8306350708007812, "learning_rate": 0.001, "loss": 2.0716, "step": 170408 }, { "epoch": 14.705314009661835, "grad_norm": 0.5590113997459412, "learning_rate": 0.001, "loss": 2.0656, "step": 170464 }, { "epoch": 14.710144927536232, "grad_norm": 0.9812882542610168, "learning_rate": 0.001, "loss": 2.0739, "step": 170520 }, { "epoch": 14.714975845410628, "grad_norm": 1.226194977760315, "learning_rate": 0.001, "loss": 2.0671, "step": 170576 }, { "epoch": 14.719806763285025, "grad_norm": 1.9902857542037964, "learning_rate": 0.001, "loss": 2.0687, "step": 170632 }, { "epoch": 14.72463768115942, "grad_norm": 2.0700135231018066, "learning_rate": 0.001, "loss": 2.1006, "step": 170688 }, { "epoch": 14.729468599033817, "grad_norm": 0.551053524017334, "learning_rate": 0.001, "loss": 2.0866, "step": 170744 }, { "epoch": 14.734299516908212, "grad_norm": 0.5387642979621887, "learning_rate": 0.001, "loss": 2.0741, "step": 170800 }, { "epoch": 14.73913043478261, "grad_norm": 0.39276209473609924, "learning_rate": 0.001, "loss": 2.0791, "step": 170856 }, { "epoch": 14.743961352657005, "grad_norm": 6.496521949768066, "learning_rate": 0.001, "loss": 2.0797, "step": 170912 }, { "epoch": 14.748792270531402, "grad_norm": 0.7262991070747375, "learning_rate": 0.001, "loss": 2.0883, "step": 170968 }, { "epoch": 14.753623188405797, "grad_norm": 3.1189754009246826, "learning_rate": 0.001, "loss": 2.0849, "step": 171024 }, { "epoch": 14.758454106280194, "grad_norm": 0.6200608015060425, "learning_rate": 0.001, "loss": 2.0812, "step": 171080 }, { "epoch": 14.76328502415459, "grad_norm": 0.5921116471290588, "learning_rate": 0.001, "loss": 2.0924, "step": 171136 }, { "epoch": 14.768115942028986, "grad_norm": 3.781170606613159, "learning_rate": 0.001, "loss": 2.0938, "step": 171192 }, { "epoch": 14.772946859903382, "grad_norm": 0.4791521430015564, "learning_rate": 0.001, "loss": 2.0757, "step": 171248 }, { "epoch": 14.777777777777779, "grad_norm": 0.5061606168746948, "learning_rate": 0.001, "loss": 2.0835, "step": 171304 }, { "epoch": 14.782608695652174, "grad_norm": 0.9491889476776123, "learning_rate": 0.001, "loss": 2.0846, "step": 171360 }, { "epoch": 14.78743961352657, "grad_norm": 0.35187625885009766, "learning_rate": 0.001, "loss": 2.0745, "step": 171416 }, { "epoch": 14.792270531400966, "grad_norm": 1.5387614965438843, "learning_rate": 0.001, "loss": 2.0767, "step": 171472 }, { "epoch": 14.797101449275363, "grad_norm": 0.6680706739425659, "learning_rate": 0.001, "loss": 2.0822, "step": 171528 }, { "epoch": 14.801932367149758, "grad_norm": 0.6752028465270996, "learning_rate": 0.001, "loss": 2.0871, "step": 171584 }, { "epoch": 14.806763285024154, "grad_norm": 0.6330510973930359, "learning_rate": 0.001, "loss": 2.0802, "step": 171640 }, { "epoch": 14.81159420289855, "grad_norm": 0.5179868340492249, "learning_rate": 0.001, "loss": 2.0696, "step": 171696 }, { "epoch": 14.816425120772946, "grad_norm": 0.5982415080070496, "learning_rate": 0.001, "loss": 2.0885, "step": 171752 }, { "epoch": 14.821256038647343, "grad_norm": 0.7442541122436523, "learning_rate": 0.001, "loss": 2.0844, "step": 171808 }, { "epoch": 14.826086956521738, "grad_norm": 0.8595708012580872, "learning_rate": 0.001, "loss": 2.0659, "step": 171864 }, { "epoch": 14.830917874396135, "grad_norm": 1.9857449531555176, "learning_rate": 0.001, "loss": 2.0718, "step": 171920 }, { "epoch": 14.83574879227053, "grad_norm": 0.7514676451683044, "learning_rate": 0.001, "loss": 2.0702, "step": 171976 }, { "epoch": 14.840579710144928, "grad_norm": 2.298125982284546, "learning_rate": 0.001, "loss": 2.0668, "step": 172032 }, { "epoch": 14.845410628019323, "grad_norm": 2.7248640060424805, "learning_rate": 0.001, "loss": 2.0513, "step": 172088 }, { "epoch": 14.85024154589372, "grad_norm": 0.5040480494499207, "learning_rate": 0.001, "loss": 2.0721, "step": 172144 }, { "epoch": 14.855072463768115, "grad_norm": 1.8116939067840576, "learning_rate": 0.001, "loss": 2.0494, "step": 172200 }, { "epoch": 14.859903381642512, "grad_norm": 2.097075939178467, "learning_rate": 0.001, "loss": 2.0619, "step": 172256 }, { "epoch": 14.864734299516908, "grad_norm": 1.0462239980697632, "learning_rate": 0.001, "loss": 2.0635, "step": 172312 }, { "epoch": 14.869565217391305, "grad_norm": 1.0059878826141357, "learning_rate": 0.001, "loss": 2.0747, "step": 172368 }, { "epoch": 14.8743961352657, "grad_norm": 0.5482062101364136, "learning_rate": 0.001, "loss": 2.0837, "step": 172424 }, { "epoch": 14.879227053140097, "grad_norm": 0.5053689479827881, "learning_rate": 0.001, "loss": 2.0878, "step": 172480 }, { "epoch": 14.884057971014492, "grad_norm": 0.6957647800445557, "learning_rate": 0.001, "loss": 2.0968, "step": 172536 }, { "epoch": 14.88888888888889, "grad_norm": 1.373155951499939, "learning_rate": 0.001, "loss": 2.0965, "step": 172592 }, { "epoch": 14.893719806763285, "grad_norm": 1.6637959480285645, "learning_rate": 0.001, "loss": 2.0849, "step": 172648 }, { "epoch": 14.898550724637682, "grad_norm": 0.7371339201927185, "learning_rate": 0.001, "loss": 2.0723, "step": 172704 }, { "epoch": 14.903381642512077, "grad_norm": 2.699892044067383, "learning_rate": 0.001, "loss": 2.0744, "step": 172760 }, { "epoch": 14.908212560386474, "grad_norm": 2.290555477142334, "learning_rate": 0.001, "loss": 2.0834, "step": 172816 }, { "epoch": 14.91304347826087, "grad_norm": 3.486701250076294, "learning_rate": 0.001, "loss": 2.11, "step": 172872 }, { "epoch": 14.917874396135266, "grad_norm": 2.754394292831421, "learning_rate": 0.001, "loss": 2.1203, "step": 172928 }, { "epoch": 14.922705314009661, "grad_norm": 2.6272425651550293, "learning_rate": 0.001, "loss": 2.1018, "step": 172984 }, { "epoch": 14.927536231884059, "grad_norm": 1.0866725444793701, "learning_rate": 0.001, "loss": 2.0964, "step": 173040 }, { "epoch": 14.932367149758454, "grad_norm": 0.8815363049507141, "learning_rate": 0.001, "loss": 2.123, "step": 173096 }, { "epoch": 14.93719806763285, "grad_norm": 2.0062432289123535, "learning_rate": 0.001, "loss": 2.1019, "step": 173152 }, { "epoch": 14.942028985507246, "grad_norm": 1.198691725730896, "learning_rate": 0.001, "loss": 2.0836, "step": 173208 }, { "epoch": 14.946859903381643, "grad_norm": 2.507596492767334, "learning_rate": 0.001, "loss": 2.0808, "step": 173264 }, { "epoch": 14.951690821256038, "grad_norm": 0.8321933150291443, "learning_rate": 0.001, "loss": 2.093, "step": 173320 }, { "epoch": 14.956521739130435, "grad_norm": 1.0588809251785278, "learning_rate": 0.001, "loss": 2.1236, "step": 173376 }, { "epoch": 14.96135265700483, "grad_norm": 0.8044658303260803, "learning_rate": 0.001, "loss": 2.1221, "step": 173432 }, { "epoch": 14.966183574879228, "grad_norm": 1.4201046228408813, "learning_rate": 0.001, "loss": 2.0964, "step": 173488 }, { "epoch": 14.971014492753623, "grad_norm": 0.7640172243118286, "learning_rate": 0.001, "loss": 2.0873, "step": 173544 }, { "epoch": 14.97584541062802, "grad_norm": 4.29083776473999, "learning_rate": 0.001, "loss": 2.0898, "step": 173600 }, { "epoch": 14.980676328502415, "grad_norm": 1.1134473085403442, "learning_rate": 0.001, "loss": 2.086, "step": 173656 }, { "epoch": 14.985507246376812, "grad_norm": 1.2478002309799194, "learning_rate": 0.001, "loss": 2.0936, "step": 173712 }, { "epoch": 14.990338164251208, "grad_norm": 1.1813775300979614, "learning_rate": 0.001, "loss": 2.0994, "step": 173768 }, { "epoch": 14.995169082125603, "grad_norm": 2.0440359115600586, "learning_rate": 0.001, "loss": 2.1031, "step": 173824 }, { "epoch": 15.0, "grad_norm": 1.2008017301559448, "learning_rate": 0.001, "loss": 2.1042, "step": 173880 }, { "epoch": 15.004830917874395, "grad_norm": 2.8801169395446777, "learning_rate": 0.001, "loss": 2.0839, "step": 173936 }, { "epoch": 15.009661835748792, "grad_norm": 2.73030161857605, "learning_rate": 0.001, "loss": 2.083, "step": 173992 }, { "epoch": 15.014492753623188, "grad_norm": 15.803240776062012, "learning_rate": 0.001, "loss": 2.0829, "step": 174048 }, { "epoch": 15.019323671497585, "grad_norm": 0.7853598594665527, "learning_rate": 0.001, "loss": 2.0704, "step": 174104 }, { "epoch": 15.02415458937198, "grad_norm": 0.692227303981781, "learning_rate": 0.001, "loss": 2.0774, "step": 174160 }, { "epoch": 15.028985507246377, "grad_norm": 1.200893759727478, "learning_rate": 0.001, "loss": 2.0658, "step": 174216 }, { "epoch": 15.033816425120772, "grad_norm": 2.0408151149749756, "learning_rate": 0.001, "loss": 2.0656, "step": 174272 }, { "epoch": 15.03864734299517, "grad_norm": 1.1072137355804443, "learning_rate": 0.001, "loss": 2.0707, "step": 174328 }, { "epoch": 15.043478260869565, "grad_norm": 3.77675724029541, "learning_rate": 0.001, "loss": 2.0623, "step": 174384 }, { "epoch": 15.048309178743962, "grad_norm": 2.0611681938171387, "learning_rate": 0.001, "loss": 2.0503, "step": 174440 }, { "epoch": 15.053140096618357, "grad_norm": 1.0527141094207764, "learning_rate": 0.001, "loss": 2.0488, "step": 174496 }, { "epoch": 15.057971014492754, "grad_norm": 2.0727038383483887, "learning_rate": 0.001, "loss": 2.0519, "step": 174552 }, { "epoch": 15.06280193236715, "grad_norm": 7.793724536895752, "learning_rate": 0.001, "loss": 2.0593, "step": 174608 }, { "epoch": 15.067632850241546, "grad_norm": 1.8213049173355103, "learning_rate": 0.001, "loss": 2.0536, "step": 174664 }, { "epoch": 15.072463768115941, "grad_norm": 2.0224125385284424, "learning_rate": 0.001, "loss": 2.0852, "step": 174720 }, { "epoch": 15.077294685990339, "grad_norm": 0.8237075805664062, "learning_rate": 0.001, "loss": 2.0908, "step": 174776 }, { "epoch": 15.082125603864734, "grad_norm": 1.308464527130127, "learning_rate": 0.001, "loss": 2.0881, "step": 174832 }, { "epoch": 15.08695652173913, "grad_norm": 0.875292181968689, "learning_rate": 0.001, "loss": 2.0748, "step": 174888 }, { "epoch": 15.091787439613526, "grad_norm": 2.6404037475585938, "learning_rate": 0.001, "loss": 2.0831, "step": 174944 }, { "epoch": 15.096618357487923, "grad_norm": 2.686084032058716, "learning_rate": 0.001, "loss": 2.0669, "step": 175000 }, { "epoch": 15.101449275362318, "grad_norm": 6.5948100090026855, "learning_rate": 0.001, "loss": 2.0577, "step": 175056 }, { "epoch": 15.106280193236715, "grad_norm": 0.43562135100364685, "learning_rate": 0.001, "loss": 2.0688, "step": 175112 }, { "epoch": 15.11111111111111, "grad_norm": 0.8576856255531311, "learning_rate": 0.001, "loss": 2.0677, "step": 175168 }, { "epoch": 15.115942028985508, "grad_norm": 1.9057815074920654, "learning_rate": 0.001, "loss": 2.0662, "step": 175224 }, { "epoch": 15.120772946859903, "grad_norm": 0.6675427556037903, "learning_rate": 0.001, "loss": 2.0771, "step": 175280 }, { "epoch": 15.1256038647343, "grad_norm": 2.9588990211486816, "learning_rate": 0.001, "loss": 2.077, "step": 175336 }, { "epoch": 15.130434782608695, "grad_norm": 3.4082581996917725, "learning_rate": 0.001, "loss": 2.0581, "step": 175392 }, { "epoch": 15.135265700483092, "grad_norm": 0.7630171179771423, "learning_rate": 0.001, "loss": 2.0753, "step": 175448 }, { "epoch": 15.140096618357488, "grad_norm": 26.633541107177734, "learning_rate": 0.001, "loss": 2.0917, "step": 175504 }, { "epoch": 15.144927536231885, "grad_norm": 2.04732084274292, "learning_rate": 0.001, "loss": 2.0741, "step": 175560 }, { "epoch": 15.14975845410628, "grad_norm": 1.5380600690841675, "learning_rate": 0.001, "loss": 2.0726, "step": 175616 }, { "epoch": 15.154589371980677, "grad_norm": 1.9612212181091309, "learning_rate": 0.001, "loss": 2.0644, "step": 175672 }, { "epoch": 15.159420289855072, "grad_norm": 1.4751856327056885, "learning_rate": 0.001, "loss": 2.067, "step": 175728 }, { "epoch": 15.16425120772947, "grad_norm": 1.3604212999343872, "learning_rate": 0.001, "loss": 2.0591, "step": 175784 }, { "epoch": 15.169082125603865, "grad_norm": 0.4815526604652405, "learning_rate": 0.001, "loss": 2.0588, "step": 175840 }, { "epoch": 15.173913043478262, "grad_norm": 0.9640748500823975, "learning_rate": 0.001, "loss": 2.0745, "step": 175896 }, { "epoch": 15.178743961352657, "grad_norm": 1.3795249462127686, "learning_rate": 0.001, "loss": 2.0829, "step": 175952 }, { "epoch": 15.183574879227054, "grad_norm": 0.7823031544685364, "learning_rate": 0.001, "loss": 2.0783, "step": 176008 }, { "epoch": 15.18840579710145, "grad_norm": 0.3591630458831787, "learning_rate": 0.001, "loss": 2.0659, "step": 176064 }, { "epoch": 15.193236714975846, "grad_norm": 1.758555293083191, "learning_rate": 0.001, "loss": 2.0659, "step": 176120 }, { "epoch": 15.198067632850242, "grad_norm": 0.614258885383606, "learning_rate": 0.001, "loss": 2.0427, "step": 176176 }, { "epoch": 15.202898550724637, "grad_norm": 0.8358861804008484, "learning_rate": 0.001, "loss": 2.0504, "step": 176232 }, { "epoch": 15.207729468599034, "grad_norm": 4.243839740753174, "learning_rate": 0.001, "loss": 2.0703, "step": 176288 }, { "epoch": 15.21256038647343, "grad_norm": 3.7503905296325684, "learning_rate": 0.001, "loss": 2.0897, "step": 176344 }, { "epoch": 15.217391304347826, "grad_norm": 1.2701398134231567, "learning_rate": 0.001, "loss": 2.0589, "step": 176400 }, { "epoch": 15.222222222222221, "grad_norm": 1.6691104173660278, "learning_rate": 0.001, "loss": 2.0652, "step": 176456 }, { "epoch": 15.227053140096618, "grad_norm": 0.6347532272338867, "learning_rate": 0.001, "loss": 2.0831, "step": 176512 }, { "epoch": 15.231884057971014, "grad_norm": 1.848062515258789, "learning_rate": 0.001, "loss": 2.0849, "step": 176568 }, { "epoch": 15.23671497584541, "grad_norm": 2.6668789386749268, "learning_rate": 0.001, "loss": 2.0843, "step": 176624 }, { "epoch": 15.241545893719806, "grad_norm": 0.9065186381340027, "learning_rate": 0.001, "loss": 2.0665, "step": 176680 }, { "epoch": 15.246376811594203, "grad_norm": 0.6160411834716797, "learning_rate": 0.001, "loss": 2.0749, "step": 176736 }, { "epoch": 15.251207729468598, "grad_norm": 0.4809981882572174, "learning_rate": 0.001, "loss": 2.0729, "step": 176792 }, { "epoch": 15.256038647342995, "grad_norm": 0.49896150827407837, "learning_rate": 0.001, "loss": 2.074, "step": 176848 }, { "epoch": 15.26086956521739, "grad_norm": 1.1309893131256104, "learning_rate": 0.001, "loss": 2.0759, "step": 176904 }, { "epoch": 15.265700483091788, "grad_norm": 0.8129328489303589, "learning_rate": 0.001, "loss": 2.0633, "step": 176960 }, { "epoch": 15.270531400966183, "grad_norm": 1.58247709274292, "learning_rate": 0.001, "loss": 2.0658, "step": 177016 }, { "epoch": 15.27536231884058, "grad_norm": 2.141803503036499, "learning_rate": 0.001, "loss": 2.0629, "step": 177072 }, { "epoch": 15.280193236714975, "grad_norm": 0.7735856771469116, "learning_rate": 0.001, "loss": 2.066, "step": 177128 }, { "epoch": 15.285024154589372, "grad_norm": 1.1034777164459229, "learning_rate": 0.001, "loss": 2.0656, "step": 177184 }, { "epoch": 15.289855072463768, "grad_norm": 0.8623336553573608, "learning_rate": 0.001, "loss": 2.0688, "step": 177240 }, { "epoch": 15.294685990338165, "grad_norm": 1.3948649168014526, "learning_rate": 0.001, "loss": 2.0788, "step": 177296 }, { "epoch": 15.29951690821256, "grad_norm": 2.8162314891815186, "learning_rate": 0.001, "loss": 2.0871, "step": 177352 }, { "epoch": 15.304347826086957, "grad_norm": 0.8825968503952026, "learning_rate": 0.001, "loss": 2.0728, "step": 177408 }, { "epoch": 15.309178743961352, "grad_norm": 2.773601531982422, "learning_rate": 0.001, "loss": 2.0977, "step": 177464 }, { "epoch": 15.31400966183575, "grad_norm": 0.6833995580673218, "learning_rate": 0.001, "loss": 2.0845, "step": 177520 }, { "epoch": 15.318840579710145, "grad_norm": 1.07160222530365, "learning_rate": 0.001, "loss": 2.0751, "step": 177576 }, { "epoch": 15.323671497584542, "grad_norm": 4.794754505157471, "learning_rate": 0.001, "loss": 2.0675, "step": 177632 }, { "epoch": 15.328502415458937, "grad_norm": 0.7153533101081848, "learning_rate": 0.001, "loss": 2.0563, "step": 177688 }, { "epoch": 15.333333333333334, "grad_norm": 2.763734817504883, "learning_rate": 0.001, "loss": 2.0641, "step": 177744 }, { "epoch": 15.33816425120773, "grad_norm": 2.5734941959381104, "learning_rate": 0.001, "loss": 2.0519, "step": 177800 }, { "epoch": 15.342995169082126, "grad_norm": 1.5281262397766113, "learning_rate": 0.001, "loss": 2.0511, "step": 177856 }, { "epoch": 15.347826086956522, "grad_norm": 3.705000400543213, "learning_rate": 0.001, "loss": 2.056, "step": 177912 }, { "epoch": 15.352657004830919, "grad_norm": 0.9009491205215454, "learning_rate": 0.001, "loss": 2.0577, "step": 177968 }, { "epoch": 15.357487922705314, "grad_norm": 1.4239006042480469, "learning_rate": 0.001, "loss": 2.0628, "step": 178024 }, { "epoch": 15.36231884057971, "grad_norm": 0.815852701663971, "learning_rate": 0.001, "loss": 2.0767, "step": 178080 }, { "epoch": 15.367149758454106, "grad_norm": 1.8801230192184448, "learning_rate": 0.001, "loss": 2.0707, "step": 178136 }, { "epoch": 15.371980676328503, "grad_norm": 1.3395931720733643, "learning_rate": 0.001, "loss": 2.0628, "step": 178192 }, { "epoch": 15.376811594202898, "grad_norm": 0.7946965098381042, "learning_rate": 0.001, "loss": 2.0886, "step": 178248 }, { "epoch": 15.381642512077295, "grad_norm": 4.0611443519592285, "learning_rate": 0.001, "loss": 2.0826, "step": 178304 }, { "epoch": 15.38647342995169, "grad_norm": 13.194400787353516, "learning_rate": 0.001, "loss": 2.0809, "step": 178360 }, { "epoch": 15.391304347826088, "grad_norm": 1.3880815505981445, "learning_rate": 0.001, "loss": 2.0741, "step": 178416 }, { "epoch": 15.396135265700483, "grad_norm": 0.47312793135643005, "learning_rate": 0.001, "loss": 2.0665, "step": 178472 }, { "epoch": 15.40096618357488, "grad_norm": 1.9929115772247314, "learning_rate": 0.001, "loss": 2.077, "step": 178528 }, { "epoch": 15.405797101449275, "grad_norm": 12.217371940612793, "learning_rate": 0.001, "loss": 2.0614, "step": 178584 }, { "epoch": 15.41062801932367, "grad_norm": 1.0362637042999268, "learning_rate": 0.001, "loss": 2.0675, "step": 178640 }, { "epoch": 15.415458937198068, "grad_norm": 1.1414883136749268, "learning_rate": 0.001, "loss": 2.0654, "step": 178696 }, { "epoch": 15.420289855072463, "grad_norm": 1.9589189291000366, "learning_rate": 0.001, "loss": 2.0667, "step": 178752 }, { "epoch": 15.42512077294686, "grad_norm": 1.7324857711791992, "learning_rate": 0.001, "loss": 2.0746, "step": 178808 }, { "epoch": 15.429951690821255, "grad_norm": 0.7011397480964661, "learning_rate": 0.001, "loss": 2.0658, "step": 178864 }, { "epoch": 15.434782608695652, "grad_norm": 10.742341041564941, "learning_rate": 0.001, "loss": 2.0747, "step": 178920 }, { "epoch": 15.439613526570048, "grad_norm": 0.4875578284263611, "learning_rate": 0.001, "loss": 2.0845, "step": 178976 }, { "epoch": 15.444444444444445, "grad_norm": 11.339142799377441, "learning_rate": 0.001, "loss": 2.0809, "step": 179032 }, { "epoch": 15.44927536231884, "grad_norm": 1.1817768812179565, "learning_rate": 0.001, "loss": 2.0765, "step": 179088 }, { "epoch": 15.454106280193237, "grad_norm": 0.7472344636917114, "learning_rate": 0.001, "loss": 2.0891, "step": 179144 }, { "epoch": 15.458937198067632, "grad_norm": 0.4393731653690338, "learning_rate": 0.001, "loss": 2.0817, "step": 179200 }, { "epoch": 15.46376811594203, "grad_norm": 0.7980862259864807, "learning_rate": 0.001, "loss": 2.0828, "step": 179256 }, { "epoch": 15.468599033816425, "grad_norm": 0.6689130663871765, "learning_rate": 0.001, "loss": 2.0651, "step": 179312 }, { "epoch": 15.473429951690822, "grad_norm": 0.6272009611129761, "learning_rate": 0.001, "loss": 2.0844, "step": 179368 }, { "epoch": 15.478260869565217, "grad_norm": 0.5187233090400696, "learning_rate": 0.001, "loss": 2.0872, "step": 179424 }, { "epoch": 15.483091787439614, "grad_norm": 12.676966667175293, "learning_rate": 0.001, "loss": 2.0807, "step": 179480 }, { "epoch": 15.48792270531401, "grad_norm": 6.730846881866455, "learning_rate": 0.001, "loss": 2.0866, "step": 179536 }, { "epoch": 15.492753623188406, "grad_norm": 0.6037222743034363, "learning_rate": 0.001, "loss": 2.0849, "step": 179592 }, { "epoch": 15.497584541062801, "grad_norm": 2.9655251502990723, "learning_rate": 0.001, "loss": 2.0826, "step": 179648 }, { "epoch": 15.502415458937199, "grad_norm": 3.5572848320007324, "learning_rate": 0.001, "loss": 2.0958, "step": 179704 }, { "epoch": 15.507246376811594, "grad_norm": 0.6195342540740967, "learning_rate": 0.001, "loss": 2.098, "step": 179760 }, { "epoch": 15.51207729468599, "grad_norm": 1.8525114059448242, "learning_rate": 0.001, "loss": 2.0848, "step": 179816 }, { "epoch": 15.516908212560386, "grad_norm": 3.5596747398376465, "learning_rate": 0.001, "loss": 2.0785, "step": 179872 }, { "epoch": 15.521739130434783, "grad_norm": 2.71140456199646, "learning_rate": 0.001, "loss": 2.0815, "step": 179928 }, { "epoch": 15.526570048309178, "grad_norm": 4.051966190338135, "learning_rate": 0.001, "loss": 2.0966, "step": 179984 }, { "epoch": 15.531400966183575, "grad_norm": 1.5408616065979004, "learning_rate": 0.001, "loss": 2.1054, "step": 180040 }, { "epoch": 15.53623188405797, "grad_norm": 2.197767734527588, "learning_rate": 0.001, "loss": 2.1243, "step": 180096 }, { "epoch": 15.541062801932368, "grad_norm": 1.8272418975830078, "learning_rate": 0.001, "loss": 2.1012, "step": 180152 }, { "epoch": 15.545893719806763, "grad_norm": 0.5757448077201843, "learning_rate": 0.001, "loss": 2.1124, "step": 180208 }, { "epoch": 15.55072463768116, "grad_norm": 1.2812471389770508, "learning_rate": 0.001, "loss": 2.1236, "step": 180264 }, { "epoch": 15.555555555555555, "grad_norm": 0.48313093185424805, "learning_rate": 0.001, "loss": 2.106, "step": 180320 }, { "epoch": 15.560386473429952, "grad_norm": 0.6699447631835938, "learning_rate": 0.001, "loss": 2.0914, "step": 180376 }, { "epoch": 15.565217391304348, "grad_norm": 0.8164467811584473, "learning_rate": 0.001, "loss": 2.0871, "step": 180432 }, { "epoch": 15.570048309178745, "grad_norm": 0.6131106615066528, "learning_rate": 0.001, "loss": 2.0856, "step": 180488 }, { "epoch": 15.57487922705314, "grad_norm": 0.964442789554596, "learning_rate": 0.001, "loss": 2.076, "step": 180544 }, { "epoch": 15.579710144927537, "grad_norm": 1.0887730121612549, "learning_rate": 0.001, "loss": 2.0736, "step": 180600 }, { "epoch": 15.584541062801932, "grad_norm": 1.0917245149612427, "learning_rate": 0.001, "loss": 2.0707, "step": 180656 }, { "epoch": 15.58937198067633, "grad_norm": 0.9515587687492371, "learning_rate": 0.001, "loss": 2.0601, "step": 180712 }, { "epoch": 15.594202898550725, "grad_norm": 0.5061343908309937, "learning_rate": 0.001, "loss": 2.0612, "step": 180768 }, { "epoch": 15.59903381642512, "grad_norm": 0.8528938889503479, "learning_rate": 0.001, "loss": 2.0606, "step": 180824 }, { "epoch": 15.603864734299517, "grad_norm": 0.5530614256858826, "learning_rate": 0.001, "loss": 2.0586, "step": 180880 }, { "epoch": 15.608695652173914, "grad_norm": 0.5599806904792786, "learning_rate": 0.001, "loss": 2.0673, "step": 180936 }, { "epoch": 15.61352657004831, "grad_norm": 8.718918800354004, "learning_rate": 0.001, "loss": 2.0261, "step": 180992 }, { "epoch": 15.618357487922705, "grad_norm": 5.93067741394043, "learning_rate": 0.001, "loss": 2.0332, "step": 181048 }, { "epoch": 15.623188405797102, "grad_norm": 0.3749951422214508, "learning_rate": 0.001, "loss": 2.0435, "step": 181104 }, { "epoch": 15.628019323671497, "grad_norm": 2.1972835063934326, "learning_rate": 0.001, "loss": 2.0358, "step": 181160 }, { "epoch": 15.632850241545894, "grad_norm": 0.5415869951248169, "learning_rate": 0.001, "loss": 2.0356, "step": 181216 }, { "epoch": 15.63768115942029, "grad_norm": 3.1783981323242188, "learning_rate": 0.001, "loss": 2.0236, "step": 181272 }, { "epoch": 15.642512077294686, "grad_norm": 5.332545280456543, "learning_rate": 0.001, "loss": 2.0392, "step": 181328 }, { "epoch": 15.647342995169081, "grad_norm": 1.6674243211746216, "learning_rate": 0.001, "loss": 2.0346, "step": 181384 }, { "epoch": 15.652173913043478, "grad_norm": 1.740112066268921, "learning_rate": 0.001, "loss": 2.029, "step": 181440 }, { "epoch": 15.657004830917874, "grad_norm": 1.439554214477539, "learning_rate": 0.001, "loss": 2.0336, "step": 181496 }, { "epoch": 15.66183574879227, "grad_norm": 2.1376547813415527, "learning_rate": 0.001, "loss": 2.0321, "step": 181552 }, { "epoch": 15.666666666666666, "grad_norm": 1.2525209188461304, "learning_rate": 0.001, "loss": 2.0593, "step": 181608 }, { "epoch": 15.671497584541063, "grad_norm": 3.7702012062072754, "learning_rate": 0.001, "loss": 2.0545, "step": 181664 }, { "epoch": 15.676328502415458, "grad_norm": 2.050967216491699, "learning_rate": 0.001, "loss": 2.05, "step": 181720 }, { "epoch": 15.681159420289855, "grad_norm": 0.9646165370941162, "learning_rate": 0.001, "loss": 2.0443, "step": 181776 }, { "epoch": 15.68599033816425, "grad_norm": 2.4290432929992676, "learning_rate": 0.001, "loss": 2.0561, "step": 181832 }, { "epoch": 15.690821256038648, "grad_norm": 1.2598652839660645, "learning_rate": 0.001, "loss": 2.0527, "step": 181888 }, { "epoch": 15.695652173913043, "grad_norm": 2.173372983932495, "learning_rate": 0.001, "loss": 2.0657, "step": 181944 }, { "epoch": 15.70048309178744, "grad_norm": 1.6048988103866577, "learning_rate": 0.001, "loss": 2.064, "step": 182000 }, { "epoch": 15.705314009661835, "grad_norm": 81.71548461914062, "learning_rate": 0.001, "loss": 2.0626, "step": 182056 }, { "epoch": 15.710144927536232, "grad_norm": 7.592867851257324, "learning_rate": 0.001, "loss": 2.0513, "step": 182112 }, { "epoch": 15.714975845410628, "grad_norm": 2.293421506881714, "learning_rate": 0.001, "loss": 2.0621, "step": 182168 }, { "epoch": 15.719806763285025, "grad_norm": 3.173964262008667, "learning_rate": 0.001, "loss": 2.0565, "step": 182224 }, { "epoch": 15.72463768115942, "grad_norm": 2.251840114593506, "learning_rate": 0.001, "loss": 2.0606, "step": 182280 }, { "epoch": 15.729468599033817, "grad_norm": 2.4754340648651123, "learning_rate": 0.001, "loss": 2.0664, "step": 182336 }, { "epoch": 15.734299516908212, "grad_norm": 1.4506685733795166, "learning_rate": 0.001, "loss": 2.0561, "step": 182392 }, { "epoch": 15.73913043478261, "grad_norm": 1.3857123851776123, "learning_rate": 0.001, "loss": 2.0484, "step": 182448 }, { "epoch": 15.743961352657005, "grad_norm": 2.828580141067505, "learning_rate": 0.001, "loss": 2.062, "step": 182504 }, { "epoch": 15.748792270531402, "grad_norm": 2.3600261211395264, "learning_rate": 0.001, "loss": 2.0553, "step": 182560 }, { "epoch": 15.753623188405797, "grad_norm": 3.6167361736297607, "learning_rate": 0.001, "loss": 2.0503, "step": 182616 }, { "epoch": 15.758454106280194, "grad_norm": 1.123544692993164, "learning_rate": 0.001, "loss": 2.0508, "step": 182672 }, { "epoch": 15.76328502415459, "grad_norm": 26.867191314697266, "learning_rate": 0.001, "loss": 2.042, "step": 182728 }, { "epoch": 15.768115942028986, "grad_norm": 2.123518228530884, "learning_rate": 0.001, "loss": 2.0407, "step": 182784 }, { "epoch": 15.772946859903382, "grad_norm": 0.5891660451889038, "learning_rate": 0.001, "loss": 2.0321, "step": 182840 }, { "epoch": 15.777777777777779, "grad_norm": 2.4394891262054443, "learning_rate": 0.001, "loss": 2.0383, "step": 182896 }, { "epoch": 15.782608695652174, "grad_norm": 90.72672271728516, "learning_rate": 0.001, "loss": 2.0539, "step": 182952 }, { "epoch": 15.78743961352657, "grad_norm": 0.6709445118904114, "learning_rate": 0.001, "loss": 2.0503, "step": 183008 }, { "epoch": 15.792270531400966, "grad_norm": 0.8341113924980164, "learning_rate": 0.001, "loss": 2.0376, "step": 183064 }, { "epoch": 15.797101449275363, "grad_norm": 0.7178948521614075, "learning_rate": 0.001, "loss": 2.0439, "step": 183120 }, { "epoch": 15.801932367149758, "grad_norm": 1.4892902374267578, "learning_rate": 0.001, "loss": 2.0617, "step": 183176 }, { "epoch": 15.806763285024154, "grad_norm": 1.0534706115722656, "learning_rate": 0.001, "loss": 2.0899, "step": 183232 }, { "epoch": 15.81159420289855, "grad_norm": 9.325037002563477, "learning_rate": 0.001, "loss": 2.0697, "step": 183288 }, { "epoch": 15.816425120772946, "grad_norm": 2.459322929382324, "learning_rate": 0.001, "loss": 2.0634, "step": 183344 }, { "epoch": 15.821256038647343, "grad_norm": 0.5397865772247314, "learning_rate": 0.001, "loss": 2.0555, "step": 183400 }, { "epoch": 15.826086956521738, "grad_norm": 4.8079304695129395, "learning_rate": 0.001, "loss": 2.0689, "step": 183456 }, { "epoch": 15.830917874396135, "grad_norm": 1.0921560525894165, "learning_rate": 0.001, "loss": 2.0678, "step": 183512 }, { "epoch": 15.83574879227053, "grad_norm": 1.0716294050216675, "learning_rate": 0.001, "loss": 2.0594, "step": 183568 }, { "epoch": 15.840579710144928, "grad_norm": 1.8330527544021606, "learning_rate": 0.001, "loss": 2.052, "step": 183624 }, { "epoch": 15.845410628019323, "grad_norm": 0.46384838223457336, "learning_rate": 0.001, "loss": 2.0566, "step": 183680 }, { "epoch": 15.85024154589372, "grad_norm": 0.8690996766090393, "learning_rate": 0.001, "loss": 2.0444, "step": 183736 }, { "epoch": 15.855072463768115, "grad_norm": 0.5121428966522217, "learning_rate": 0.001, "loss": 2.053, "step": 183792 }, { "epoch": 15.859903381642512, "grad_norm": 0.7626178860664368, "learning_rate": 0.001, "loss": 2.0402, "step": 183848 }, { "epoch": 15.864734299516908, "grad_norm": 0.7143154740333557, "learning_rate": 0.001, "loss": 2.031, "step": 183904 }, { "epoch": 15.869565217391305, "grad_norm": 0.6300618052482605, "learning_rate": 0.001, "loss": 2.0429, "step": 183960 }, { "epoch": 15.8743961352657, "grad_norm": 1.8488914966583252, "learning_rate": 0.001, "loss": 2.0487, "step": 184016 }, { "epoch": 15.879227053140097, "grad_norm": 0.43330931663513184, "learning_rate": 0.001, "loss": 2.0502, "step": 184072 }, { "epoch": 15.884057971014492, "grad_norm": 1.4612081050872803, "learning_rate": 0.001, "loss": 2.0494, "step": 184128 }, { "epoch": 15.88888888888889, "grad_norm": 0.44561025500297546, "learning_rate": 0.001, "loss": 2.053, "step": 184184 }, { "epoch": 15.893719806763285, "grad_norm": 0.5649104118347168, "learning_rate": 0.001, "loss": 2.0799, "step": 184240 }, { "epoch": 15.898550724637682, "grad_norm": 0.5883121490478516, "learning_rate": 0.001, "loss": 2.0793, "step": 184296 }, { "epoch": 15.903381642512077, "grad_norm": 0.6387181878089905, "learning_rate": 0.001, "loss": 2.0722, "step": 184352 }, { "epoch": 15.908212560386474, "grad_norm": 0.8768320083618164, "learning_rate": 0.001, "loss": 2.0599, "step": 184408 }, { "epoch": 15.91304347826087, "grad_norm": 0.44215306639671326, "learning_rate": 0.001, "loss": 2.0465, "step": 184464 }, { "epoch": 15.917874396135266, "grad_norm": 0.7613080739974976, "learning_rate": 0.001, "loss": 2.055, "step": 184520 }, { "epoch": 15.922705314009661, "grad_norm": 0.6234574317932129, "learning_rate": 0.001, "loss": 2.0388, "step": 184576 }, { "epoch": 15.927536231884059, "grad_norm": 1.9232505559921265, "learning_rate": 0.001, "loss": 2.047, "step": 184632 }, { "epoch": 15.932367149758454, "grad_norm": 0.6034908890724182, "learning_rate": 0.001, "loss": 2.0612, "step": 184688 }, { "epoch": 15.93719806763285, "grad_norm": 1.0975749492645264, "learning_rate": 0.001, "loss": 2.0643, "step": 184744 }, { "epoch": 15.942028985507246, "grad_norm": 0.9936833381652832, "learning_rate": 0.001, "loss": 2.0755, "step": 184800 }, { "epoch": 15.946859903381643, "grad_norm": 4.649092674255371, "learning_rate": 0.001, "loss": 2.0814, "step": 184856 }, { "epoch": 15.951690821256038, "grad_norm": 1.0066566467285156, "learning_rate": 0.001, "loss": 2.0794, "step": 184912 }, { "epoch": 15.956521739130435, "grad_norm": 0.5824914574623108, "learning_rate": 0.001, "loss": 2.0828, "step": 184968 }, { "epoch": 15.96135265700483, "grad_norm": 0.7705357670783997, "learning_rate": 0.001, "loss": 2.0767, "step": 185024 }, { "epoch": 15.966183574879228, "grad_norm": 1.243199348449707, "learning_rate": 0.001, "loss": 2.0838, "step": 185080 }, { "epoch": 15.971014492753623, "grad_norm": 1.8424952030181885, "learning_rate": 0.001, "loss": 2.0818, "step": 185136 }, { "epoch": 15.97584541062802, "grad_norm": 0.6935544013977051, "learning_rate": 0.001, "loss": 2.0778, "step": 185192 }, { "epoch": 15.980676328502415, "grad_norm": 1.2499427795410156, "learning_rate": 0.001, "loss": 2.0822, "step": 185248 }, { "epoch": 15.985507246376812, "grad_norm": 2.3306241035461426, "learning_rate": 0.001, "loss": 2.0778, "step": 185304 }, { "epoch": 15.990338164251208, "grad_norm": 1.9763392210006714, "learning_rate": 0.001, "loss": 2.0803, "step": 185360 }, { "epoch": 15.995169082125603, "grad_norm": 3.810502290725708, "learning_rate": 0.001, "loss": 2.1005, "step": 185416 }, { "epoch": 16.0, "grad_norm": 4.783773899078369, "learning_rate": 0.001, "loss": 2.0863, "step": 185472 }, { "epoch": 16.004830917874397, "grad_norm": 1.484560489654541, "learning_rate": 0.001, "loss": 2.0592, "step": 185528 }, { "epoch": 16.00966183574879, "grad_norm": 4.203853607177734, "learning_rate": 0.001, "loss": 2.0676, "step": 185584 }, { "epoch": 16.014492753623188, "grad_norm": 0.35111045837402344, "learning_rate": 0.001, "loss": 2.0543, "step": 185640 }, { "epoch": 16.019323671497585, "grad_norm": 1.1685441732406616, "learning_rate": 0.001, "loss": 2.0656, "step": 185696 }, { "epoch": 16.02415458937198, "grad_norm": 1.0759357213974, "learning_rate": 0.001, "loss": 2.0555, "step": 185752 }, { "epoch": 16.028985507246375, "grad_norm": 1.6144546270370483, "learning_rate": 0.001, "loss": 2.0457, "step": 185808 }, { "epoch": 16.033816425120772, "grad_norm": 0.6961482167243958, "learning_rate": 0.001, "loss": 2.0396, "step": 185864 }, { "epoch": 16.03864734299517, "grad_norm": 1.4635748863220215, "learning_rate": 0.001, "loss": 2.0424, "step": 185920 }, { "epoch": 16.043478260869566, "grad_norm": 2.843299150466919, "learning_rate": 0.001, "loss": 2.049, "step": 185976 }, { "epoch": 16.04830917874396, "grad_norm": 0.6566775441169739, "learning_rate": 0.001, "loss": 2.0586, "step": 186032 }, { "epoch": 16.053140096618357, "grad_norm": 2.746415376663208, "learning_rate": 0.001, "loss": 2.0514, "step": 186088 }, { "epoch": 16.057971014492754, "grad_norm": 0.7152809500694275, "learning_rate": 0.001, "loss": 2.0417, "step": 186144 }, { "epoch": 16.06280193236715, "grad_norm": 0.7769956588745117, "learning_rate": 0.001, "loss": 2.04, "step": 186200 }, { "epoch": 16.067632850241544, "grad_norm": 0.5652279853820801, "learning_rate": 0.001, "loss": 2.0417, "step": 186256 }, { "epoch": 16.07246376811594, "grad_norm": 0.34586387872695923, "learning_rate": 0.001, "loss": 2.0478, "step": 186312 }, { "epoch": 16.07729468599034, "grad_norm": 0.9556130766868591, "learning_rate": 0.001, "loss": 2.0387, "step": 186368 }, { "epoch": 16.082125603864736, "grad_norm": 2.0306713581085205, "learning_rate": 0.001, "loss": 2.0405, "step": 186424 }, { "epoch": 16.08695652173913, "grad_norm": 0.4355758726596832, "learning_rate": 0.001, "loss": 2.024, "step": 186480 }, { "epoch": 16.091787439613526, "grad_norm": 2.6172306537628174, "learning_rate": 0.001, "loss": 2.0269, "step": 186536 }, { "epoch": 16.096618357487923, "grad_norm": 2.0479471683502197, "learning_rate": 0.001, "loss": 2.0365, "step": 186592 }, { "epoch": 16.10144927536232, "grad_norm": 1.1867527961730957, "learning_rate": 0.001, "loss": 2.0342, "step": 186648 }, { "epoch": 16.106280193236714, "grad_norm": 4.775577545166016, "learning_rate": 0.001, "loss": 2.0365, "step": 186704 }, { "epoch": 16.11111111111111, "grad_norm": 0.986009955406189, "learning_rate": 0.001, "loss": 2.0305, "step": 186760 }, { "epoch": 16.115942028985508, "grad_norm": 14.898215293884277, "learning_rate": 0.001, "loss": 2.0392, "step": 186816 }, { "epoch": 16.120772946859905, "grad_norm": 5.029755592346191, "learning_rate": 0.001, "loss": 2.0429, "step": 186872 }, { "epoch": 16.1256038647343, "grad_norm": 1.3929964303970337, "learning_rate": 0.001, "loss": 2.0507, "step": 186928 }, { "epoch": 16.130434782608695, "grad_norm": 1.0490055084228516, "learning_rate": 0.001, "loss": 2.0768, "step": 186984 }, { "epoch": 16.135265700483092, "grad_norm": 2.575105905532837, "learning_rate": 0.001, "loss": 2.0654, "step": 187040 }, { "epoch": 16.14009661835749, "grad_norm": 2.0277960300445557, "learning_rate": 0.001, "loss": 2.0672, "step": 187096 }, { "epoch": 16.144927536231883, "grad_norm": 1.6006652116775513, "learning_rate": 0.001, "loss": 2.0845, "step": 187152 }, { "epoch": 16.14975845410628, "grad_norm": 0.9333575367927551, "learning_rate": 0.001, "loss": 2.0924, "step": 187208 }, { "epoch": 16.154589371980677, "grad_norm": 1.1374971866607666, "learning_rate": 0.001, "loss": 2.0983, "step": 187264 }, { "epoch": 16.159420289855074, "grad_norm": 1.6902309656143188, "learning_rate": 0.001, "loss": 2.0883, "step": 187320 }, { "epoch": 16.164251207729468, "grad_norm": 0.85816490650177, "learning_rate": 0.001, "loss": 2.0724, "step": 187376 }, { "epoch": 16.169082125603865, "grad_norm": 1.3843284845352173, "learning_rate": 0.001, "loss": 2.0681, "step": 187432 }, { "epoch": 16.17391304347826, "grad_norm": 0.9836299419403076, "learning_rate": 0.001, "loss": 2.0528, "step": 187488 }, { "epoch": 16.17874396135266, "grad_norm": 12.954838752746582, "learning_rate": 0.001, "loss": 2.0579, "step": 187544 }, { "epoch": 16.183574879227052, "grad_norm": 0.7564798593521118, "learning_rate": 0.001, "loss": 2.0536, "step": 187600 }, { "epoch": 16.18840579710145, "grad_norm": 4.486562728881836, "learning_rate": 0.001, "loss": 2.0483, "step": 187656 }, { "epoch": 16.193236714975846, "grad_norm": 1.0050679445266724, "learning_rate": 0.001, "loss": 2.0481, "step": 187712 }, { "epoch": 16.19806763285024, "grad_norm": 1.318315029144287, "learning_rate": 0.001, "loss": 2.0369, "step": 187768 }, { "epoch": 16.202898550724637, "grad_norm": 1.0909481048583984, "learning_rate": 0.001, "loss": 2.0535, "step": 187824 }, { "epoch": 16.207729468599034, "grad_norm": 0.6456713080406189, "learning_rate": 0.001, "loss": 2.0562, "step": 187880 }, { "epoch": 16.21256038647343, "grad_norm": 0.7630131840705872, "learning_rate": 0.001, "loss": 2.0512, "step": 187936 }, { "epoch": 16.217391304347824, "grad_norm": 0.6994858980178833, "learning_rate": 0.001, "loss": 2.0462, "step": 187992 }, { "epoch": 16.22222222222222, "grad_norm": 138.99832153320312, "learning_rate": 0.001, "loss": 2.0429, "step": 188048 }, { "epoch": 16.22705314009662, "grad_norm": 1.063202977180481, "learning_rate": 0.001, "loss": 2.0385, "step": 188104 }, { "epoch": 16.231884057971016, "grad_norm": 1.0482465028762817, "learning_rate": 0.001, "loss": 2.0369, "step": 188160 }, { "epoch": 16.23671497584541, "grad_norm": 0.6204719543457031, "learning_rate": 0.001, "loss": 2.0325, "step": 188216 }, { "epoch": 16.241545893719806, "grad_norm": 0.5433629155158997, "learning_rate": 0.001, "loss": 2.0473, "step": 188272 }, { "epoch": 16.246376811594203, "grad_norm": 0.6457331776618958, "learning_rate": 0.001, "loss": 2.0499, "step": 188328 }, { "epoch": 16.2512077294686, "grad_norm": 0.9773275256156921, "learning_rate": 0.001, "loss": 2.0747, "step": 188384 }, { "epoch": 16.256038647342994, "grad_norm": 1.3162868022918701, "learning_rate": 0.001, "loss": 2.076, "step": 188440 }, { "epoch": 16.26086956521739, "grad_norm": 2.528379201889038, "learning_rate": 0.001, "loss": 2.0655, "step": 188496 }, { "epoch": 16.265700483091788, "grad_norm": 3.2349584102630615, "learning_rate": 0.001, "loss": 2.0576, "step": 188552 }, { "epoch": 16.270531400966185, "grad_norm": 13.773297309875488, "learning_rate": 0.001, "loss": 2.0565, "step": 188608 }, { "epoch": 16.27536231884058, "grad_norm": 3543.341552734375, "learning_rate": 0.001, "loss": 2.0575, "step": 188664 }, { "epoch": 16.280193236714975, "grad_norm": 1.4820069074630737, "learning_rate": 0.001, "loss": 2.0496, "step": 188720 }, { "epoch": 16.285024154589372, "grad_norm": 1.4564237594604492, "learning_rate": 0.001, "loss": 2.0477, "step": 188776 }, { "epoch": 16.28985507246377, "grad_norm": 1.422391414642334, "learning_rate": 0.001, "loss": 2.0393, "step": 188832 }, { "epoch": 16.294685990338163, "grad_norm": 1.2921226024627686, "learning_rate": 0.001, "loss": 2.0425, "step": 188888 }, { "epoch": 16.29951690821256, "grad_norm": 2.743957042694092, "learning_rate": 0.001, "loss": 2.0581, "step": 188944 }, { "epoch": 16.304347826086957, "grad_norm": 0.8751780986785889, "learning_rate": 0.001, "loss": 2.0667, "step": 189000 }, { "epoch": 16.309178743961354, "grad_norm": 1.3919097185134888, "learning_rate": 0.001, "loss": 2.0556, "step": 189056 }, { "epoch": 16.314009661835748, "grad_norm": 3.9211606979370117, "learning_rate": 0.001, "loss": 2.0602, "step": 189112 }, { "epoch": 16.318840579710145, "grad_norm": 4.1606597900390625, "learning_rate": 0.001, "loss": 2.0558, "step": 189168 }, { "epoch": 16.32367149758454, "grad_norm": 5.697816848754883, "learning_rate": 0.001, "loss": 2.0694, "step": 189224 }, { "epoch": 16.32850241545894, "grad_norm": 2.9886012077331543, "learning_rate": 0.001, "loss": 2.0666, "step": 189280 }, { "epoch": 16.333333333333332, "grad_norm": 2.0707736015319824, "learning_rate": 0.001, "loss": 2.0578, "step": 189336 }, { "epoch": 16.33816425120773, "grad_norm": 2.8228771686553955, "learning_rate": 0.001, "loss": 2.0639, "step": 189392 }, { "epoch": 16.342995169082126, "grad_norm": 1.3293325901031494, "learning_rate": 0.001, "loss": 2.0677, "step": 189448 }, { "epoch": 16.347826086956523, "grad_norm": 1.0883820056915283, "learning_rate": 0.001, "loss": 2.0676, "step": 189504 }, { "epoch": 16.352657004830917, "grad_norm": 0.9181184768676758, "learning_rate": 0.001, "loss": 2.0579, "step": 189560 }, { "epoch": 16.357487922705314, "grad_norm": 2.282514810562134, "learning_rate": 0.001, "loss": 2.0515, "step": 189616 }, { "epoch": 16.36231884057971, "grad_norm": 0.8217980265617371, "learning_rate": 0.001, "loss": 2.0494, "step": 189672 }, { "epoch": 16.367149758454108, "grad_norm": 0.9382248520851135, "learning_rate": 0.001, "loss": 2.0657, "step": 189728 }, { "epoch": 16.3719806763285, "grad_norm": 7.725281715393066, "learning_rate": 0.001, "loss": 2.0618, "step": 189784 }, { "epoch": 16.3768115942029, "grad_norm": 1.3862109184265137, "learning_rate": 0.001, "loss": 2.0495, "step": 189840 }, { "epoch": 16.381642512077295, "grad_norm": 2.093127965927124, "learning_rate": 0.001, "loss": 2.0524, "step": 189896 }, { "epoch": 16.386473429951693, "grad_norm": 2.0543019771575928, "learning_rate": 0.001, "loss": 2.0407, "step": 189952 }, { "epoch": 16.391304347826086, "grad_norm": 1.3897541761398315, "learning_rate": 0.001, "loss": 2.0543, "step": 190008 }, { "epoch": 16.396135265700483, "grad_norm": 2.6832361221313477, "learning_rate": 0.001, "loss": 2.0392, "step": 190064 }, { "epoch": 16.40096618357488, "grad_norm": 0.3399752378463745, "learning_rate": 0.001, "loss": 2.049, "step": 190120 }, { "epoch": 16.405797101449274, "grad_norm": 1.0992438793182373, "learning_rate": 0.001, "loss": 2.0484, "step": 190176 }, { "epoch": 16.41062801932367, "grad_norm": 0.8764998912811279, "learning_rate": 0.001, "loss": 2.0668, "step": 190232 }, { "epoch": 16.415458937198068, "grad_norm": 2.9381978511810303, "learning_rate": 0.001, "loss": 2.0503, "step": 190288 }, { "epoch": 16.420289855072465, "grad_norm": 0.549014151096344, "learning_rate": 0.001, "loss": 2.058, "step": 190344 }, { "epoch": 16.42512077294686, "grad_norm": 0.6904180645942688, "learning_rate": 0.001, "loss": 2.0393, "step": 190400 }, { "epoch": 16.429951690821255, "grad_norm": 1.2345902919769287, "learning_rate": 0.001, "loss": 2.0372, "step": 190456 }, { "epoch": 16.434782608695652, "grad_norm": 1.625508427619934, "learning_rate": 0.001, "loss": 2.0402, "step": 190512 }, { "epoch": 16.43961352657005, "grad_norm": 1.1619187593460083, "learning_rate": 0.001, "loss": 2.0462, "step": 190568 }, { "epoch": 16.444444444444443, "grad_norm": 0.6694504022598267, "learning_rate": 0.001, "loss": 2.0406, "step": 190624 }, { "epoch": 16.44927536231884, "grad_norm": 6.3962860107421875, "learning_rate": 0.001, "loss": 2.0441, "step": 190680 }, { "epoch": 16.454106280193237, "grad_norm": 0.7398471832275391, "learning_rate": 0.001, "loss": 2.0471, "step": 190736 }, { "epoch": 16.458937198067634, "grad_norm": 1.052964448928833, "learning_rate": 0.001, "loss": 2.0464, "step": 190792 }, { "epoch": 16.463768115942027, "grad_norm": 3.402850866317749, "learning_rate": 0.001, "loss": 2.0506, "step": 190848 }, { "epoch": 16.468599033816425, "grad_norm": 0.5928205847740173, "learning_rate": 0.001, "loss": 2.045, "step": 190904 }, { "epoch": 16.47342995169082, "grad_norm": 1.0342724323272705, "learning_rate": 0.001, "loss": 2.0449, "step": 190960 }, { "epoch": 16.47826086956522, "grad_norm": 1.663536548614502, "learning_rate": 0.001, "loss": 2.0497, "step": 191016 }, { "epoch": 16.483091787439612, "grad_norm": 0.3610554039478302, "learning_rate": 0.001, "loss": 2.0368, "step": 191072 }, { "epoch": 16.48792270531401, "grad_norm": 1.7068403959274292, "learning_rate": 0.001, "loss": 2.0324, "step": 191128 }, { "epoch": 16.492753623188406, "grad_norm": 1.5902267694473267, "learning_rate": 0.001, "loss": 2.0276, "step": 191184 }, { "epoch": 16.497584541062803, "grad_norm": 0.4901221990585327, "learning_rate": 0.001, "loss": 2.0246, "step": 191240 }, { "epoch": 16.502415458937197, "grad_norm": 0.8506833910942078, "learning_rate": 0.001, "loss": 2.0389, "step": 191296 }, { "epoch": 16.507246376811594, "grad_norm": 1.642460584640503, "learning_rate": 0.001, "loss": 2.0334, "step": 191352 }, { "epoch": 16.51207729468599, "grad_norm": 12.916961669921875, "learning_rate": 0.001, "loss": 2.0428, "step": 191408 }, { "epoch": 16.516908212560388, "grad_norm": 0.913677990436554, "learning_rate": 0.001, "loss": 2.0574, "step": 191464 }, { "epoch": 16.52173913043478, "grad_norm": 0.5878140926361084, "learning_rate": 0.001, "loss": 2.0541, "step": 191520 }, { "epoch": 16.52657004830918, "grad_norm": 2.80161452293396, "learning_rate": 0.001, "loss": 2.0525, "step": 191576 }, { "epoch": 16.531400966183575, "grad_norm": 0.6691017150878906, "learning_rate": 0.001, "loss": 2.0512, "step": 191632 }, { "epoch": 16.536231884057973, "grad_norm": 7.426950931549072, "learning_rate": 0.001, "loss": 2.0578, "step": 191688 }, { "epoch": 16.541062801932366, "grad_norm": 2.8254494667053223, "learning_rate": 0.001, "loss": 2.0597, "step": 191744 }, { "epoch": 16.545893719806763, "grad_norm": 1.4224467277526855, "learning_rate": 0.001, "loss": 2.0549, "step": 191800 }, { "epoch": 16.55072463768116, "grad_norm": 0.7792167663574219, "learning_rate": 0.001, "loss": 2.0587, "step": 191856 }, { "epoch": 16.555555555555557, "grad_norm": 2.31687331199646, "learning_rate": 0.001, "loss": 2.0427, "step": 191912 }, { "epoch": 16.56038647342995, "grad_norm": 1.6561695337295532, "learning_rate": 0.001, "loss": 2.0321, "step": 191968 }, { "epoch": 16.565217391304348, "grad_norm": 1.8316993713378906, "learning_rate": 0.001, "loss": 2.0397, "step": 192024 }, { "epoch": 16.570048309178745, "grad_norm": 0.8019382953643799, "learning_rate": 0.001, "loss": 2.0575, "step": 192080 }, { "epoch": 16.57487922705314, "grad_norm": 1.310551404953003, "learning_rate": 0.001, "loss": 2.0515, "step": 192136 }, { "epoch": 16.579710144927535, "grad_norm": 0.566013514995575, "learning_rate": 0.001, "loss": 2.0388, "step": 192192 }, { "epoch": 16.584541062801932, "grad_norm": 0.9311992526054382, "learning_rate": 0.001, "loss": 2.0622, "step": 192248 }, { "epoch": 16.58937198067633, "grad_norm": 2.2935314178466797, "learning_rate": 0.001, "loss": 2.0571, "step": 192304 }, { "epoch": 16.594202898550726, "grad_norm": 1.7149572372436523, "learning_rate": 0.001, "loss": 2.045, "step": 192360 }, { "epoch": 16.59903381642512, "grad_norm": 0.6001467704772949, "learning_rate": 0.001, "loss": 2.0345, "step": 192416 }, { "epoch": 16.603864734299517, "grad_norm": 31.393375396728516, "learning_rate": 0.001, "loss": 2.0385, "step": 192472 }, { "epoch": 16.608695652173914, "grad_norm": 1.7302800416946411, "learning_rate": 0.001, "loss": 2.0372, "step": 192528 }, { "epoch": 16.613526570048307, "grad_norm": 0.5918297171592712, "learning_rate": 0.001, "loss": 2.0434, "step": 192584 }, { "epoch": 16.618357487922705, "grad_norm": 0.4707130193710327, "learning_rate": 0.001, "loss": 2.0492, "step": 192640 }, { "epoch": 16.6231884057971, "grad_norm": 0.7781787514686584, "learning_rate": 0.001, "loss": 2.0441, "step": 192696 }, { "epoch": 16.6280193236715, "grad_norm": 0.3937075436115265, "learning_rate": 0.001, "loss": 2.0464, "step": 192752 }, { "epoch": 16.632850241545892, "grad_norm": 0.6312329769134521, "learning_rate": 0.001, "loss": 2.0421, "step": 192808 }, { "epoch": 16.63768115942029, "grad_norm": 0.47556814551353455, "learning_rate": 0.001, "loss": 2.0431, "step": 192864 }, { "epoch": 16.642512077294686, "grad_norm": 2.72989559173584, "learning_rate": 0.001, "loss": 2.042, "step": 192920 }, { "epoch": 16.647342995169083, "grad_norm": 0.5594395995140076, "learning_rate": 0.001, "loss": 2.0633, "step": 192976 }, { "epoch": 16.652173913043477, "grad_norm": 0.4032968580722809, "learning_rate": 0.001, "loss": 2.0592, "step": 193032 }, { "epoch": 16.657004830917874, "grad_norm": 0.3391912281513214, "learning_rate": 0.001, "loss": 2.056, "step": 193088 }, { "epoch": 16.66183574879227, "grad_norm": 1.115797996520996, "learning_rate": 0.001, "loss": 2.0545, "step": 193144 }, { "epoch": 16.666666666666668, "grad_norm": 0.47218626737594604, "learning_rate": 0.001, "loss": 2.0448, "step": 193200 }, { "epoch": 16.67149758454106, "grad_norm": 0.4615825116634369, "learning_rate": 0.001, "loss": 2.0393, "step": 193256 }, { "epoch": 16.67632850241546, "grad_norm": 0.34725797176361084, "learning_rate": 0.001, "loss": 2.0459, "step": 193312 }, { "epoch": 16.681159420289855, "grad_norm": 0.9175338745117188, "learning_rate": 0.001, "loss": 2.0455, "step": 193368 }, { "epoch": 16.685990338164252, "grad_norm": 0.4513278901576996, "learning_rate": 0.001, "loss": 2.0422, "step": 193424 }, { "epoch": 16.690821256038646, "grad_norm": 0.7281111478805542, "learning_rate": 0.001, "loss": 2.0489, "step": 193480 }, { "epoch": 16.695652173913043, "grad_norm": 1.4144949913024902, "learning_rate": 0.001, "loss": 2.0409, "step": 193536 }, { "epoch": 16.70048309178744, "grad_norm": 0.3072492778301239, "learning_rate": 0.001, "loss": 2.0291, "step": 193592 }, { "epoch": 16.705314009661837, "grad_norm": 0.5312104821205139, "learning_rate": 0.001, "loss": 2.0364, "step": 193648 }, { "epoch": 16.71014492753623, "grad_norm": 0.6134706735610962, "learning_rate": 0.001, "loss": 2.0323, "step": 193704 }, { "epoch": 16.714975845410628, "grad_norm": 0.5133123397827148, "learning_rate": 0.001, "loss": 2.0237, "step": 193760 }, { "epoch": 16.719806763285025, "grad_norm": 0.7568499445915222, "learning_rate": 0.001, "loss": 2.0234, "step": 193816 }, { "epoch": 16.72463768115942, "grad_norm": 0.6134992837905884, "learning_rate": 0.001, "loss": 2.0174, "step": 193872 }, { "epoch": 16.729468599033815, "grad_norm": 0.23798410594463348, "learning_rate": 0.001, "loss": 2.0387, "step": 193928 }, { "epoch": 16.734299516908212, "grad_norm": 0.3881063759326935, "learning_rate": 0.001, "loss": 2.0401, "step": 193984 }, { "epoch": 16.73913043478261, "grad_norm": 0.5284119248390198, "learning_rate": 0.001, "loss": 2.0368, "step": 194040 }, { "epoch": 16.743961352657006, "grad_norm": 11.572823524475098, "learning_rate": 0.001, "loss": 2.0388, "step": 194096 }, { "epoch": 16.7487922705314, "grad_norm": 0.9035015106201172, "learning_rate": 0.001, "loss": 2.0318, "step": 194152 }, { "epoch": 16.753623188405797, "grad_norm": 0.5194437503814697, "learning_rate": 0.001, "loss": 2.0321, "step": 194208 }, { "epoch": 16.758454106280194, "grad_norm": 0.4773789346218109, "learning_rate": 0.001, "loss": 2.0342, "step": 194264 }, { "epoch": 16.76328502415459, "grad_norm": 0.4673251211643219, "learning_rate": 0.001, "loss": 2.0408, "step": 194320 }, { "epoch": 16.768115942028984, "grad_norm": 0.35106217861175537, "learning_rate": 0.001, "loss": 2.0449, "step": 194376 }, { "epoch": 16.77294685990338, "grad_norm": 0.42789706587791443, "learning_rate": 0.001, "loss": 2.0412, "step": 194432 }, { "epoch": 16.77777777777778, "grad_norm": 0.32567098736763, "learning_rate": 0.001, "loss": 2.0315, "step": 194488 }, { "epoch": 16.782608695652176, "grad_norm": 0.5352192521095276, "learning_rate": 0.001, "loss": 2.0253, "step": 194544 }, { "epoch": 16.78743961352657, "grad_norm": 0.6705586910247803, "learning_rate": 0.001, "loss": 2.0249, "step": 194600 }, { "epoch": 16.792270531400966, "grad_norm": 0.5575780272483826, "learning_rate": 0.001, "loss": 2.0267, "step": 194656 }, { "epoch": 16.797101449275363, "grad_norm": 0.634821355342865, "learning_rate": 0.001, "loss": 2.0466, "step": 194712 }, { "epoch": 16.80193236714976, "grad_norm": 0.3211246728897095, "learning_rate": 0.001, "loss": 2.0486, "step": 194768 }, { "epoch": 16.806763285024154, "grad_norm": 0.9176349639892578, "learning_rate": 0.001, "loss": 2.0421, "step": 194824 }, { "epoch": 16.81159420289855, "grad_norm": 1.6583536863327026, "learning_rate": 0.001, "loss": 2.0562, "step": 194880 }, { "epoch": 16.816425120772948, "grad_norm": 0.6718212366104126, "learning_rate": 0.001, "loss": 2.0531, "step": 194936 }, { "epoch": 16.82125603864734, "grad_norm": 0.30208057165145874, "learning_rate": 0.001, "loss": 2.0354, "step": 194992 }, { "epoch": 16.82608695652174, "grad_norm": 1.1301578283309937, "learning_rate": 0.001, "loss": 2.0383, "step": 195048 }, { "epoch": 16.830917874396135, "grad_norm": 0.3630686402320862, "learning_rate": 0.001, "loss": 2.0264, "step": 195104 }, { "epoch": 16.835748792270532, "grad_norm": 0.5475546717643738, "learning_rate": 0.001, "loss": 2.0261, "step": 195160 }, { "epoch": 16.840579710144926, "grad_norm": 0.38853809237480164, "learning_rate": 0.001, "loss": 2.0378, "step": 195216 }, { "epoch": 16.845410628019323, "grad_norm": 0.43732473254203796, "learning_rate": 0.001, "loss": 2.0318, "step": 195272 }, { "epoch": 16.85024154589372, "grad_norm": 0.7336903810501099, "learning_rate": 0.001, "loss": 2.0355, "step": 195328 }, { "epoch": 16.855072463768117, "grad_norm": 0.3635547459125519, "learning_rate": 0.001, "loss": 2.0258, "step": 195384 }, { "epoch": 16.85990338164251, "grad_norm": 0.8626202940940857, "learning_rate": 0.001, "loss": 2.0259, "step": 195440 }, { "epoch": 16.864734299516908, "grad_norm": 1.1758482456207275, "learning_rate": 0.001, "loss": 2.0291, "step": 195496 }, { "epoch": 16.869565217391305, "grad_norm": 0.4273427128791809, "learning_rate": 0.001, "loss": 2.0139, "step": 195552 }, { "epoch": 16.8743961352657, "grad_norm": 0.9766685962677002, "learning_rate": 0.001, "loss": 2.033, "step": 195608 }, { "epoch": 16.879227053140095, "grad_norm": 1.9136470556259155, "learning_rate": 0.001, "loss": 2.0626, "step": 195664 }, { "epoch": 16.884057971014492, "grad_norm": 1.6740261316299438, "learning_rate": 0.001, "loss": 2.0535, "step": 195720 }, { "epoch": 16.88888888888889, "grad_norm": 1.4089282751083374, "learning_rate": 0.001, "loss": 2.039, "step": 195776 }, { "epoch": 16.893719806763286, "grad_norm": 2.856243848800659, "learning_rate": 0.001, "loss": 2.0532, "step": 195832 }, { "epoch": 16.89855072463768, "grad_norm": 0.27055734395980835, "learning_rate": 0.001, "loss": 2.0414, "step": 195888 }, { "epoch": 16.903381642512077, "grad_norm": 1.418712854385376, "learning_rate": 0.001, "loss": 2.0551, "step": 195944 }, { "epoch": 16.908212560386474, "grad_norm": 0.41182950139045715, "learning_rate": 0.001, "loss": 2.0431, "step": 196000 }, { "epoch": 16.91304347826087, "grad_norm": 0.49568501114845276, "learning_rate": 0.001, "loss": 2.0396, "step": 196056 }, { "epoch": 16.917874396135264, "grad_norm": 0.39828601479530334, "learning_rate": 0.001, "loss": 2.03, "step": 196112 }, { "epoch": 16.92270531400966, "grad_norm": 0.4218588173389435, "learning_rate": 0.001, "loss": 2.0322, "step": 196168 }, { "epoch": 16.92753623188406, "grad_norm": 0.33129236102104187, "learning_rate": 0.001, "loss": 2.0301, "step": 196224 }, { "epoch": 16.932367149758456, "grad_norm": 0.2899501919746399, "learning_rate": 0.001, "loss": 2.0358, "step": 196280 }, { "epoch": 16.93719806763285, "grad_norm": 0.33048245310783386, "learning_rate": 0.001, "loss": 2.0275, "step": 196336 }, { "epoch": 16.942028985507246, "grad_norm": 0.7601819038391113, "learning_rate": 0.001, "loss": 2.0264, "step": 196392 }, { "epoch": 16.946859903381643, "grad_norm": 1.2421513795852661, "learning_rate": 0.001, "loss": 2.0294, "step": 196448 }, { "epoch": 16.95169082125604, "grad_norm": 2.4832963943481445, "learning_rate": 0.001, "loss": 2.0176, "step": 196504 }, { "epoch": 16.956521739130434, "grad_norm": 3.9885926246643066, "learning_rate": 0.001, "loss": 2.0262, "step": 196560 }, { "epoch": 16.96135265700483, "grad_norm": 3.799417495727539, "learning_rate": 0.001, "loss": 2.0483, "step": 196616 }, { "epoch": 16.966183574879228, "grad_norm": 1.228283405303955, "learning_rate": 0.001, "loss": 2.046, "step": 196672 }, { "epoch": 16.971014492753625, "grad_norm": 4.565097808837891, "learning_rate": 0.001, "loss": 2.0446, "step": 196728 }, { "epoch": 16.97584541062802, "grad_norm": 1.1718043088912964, "learning_rate": 0.001, "loss": 2.0566, "step": 196784 }, { "epoch": 16.980676328502415, "grad_norm": 0.9650245308876038, "learning_rate": 0.001, "loss": 2.0506, "step": 196840 }, { "epoch": 16.985507246376812, "grad_norm": 22.02593231201172, "learning_rate": 0.001, "loss": 2.0424, "step": 196896 }, { "epoch": 16.990338164251206, "grad_norm": 0.6295512318611145, "learning_rate": 0.001, "loss": 2.0344, "step": 196952 }, { "epoch": 16.995169082125603, "grad_norm": 0.7233116626739502, "learning_rate": 0.001, "loss": 2.0298, "step": 197008 }, { "epoch": 17.0, "grad_norm": 0.9202417135238647, "learning_rate": 0.001, "loss": 2.0264, "step": 197064 }, { "epoch": 17.004830917874397, "grad_norm": 0.7357917428016663, "learning_rate": 0.001, "loss": 1.9913, "step": 197120 }, { "epoch": 17.00966183574879, "grad_norm": 1.363856315612793, "learning_rate": 0.001, "loss": 1.9916, "step": 197176 }, { "epoch": 17.014492753623188, "grad_norm": 0.7536722421646118, "learning_rate": 0.001, "loss": 1.9872, "step": 197232 }, { "epoch": 17.019323671497585, "grad_norm": 0.4628455638885498, "learning_rate": 0.001, "loss": 1.9983, "step": 197288 }, { "epoch": 17.02415458937198, "grad_norm": 0.43860211968421936, "learning_rate": 0.001, "loss": 1.9898, "step": 197344 }, { "epoch": 17.028985507246375, "grad_norm": 1.0862034559249878, "learning_rate": 0.001, "loss": 2.01, "step": 197400 }, { "epoch": 17.033816425120772, "grad_norm": 0.41091638803482056, "learning_rate": 0.001, "loss": 2.0078, "step": 197456 }, { "epoch": 17.03864734299517, "grad_norm": 0.8949408531188965, "learning_rate": 0.001, "loss": 1.9963, "step": 197512 }, { "epoch": 17.043478260869566, "grad_norm": 0.4607747793197632, "learning_rate": 0.001, "loss": 1.9903, "step": 197568 }, { "epoch": 17.04830917874396, "grad_norm": 0.4450814723968506, "learning_rate": 0.001, "loss": 1.9989, "step": 197624 }, { "epoch": 17.053140096618357, "grad_norm": 0.6968145966529846, "learning_rate": 0.001, "loss": 2.0258, "step": 197680 }, { "epoch": 17.057971014492754, "grad_norm": 0.3505527079105377, "learning_rate": 0.001, "loss": 2.0374, "step": 197736 }, { "epoch": 17.06280193236715, "grad_norm": 0.7007247805595398, "learning_rate": 0.001, "loss": 2.0241, "step": 197792 }, { "epoch": 17.067632850241544, "grad_norm": 0.42565181851387024, "learning_rate": 0.001, "loss": 2.0158, "step": 197848 }, { "epoch": 17.07246376811594, "grad_norm": 0.6678960919380188, "learning_rate": 0.001, "loss": 2.0191, "step": 197904 }, { "epoch": 17.07729468599034, "grad_norm": 4.585575103759766, "learning_rate": 0.001, "loss": 2.0139, "step": 197960 }, { "epoch": 17.082125603864736, "grad_norm": 7.68037748336792, "learning_rate": 0.001, "loss": 2.0153, "step": 198016 }, { "epoch": 17.08695652173913, "grad_norm": 2.2260894775390625, "learning_rate": 0.001, "loss": 2.0219, "step": 198072 }, { "epoch": 17.091787439613526, "grad_norm": 4.9096198081970215, "learning_rate": 0.001, "loss": 2.0295, "step": 198128 }, { "epoch": 17.096618357487923, "grad_norm": 1.7353177070617676, "learning_rate": 0.001, "loss": 2.0289, "step": 198184 }, { "epoch": 17.10144927536232, "grad_norm": 1.8456236124038696, "learning_rate": 0.001, "loss": 2.0475, "step": 198240 }, { "epoch": 17.106280193236714, "grad_norm": 3.275623321533203, "learning_rate": 0.001, "loss": 2.0504, "step": 198296 }, { "epoch": 17.11111111111111, "grad_norm": 2.0498526096343994, "learning_rate": 0.001, "loss": 2.0347, "step": 198352 }, { "epoch": 17.115942028985508, "grad_norm": 1.6902790069580078, "learning_rate": 0.001, "loss": 2.0453, "step": 198408 }, { "epoch": 17.120772946859905, "grad_norm": 14.961019515991211, "learning_rate": 0.001, "loss": 2.0749, "step": 198464 }, { "epoch": 17.1256038647343, "grad_norm": 9.504125595092773, "learning_rate": 0.001, "loss": 2.1208, "step": 198520 }, { "epoch": 17.130434782608695, "grad_norm": 3.910196542739868, "learning_rate": 0.001, "loss": 2.1016, "step": 198576 }, { "epoch": 17.135265700483092, "grad_norm": 89.04476165771484, "learning_rate": 0.001, "loss": 2.0685, "step": 198632 }, { "epoch": 17.14009661835749, "grad_norm": 2.2351114749908447, "learning_rate": 0.001, "loss": 2.0557, "step": 198688 }, { "epoch": 17.144927536231883, "grad_norm": 0.8914165496826172, "learning_rate": 0.001, "loss": 2.0747, "step": 198744 }, { "epoch": 17.14975845410628, "grad_norm": 0.8623120188713074, "learning_rate": 0.001, "loss": 2.0776, "step": 198800 }, { "epoch": 17.154589371980677, "grad_norm": 2.2080469131469727, "learning_rate": 0.001, "loss": 2.062, "step": 198856 }, { "epoch": 17.159420289855074, "grad_norm": 2.3244779109954834, "learning_rate": 0.001, "loss": 2.0849, "step": 198912 }, { "epoch": 17.164251207729468, "grad_norm": 3.2196238040924072, "learning_rate": 0.001, "loss": 2.0722, "step": 198968 }, { "epoch": 17.169082125603865, "grad_norm": 7.231638431549072, "learning_rate": 0.001, "loss": 2.0453, "step": 199024 }, { "epoch": 17.17391304347826, "grad_norm": 0.8193380832672119, "learning_rate": 0.001, "loss": 2.0565, "step": 199080 }, { "epoch": 17.17874396135266, "grad_norm": 1.0477856397628784, "learning_rate": 0.001, "loss": 2.057, "step": 199136 }, { "epoch": 17.183574879227052, "grad_norm": 2.18863582611084, "learning_rate": 0.001, "loss": 2.044, "step": 199192 }, { "epoch": 17.18840579710145, "grad_norm": 6.6848249435424805, "learning_rate": 0.001, "loss": 2.0369, "step": 199248 }, { "epoch": 17.193236714975846, "grad_norm": 2.0866594314575195, "learning_rate": 0.001, "loss": 2.0491, "step": 199304 }, { "epoch": 17.19806763285024, "grad_norm": 0.6480128765106201, "learning_rate": 0.001, "loss": 2.0412, "step": 199360 }, { "epoch": 17.202898550724637, "grad_norm": 2.501214027404785, "learning_rate": 0.001, "loss": 2.0439, "step": 199416 }, { "epoch": 17.207729468599034, "grad_norm": 0.3921353816986084, "learning_rate": 0.001, "loss": 2.045, "step": 199472 }, { "epoch": 17.21256038647343, "grad_norm": 16.650386810302734, "learning_rate": 0.001, "loss": 2.0279, "step": 199528 }, { "epoch": 17.217391304347824, "grad_norm": 4.484215259552002, "learning_rate": 0.001, "loss": 2.0351, "step": 199584 }, { "epoch": 17.22222222222222, "grad_norm": 11.456953048706055, "learning_rate": 0.001, "loss": 2.0329, "step": 199640 }, { "epoch": 17.22705314009662, "grad_norm": 8.215957641601562, "learning_rate": 0.001, "loss": 2.0216, "step": 199696 }, { "epoch": 17.231884057971016, "grad_norm": 0.8254291415214539, "learning_rate": 0.001, "loss": 2.0257, "step": 199752 }, { "epoch": 17.23671497584541, "grad_norm": 0.8101583123207092, "learning_rate": 0.001, "loss": 2.0326, "step": 199808 }, { "epoch": 17.241545893719806, "grad_norm": 0.9279499650001526, "learning_rate": 0.001, "loss": 2.0257, "step": 199864 }, { "epoch": 17.246376811594203, "grad_norm": 0.5354316830635071, "learning_rate": 0.001, "loss": 2.0268, "step": 199920 }, { "epoch": 17.2512077294686, "grad_norm": 0.5018588900566101, "learning_rate": 0.001, "loss": 2.03, "step": 199976 }, { "epoch": 17.256038647342994, "grad_norm": 0.687272846698761, "learning_rate": 0.001, "loss": 2.0225, "step": 200032 }, { "epoch": 17.26086956521739, "grad_norm": 1.1128239631652832, "learning_rate": 0.001, "loss": 2.0178, "step": 200088 }, { "epoch": 17.265700483091788, "grad_norm": 0.3646056354045868, "learning_rate": 0.001, "loss": 2.0201, "step": 200144 }, { "epoch": 17.270531400966185, "grad_norm": 3.7217750549316406, "learning_rate": 0.001, "loss": 2.0146, "step": 200200 }, { "epoch": 17.27536231884058, "grad_norm": 1.0923576354980469, "learning_rate": 0.001, "loss": 2.0199, "step": 200256 }, { "epoch": 17.280193236714975, "grad_norm": 0.4362781047821045, "learning_rate": 0.001, "loss": 2.0248, "step": 200312 }, { "epoch": 17.285024154589372, "grad_norm": 2.7038536071777344, "learning_rate": 0.001, "loss": 2.0455, "step": 200368 }, { "epoch": 17.28985507246377, "grad_norm": 0.8134745359420776, "learning_rate": 0.001, "loss": 2.031, "step": 200424 }, { "epoch": 17.294685990338163, "grad_norm": 1.8891823291778564, "learning_rate": 0.001, "loss": 2.0245, "step": 200480 }, { "epoch": 17.29951690821256, "grad_norm": 0.6027181148529053, "learning_rate": 0.001, "loss": 2.0163, "step": 200536 }, { "epoch": 17.304347826086957, "grad_norm": 0.7690904140472412, "learning_rate": 0.001, "loss": 2.0142, "step": 200592 }, { "epoch": 17.309178743961354, "grad_norm": 1.082108736038208, "learning_rate": 0.001, "loss": 2.0055, "step": 200648 }, { "epoch": 17.314009661835748, "grad_norm": 0.49429255723953247, "learning_rate": 0.001, "loss": 2.0199, "step": 200704 }, { "epoch": 17.318840579710145, "grad_norm": 8.144294738769531, "learning_rate": 0.001, "loss": 2.0111, "step": 200760 }, { "epoch": 17.32367149758454, "grad_norm": 1.2012593746185303, "learning_rate": 0.001, "loss": 1.9996, "step": 200816 }, { "epoch": 17.32850241545894, "grad_norm": 0.45788559317588806, "learning_rate": 0.001, "loss": 2.0101, "step": 200872 }, { "epoch": 17.333333333333332, "grad_norm": 1.644631028175354, "learning_rate": 0.001, "loss": 2.0104, "step": 200928 }, { "epoch": 17.33816425120773, "grad_norm": 4.120462894439697, "learning_rate": 0.001, "loss": 2.0094, "step": 200984 }, { "epoch": 17.342995169082126, "grad_norm": 0.2877126634120941, "learning_rate": 0.001, "loss": 2.0078, "step": 201040 }, { "epoch": 17.347826086956523, "grad_norm": 0.8104161620140076, "learning_rate": 0.001, "loss": 2.01, "step": 201096 }, { "epoch": 17.352657004830917, "grad_norm": 0.7426979541778564, "learning_rate": 0.001, "loss": 2.0098, "step": 201152 }, { "epoch": 17.357487922705314, "grad_norm": 1.6111778020858765, "learning_rate": 0.001, "loss": 2.0168, "step": 201208 }, { "epoch": 17.36231884057971, "grad_norm": 0.7598965764045715, "learning_rate": 0.001, "loss": 2.0034, "step": 201264 }, { "epoch": 17.367149758454108, "grad_norm": 0.5758513808250427, "learning_rate": 0.001, "loss": 2.0228, "step": 201320 }, { "epoch": 17.3719806763285, "grad_norm": 1.135926365852356, "learning_rate": 0.001, "loss": 2.0055, "step": 201376 }, { "epoch": 17.3768115942029, "grad_norm": 3.106802225112915, "learning_rate": 0.001, "loss": 2.0016, "step": 201432 }, { "epoch": 17.381642512077295, "grad_norm": 2.6268579959869385, "learning_rate": 0.001, "loss": 2.0048, "step": 201488 }, { "epoch": 17.386473429951693, "grad_norm": 0.4911486506462097, "learning_rate": 0.001, "loss": 2.0022, "step": 201544 }, { "epoch": 17.391304347826086, "grad_norm": 0.5255958437919617, "learning_rate": 0.001, "loss": 2.0047, "step": 201600 }, { "epoch": 17.396135265700483, "grad_norm": 0.46415650844573975, "learning_rate": 0.001, "loss": 2.0073, "step": 201656 }, { "epoch": 17.40096618357488, "grad_norm": 0.4485037326812744, "learning_rate": 0.001, "loss": 1.9943, "step": 201712 }, { "epoch": 17.405797101449274, "grad_norm": 0.7632162570953369, "learning_rate": 0.001, "loss": 1.9933, "step": 201768 }, { "epoch": 17.41062801932367, "grad_norm": 3.6975579261779785, "learning_rate": 0.001, "loss": 1.983, "step": 201824 }, { "epoch": 17.415458937198068, "grad_norm": 1.308274745941162, "learning_rate": 0.001, "loss": 1.982, "step": 201880 }, { "epoch": 17.420289855072465, "grad_norm": 0.34257379174232483, "learning_rate": 0.001, "loss": 1.9997, "step": 201936 }, { "epoch": 17.42512077294686, "grad_norm": 0.5163534283638, "learning_rate": 0.001, "loss": 1.9982, "step": 201992 }, { "epoch": 17.429951690821255, "grad_norm": 0.9127593040466309, "learning_rate": 0.001, "loss": 1.9969, "step": 202048 }, { "epoch": 17.434782608695652, "grad_norm": 0.341212660074234, "learning_rate": 0.001, "loss": 2.0065, "step": 202104 }, { "epoch": 17.43961352657005, "grad_norm": 1.1749262809753418, "learning_rate": 0.001, "loss": 1.9972, "step": 202160 }, { "epoch": 17.444444444444443, "grad_norm": 0.6761568188667297, "learning_rate": 0.001, "loss": 1.9978, "step": 202216 }, { "epoch": 17.44927536231884, "grad_norm": 2.245246410369873, "learning_rate": 0.001, "loss": 1.9848, "step": 202272 }, { "epoch": 17.454106280193237, "grad_norm": 0.4219343662261963, "learning_rate": 0.001, "loss": 1.9917, "step": 202328 }, { "epoch": 17.458937198067634, "grad_norm": 0.6091845631599426, "learning_rate": 0.001, "loss": 1.9879, "step": 202384 }, { "epoch": 17.463768115942027, "grad_norm": 0.6387943625450134, "learning_rate": 0.001, "loss": 1.989, "step": 202440 }, { "epoch": 17.468599033816425, "grad_norm": 0.3861483931541443, "learning_rate": 0.001, "loss": 1.9963, "step": 202496 }, { "epoch": 17.47342995169082, "grad_norm": 0.4580955505371094, "learning_rate": 0.001, "loss": 2.0025, "step": 202552 }, { "epoch": 17.47826086956522, "grad_norm": 0.9553045034408569, "learning_rate": 0.001, "loss": 1.9998, "step": 202608 }, { "epoch": 17.483091787439612, "grad_norm": 0.41008260846138, "learning_rate": 0.001, "loss": 1.9996, "step": 202664 }, { "epoch": 17.48792270531401, "grad_norm": 0.3904622793197632, "learning_rate": 0.001, "loss": 2.012, "step": 202720 }, { "epoch": 17.492753623188406, "grad_norm": 0.43343502283096313, "learning_rate": 0.001, "loss": 2.0329, "step": 202776 }, { "epoch": 17.497584541062803, "grad_norm": 0.30230212211608887, "learning_rate": 0.001, "loss": 2.0326, "step": 202832 }, { "epoch": 17.502415458937197, "grad_norm": 0.35011816024780273, "learning_rate": 0.001, "loss": 2.0207, "step": 202888 }, { "epoch": 17.507246376811594, "grad_norm": 0.8570321202278137, "learning_rate": 0.001, "loss": 2.0277, "step": 202944 }, { "epoch": 17.51207729468599, "grad_norm": 1.2873200178146362, "learning_rate": 0.001, "loss": 2.0365, "step": 203000 }, { "epoch": 17.516908212560388, "grad_norm": 1.4232642650604248, "learning_rate": 0.001, "loss": 2.0346, "step": 203056 }, { "epoch": 17.52173913043478, "grad_norm": 0.8065872192382812, "learning_rate": 0.001, "loss": 2.0325, "step": 203112 }, { "epoch": 17.52657004830918, "grad_norm": 0.36926472187042236, "learning_rate": 0.001, "loss": 2.033, "step": 203168 }, { "epoch": 17.531400966183575, "grad_norm": 0.5115698575973511, "learning_rate": 0.001, "loss": 2.0324, "step": 203224 }, { "epoch": 17.536231884057973, "grad_norm": 1.2330418825149536, "learning_rate": 0.001, "loss": 2.0171, "step": 203280 }, { "epoch": 17.541062801932366, "grad_norm": 1.1197870969772339, "learning_rate": 0.001, "loss": 2.0157, "step": 203336 }, { "epoch": 17.545893719806763, "grad_norm": 0.703370988368988, "learning_rate": 0.001, "loss": 2.0033, "step": 203392 }, { "epoch": 17.55072463768116, "grad_norm": 4.859795093536377, "learning_rate": 0.001, "loss": 2.008, "step": 203448 }, { "epoch": 17.555555555555557, "grad_norm": 3.150716543197632, "learning_rate": 0.001, "loss": 2.0202, "step": 203504 }, { "epoch": 17.56038647342995, "grad_norm": 0.6640255451202393, "learning_rate": 0.001, "loss": 2.0098, "step": 203560 }, { "epoch": 17.565217391304348, "grad_norm": 1.415740966796875, "learning_rate": 0.001, "loss": 2.0059, "step": 203616 }, { "epoch": 17.570048309178745, "grad_norm": 0.8966888785362244, "learning_rate": 0.001, "loss": 2.028, "step": 203672 }, { "epoch": 17.57487922705314, "grad_norm": 0.38754573464393616, "learning_rate": 0.001, "loss": 2.016, "step": 203728 }, { "epoch": 17.579710144927535, "grad_norm": 0.35220766067504883, "learning_rate": 0.001, "loss": 2.0191, "step": 203784 }, { "epoch": 17.584541062801932, "grad_norm": 0.3672716021537781, "learning_rate": 0.001, "loss": 2.0229, "step": 203840 }, { "epoch": 17.58937198067633, "grad_norm": 0.3008093237876892, "learning_rate": 0.001, "loss": 2.0234, "step": 203896 }, { "epoch": 17.594202898550726, "grad_norm": 0.7144272923469543, "learning_rate": 0.001, "loss": 2.0053, "step": 203952 }, { "epoch": 17.59903381642512, "grad_norm": 0.3190706670284271, "learning_rate": 0.001, "loss": 2.0101, "step": 204008 }, { "epoch": 17.603864734299517, "grad_norm": 0.29227423667907715, "learning_rate": 0.001, "loss": 2.0013, "step": 204064 }, { "epoch": 17.608695652173914, "grad_norm": 0.3204523026943207, "learning_rate": 0.001, "loss": 2.0057, "step": 204120 }, { "epoch": 17.613526570048307, "grad_norm": 0.3938024342060089, "learning_rate": 0.001, "loss": 2.0122, "step": 204176 }, { "epoch": 17.618357487922705, "grad_norm": 4.193080902099609, "learning_rate": 0.001, "loss": 2.018, "step": 204232 }, { "epoch": 17.6231884057971, "grad_norm": 0.3865472376346588, "learning_rate": 0.001, "loss": 2.0056, "step": 204288 }, { "epoch": 17.6280193236715, "grad_norm": 0.3245698809623718, "learning_rate": 0.001, "loss": 1.9988, "step": 204344 }, { "epoch": 17.632850241545892, "grad_norm": 2.2892158031463623, "learning_rate": 0.001, "loss": 2.0025, "step": 204400 }, { "epoch": 17.63768115942029, "grad_norm": 3.89587664604187, "learning_rate": 0.001, "loss": 2.0011, "step": 204456 }, { "epoch": 17.642512077294686, "grad_norm": 0.3852275013923645, "learning_rate": 0.001, "loss": 2.0043, "step": 204512 }, { "epoch": 17.647342995169083, "grad_norm": 0.8561948537826538, "learning_rate": 0.001, "loss": 2.0132, "step": 204568 }, { "epoch": 17.652173913043477, "grad_norm": 0.6353474259376526, "learning_rate": 0.001, "loss": 2.0342, "step": 204624 }, { "epoch": 17.657004830917874, "grad_norm": 0.5256075263023376, "learning_rate": 0.001, "loss": 2.0279, "step": 204680 }, { "epoch": 17.66183574879227, "grad_norm": 0.7293254137039185, "learning_rate": 0.001, "loss": 2.0162, "step": 204736 }, { "epoch": 17.666666666666668, "grad_norm": 0.47616949677467346, "learning_rate": 0.001, "loss": 2.0065, "step": 204792 }, { "epoch": 17.67149758454106, "grad_norm": 1.3632248640060425, "learning_rate": 0.001, "loss": 2.013, "step": 204848 }, { "epoch": 17.67632850241546, "grad_norm": 0.8381468653678894, "learning_rate": 0.001, "loss": 2.0098, "step": 204904 }, { "epoch": 17.681159420289855, "grad_norm": 0.42445775866508484, "learning_rate": 0.001, "loss": 2.008, "step": 204960 }, { "epoch": 17.685990338164252, "grad_norm": 0.3829394578933716, "learning_rate": 0.001, "loss": 2.0076, "step": 205016 }, { "epoch": 17.690821256038646, "grad_norm": 2.4244043827056885, "learning_rate": 0.001, "loss": 2.0177, "step": 205072 }, { "epoch": 17.695652173913043, "grad_norm": 0.943936824798584, "learning_rate": 0.001, "loss": 2.0208, "step": 205128 }, { "epoch": 17.70048309178744, "grad_norm": 2.515855312347412, "learning_rate": 0.001, "loss": 2.0263, "step": 205184 }, { "epoch": 17.705314009661837, "grad_norm": 1.1545761823654175, "learning_rate": 0.001, "loss": 2.0157, "step": 205240 }, { "epoch": 17.71014492753623, "grad_norm": 1.326422929763794, "learning_rate": 0.001, "loss": 2.0279, "step": 205296 }, { "epoch": 17.714975845410628, "grad_norm": 1.9896457195281982, "learning_rate": 0.001, "loss": 2.0353, "step": 205352 }, { "epoch": 17.719806763285025, "grad_norm": 1.214542269706726, "learning_rate": 0.001, "loss": 2.0338, "step": 205408 }, { "epoch": 17.72463768115942, "grad_norm": 1.8464019298553467, "learning_rate": 0.001, "loss": 2.0492, "step": 205464 }, { "epoch": 17.729468599033815, "grad_norm": 1.2762267589569092, "learning_rate": 0.001, "loss": 2.0539, "step": 205520 }, { "epoch": 17.734299516908212, "grad_norm": 0.6249876022338867, "learning_rate": 0.001, "loss": 2.0498, "step": 205576 }, { "epoch": 17.73913043478261, "grad_norm": 0.34604132175445557, "learning_rate": 0.001, "loss": 2.0406, "step": 205632 }, { "epoch": 17.743961352657006, "grad_norm": 1.9437006711959839, "learning_rate": 0.001, "loss": 2.0226, "step": 205688 }, { "epoch": 17.7487922705314, "grad_norm": 0.4825698733329773, "learning_rate": 0.001, "loss": 2.0242, "step": 205744 }, { "epoch": 17.753623188405797, "grad_norm": 0.6121832132339478, "learning_rate": 0.001, "loss": 2.0159, "step": 205800 }, { "epoch": 17.758454106280194, "grad_norm": 0.47594112157821655, "learning_rate": 0.001, "loss": 2.0203, "step": 205856 }, { "epoch": 17.76328502415459, "grad_norm": 1.5213972330093384, "learning_rate": 0.001, "loss": 2.0249, "step": 205912 }, { "epoch": 17.768115942028984, "grad_norm": 0.8917885422706604, "learning_rate": 0.001, "loss": 2.0404, "step": 205968 }, { "epoch": 17.77294685990338, "grad_norm": 0.6616286635398865, "learning_rate": 0.001, "loss": 2.0243, "step": 206024 }, { "epoch": 17.77777777777778, "grad_norm": 0.988978385925293, "learning_rate": 0.001, "loss": 2.0111, "step": 206080 }, { "epoch": 17.782608695652176, "grad_norm": 0.8258740901947021, "learning_rate": 0.001, "loss": 2.0228, "step": 206136 }, { "epoch": 17.78743961352657, "grad_norm": 0.9602023363113403, "learning_rate": 0.001, "loss": 2.017, "step": 206192 }, { "epoch": 17.792270531400966, "grad_norm": 1.483341097831726, "learning_rate": 0.001, "loss": 2.0096, "step": 206248 }, { "epoch": 17.797101449275363, "grad_norm": 0.8836691379547119, "learning_rate": 0.001, "loss": 2.0137, "step": 206304 }, { "epoch": 17.80193236714976, "grad_norm": 1.424958348274231, "learning_rate": 0.001, "loss": 2.0146, "step": 206360 }, { "epoch": 17.806763285024154, "grad_norm": 0.7646310329437256, "learning_rate": 0.001, "loss": 2.011, "step": 206416 }, { "epoch": 17.81159420289855, "grad_norm": 1.6540539264678955, "learning_rate": 0.001, "loss": 1.9993, "step": 206472 }, { "epoch": 17.816425120772948, "grad_norm": 0.627555787563324, "learning_rate": 0.001, "loss": 2.0174, "step": 206528 }, { "epoch": 17.82125603864734, "grad_norm": 0.8958575129508972, "learning_rate": 0.001, "loss": 2.0174, "step": 206584 }, { "epoch": 17.82608695652174, "grad_norm": 1.3169989585876465, "learning_rate": 0.001, "loss": 2.0358, "step": 206640 }, { "epoch": 17.830917874396135, "grad_norm": 0.43286895751953125, "learning_rate": 0.001, "loss": 2.041, "step": 206696 }, { "epoch": 17.835748792270532, "grad_norm": 0.7443773150444031, "learning_rate": 0.001, "loss": 2.0299, "step": 206752 }, { "epoch": 17.840579710144926, "grad_norm": 1.0382713079452515, "learning_rate": 0.001, "loss": 2.0287, "step": 206808 }, { "epoch": 17.845410628019323, "grad_norm": 1.7661292552947998, "learning_rate": 0.001, "loss": 2.0273, "step": 206864 }, { "epoch": 17.85024154589372, "grad_norm": 1.2433719635009766, "learning_rate": 0.001, "loss": 2.0492, "step": 206920 }, { "epoch": 17.855072463768117, "grad_norm": 1.3783491849899292, "learning_rate": 0.001, "loss": 2.0384, "step": 206976 }, { "epoch": 17.85990338164251, "grad_norm": 0.6853235363960266, "learning_rate": 0.001, "loss": 2.0362, "step": 207032 }, { "epoch": 17.864734299516908, "grad_norm": 1.6107745170593262, "learning_rate": 0.001, "loss": 2.0256, "step": 207088 }, { "epoch": 17.869565217391305, "grad_norm": 1.0396264791488647, "learning_rate": 0.001, "loss": 2.0302, "step": 207144 }, { "epoch": 17.8743961352657, "grad_norm": 0.6649816036224365, "learning_rate": 0.001, "loss": 2.0309, "step": 207200 }, { "epoch": 17.879227053140095, "grad_norm": 1.2221636772155762, "learning_rate": 0.001, "loss": 2.0258, "step": 207256 }, { "epoch": 17.884057971014492, "grad_norm": 12.234846115112305, "learning_rate": 0.001, "loss": 2.0332, "step": 207312 }, { "epoch": 17.88888888888889, "grad_norm": 1.5809814929962158, "learning_rate": 0.001, "loss": 2.0522, "step": 207368 }, { "epoch": 17.893719806763286, "grad_norm": 0.5922200083732605, "learning_rate": 0.001, "loss": 2.0467, "step": 207424 }, { "epoch": 17.89855072463768, "grad_norm": 0.7181801199913025, "learning_rate": 0.001, "loss": 2.0437, "step": 207480 }, { "epoch": 17.903381642512077, "grad_norm": 1.1437773704528809, "learning_rate": 0.001, "loss": 2.0339, "step": 207536 }, { "epoch": 17.908212560386474, "grad_norm": 2.955794334411621, "learning_rate": 0.001, "loss": 2.0262, "step": 207592 }, { "epoch": 17.91304347826087, "grad_norm": 3.1599080562591553, "learning_rate": 0.001, "loss": 2.026, "step": 207648 }, { "epoch": 17.917874396135264, "grad_norm": 0.8696035742759705, "learning_rate": 0.001, "loss": 2.0407, "step": 207704 }, { "epoch": 17.92270531400966, "grad_norm": 1.1342369318008423, "learning_rate": 0.001, "loss": 2.0462, "step": 207760 }, { "epoch": 17.92753623188406, "grad_norm": 0.48327529430389404, "learning_rate": 0.001, "loss": 2.0398, "step": 207816 }, { "epoch": 17.932367149758456, "grad_norm": 1.3128464221954346, "learning_rate": 0.001, "loss": 2.0413, "step": 207872 }, { "epoch": 17.93719806763285, "grad_norm": 0.7495733499526978, "learning_rate": 0.001, "loss": 2.0379, "step": 207928 }, { "epoch": 17.942028985507246, "grad_norm": 1.0244556665420532, "learning_rate": 0.001, "loss": 2.0277, "step": 207984 }, { "epoch": 17.946859903381643, "grad_norm": 1.3469117879867554, "learning_rate": 0.001, "loss": 2.0323, "step": 208040 }, { "epoch": 17.95169082125604, "grad_norm": 0.9481945633888245, "learning_rate": 0.001, "loss": 2.0272, "step": 208096 }, { "epoch": 17.956521739130434, "grad_norm": 1.437888503074646, "learning_rate": 0.001, "loss": 2.0422, "step": 208152 }, { "epoch": 17.96135265700483, "grad_norm": 0.5852994322776794, "learning_rate": 0.001, "loss": 2.0411, "step": 208208 }, { "epoch": 17.966183574879228, "grad_norm": 0.6143671870231628, "learning_rate": 0.001, "loss": 2.0517, "step": 208264 }, { "epoch": 17.971014492753625, "grad_norm": 1.1441049575805664, "learning_rate": 0.001, "loss": 2.044, "step": 208320 }, { "epoch": 17.97584541062802, "grad_norm": 1.3598695993423462, "learning_rate": 0.001, "loss": 2.0292, "step": 208376 }, { "epoch": 17.980676328502415, "grad_norm": 0.8377267718315125, "learning_rate": 0.001, "loss": 2.0283, "step": 208432 }, { "epoch": 17.985507246376812, "grad_norm": 0.9452416896820068, "learning_rate": 0.001, "loss": 2.0371, "step": 208488 }, { "epoch": 17.990338164251206, "grad_norm": 1.0680909156799316, "learning_rate": 0.001, "loss": 2.0314, "step": 208544 }, { "epoch": 17.995169082125603, "grad_norm": 0.7076923251152039, "learning_rate": 0.001, "loss": 2.0214, "step": 208600 }, { "epoch": 18.0, "grad_norm": 2.3420917987823486, "learning_rate": 0.001, "loss": 2.0244, "step": 208656 }, { "epoch": 18.004830917874397, "grad_norm": 0.6423053741455078, "learning_rate": 0.001, "loss": 2.0009, "step": 208712 }, { "epoch": 18.00966183574879, "grad_norm": 0.608394980430603, "learning_rate": 0.001, "loss": 1.9884, "step": 208768 }, { "epoch": 18.014492753623188, "grad_norm": 0.6217007040977478, "learning_rate": 0.001, "loss": 1.9875, "step": 208824 }, { "epoch": 18.019323671497585, "grad_norm": 3.0535802841186523, "learning_rate": 0.001, "loss": 1.9889, "step": 208880 }, { "epoch": 18.02415458937198, "grad_norm": 9.73165512084961, "learning_rate": 0.001, "loss": 1.9903, "step": 208936 }, { "epoch": 18.028985507246375, "grad_norm": 0.5923244953155518, "learning_rate": 0.001, "loss": 1.9967, "step": 208992 }, { "epoch": 18.033816425120772, "grad_norm": 0.8756278157234192, "learning_rate": 0.001, "loss": 2.0016, "step": 209048 }, { "epoch": 18.03864734299517, "grad_norm": 0.6837737560272217, "learning_rate": 0.001, "loss": 1.9924, "step": 209104 }, { "epoch": 18.043478260869566, "grad_norm": 1.5591719150543213, "learning_rate": 0.001, "loss": 1.9897, "step": 209160 }, { "epoch": 18.04830917874396, "grad_norm": 1.744718313217163, "learning_rate": 0.001, "loss": 1.9936, "step": 209216 }, { "epoch": 18.053140096618357, "grad_norm": 0.7881166934967041, "learning_rate": 0.001, "loss": 2.0045, "step": 209272 }, { "epoch": 18.057971014492754, "grad_norm": 1.719242811203003, "learning_rate": 0.001, "loss": 2.0106, "step": 209328 }, { "epoch": 18.06280193236715, "grad_norm": 3.994037628173828, "learning_rate": 0.001, "loss": 2.02, "step": 209384 }, { "epoch": 18.067632850241544, "grad_norm": 4.949288845062256, "learning_rate": 0.001, "loss": 2.0138, "step": 209440 }, { "epoch": 18.07246376811594, "grad_norm": 0.6637479066848755, "learning_rate": 0.001, "loss": 2.0225, "step": 209496 }, { "epoch": 18.07729468599034, "grad_norm": 0.434272825717926, "learning_rate": 0.001, "loss": 2.0164, "step": 209552 }, { "epoch": 18.082125603864736, "grad_norm": 1.797939658164978, "learning_rate": 0.001, "loss": 2.0099, "step": 209608 }, { "epoch": 18.08695652173913, "grad_norm": 0.5884961485862732, "learning_rate": 0.001, "loss": 2.0063, "step": 209664 }, { "epoch": 18.091787439613526, "grad_norm": 7.537419319152832, "learning_rate": 0.001, "loss": 2.0075, "step": 209720 }, { "epoch": 18.096618357487923, "grad_norm": 6.30743408203125, "learning_rate": 0.001, "loss": 2.0013, "step": 209776 }, { "epoch": 18.10144927536232, "grad_norm": 0.9096853137016296, "learning_rate": 0.001, "loss": 2.0143, "step": 209832 }, { "epoch": 18.106280193236714, "grad_norm": 1.7086690664291382, "learning_rate": 0.001, "loss": 1.9956, "step": 209888 }, { "epoch": 18.11111111111111, "grad_norm": 1.3940095901489258, "learning_rate": 0.001, "loss": 1.9906, "step": 209944 }, { "epoch": 18.115942028985508, "grad_norm": 0.5439285039901733, "learning_rate": 0.001, "loss": 1.9706, "step": 210000 }, { "epoch": 18.120772946859905, "grad_norm": 0.6971027255058289, "learning_rate": 0.001, "loss": 1.9933, "step": 210056 }, { "epoch": 18.1256038647343, "grad_norm": 0.44979310035705566, "learning_rate": 0.001, "loss": 1.9914, "step": 210112 }, { "epoch": 18.130434782608695, "grad_norm": 4.765186309814453, "learning_rate": 0.001, "loss": 1.9958, "step": 210168 }, { "epoch": 18.135265700483092, "grad_norm": 0.5212612152099609, "learning_rate": 0.001, "loss": 1.9854, "step": 210224 }, { "epoch": 18.14009661835749, "grad_norm": 0.9277397990226746, "learning_rate": 0.001, "loss": 1.9877, "step": 210280 }, { "epoch": 18.144927536231883, "grad_norm": 4.402560710906982, "learning_rate": 0.001, "loss": 1.996, "step": 210336 }, { "epoch": 18.14975845410628, "grad_norm": 1.205804705619812, "learning_rate": 0.001, "loss": 1.9919, "step": 210392 }, { "epoch": 18.154589371980677, "grad_norm": 0.6661087274551392, "learning_rate": 0.001, "loss": 1.9859, "step": 210448 }, { "epoch": 18.159420289855074, "grad_norm": 1.2227925062179565, "learning_rate": 0.001, "loss": 1.9809, "step": 210504 }, { "epoch": 18.164251207729468, "grad_norm": 0.5242561101913452, "learning_rate": 0.001, "loss": 1.9968, "step": 210560 }, { "epoch": 18.169082125603865, "grad_norm": 2.146090030670166, "learning_rate": 0.001, "loss": 2.0018, "step": 210616 }, { "epoch": 18.17391304347826, "grad_norm": 11.038692474365234, "learning_rate": 0.001, "loss": 2.0037, "step": 210672 }, { "epoch": 18.17874396135266, "grad_norm": 0.8741036653518677, "learning_rate": 0.001, "loss": 1.9969, "step": 210728 }, { "epoch": 18.183574879227052, "grad_norm": 0.8639529347419739, "learning_rate": 0.001, "loss": 1.9918, "step": 210784 }, { "epoch": 18.18840579710145, "grad_norm": 1.4663217067718506, "learning_rate": 0.001, "loss": 1.9952, "step": 210840 }, { "epoch": 18.193236714975846, "grad_norm": 0.664835512638092, "learning_rate": 0.001, "loss": 1.9997, "step": 210896 }, { "epoch": 18.19806763285024, "grad_norm": 0.5348080396652222, "learning_rate": 0.001, "loss": 2.0063, "step": 210952 }, { "epoch": 18.202898550724637, "grad_norm": 1.5506716966629028, "learning_rate": 0.001, "loss": 2.0106, "step": 211008 }, { "epoch": 18.207729468599034, "grad_norm": 0.4524490237236023, "learning_rate": 0.001, "loss": 2.0049, "step": 211064 }, { "epoch": 18.21256038647343, "grad_norm": 3.699216365814209, "learning_rate": 0.001, "loss": 2.0165, "step": 211120 }, { "epoch": 18.217391304347824, "grad_norm": 2.090256452560425, "learning_rate": 0.001, "loss": 2.0055, "step": 211176 }, { "epoch": 18.22222222222222, "grad_norm": 0.47229790687561035, "learning_rate": 0.001, "loss": 2.0047, "step": 211232 }, { "epoch": 18.22705314009662, "grad_norm": 0.9265060424804688, "learning_rate": 0.001, "loss": 1.9924, "step": 211288 }, { "epoch": 18.231884057971016, "grad_norm": 1.0612058639526367, "learning_rate": 0.001, "loss": 1.9908, "step": 211344 }, { "epoch": 18.23671497584541, "grad_norm": 0.5027437806129456, "learning_rate": 0.001, "loss": 1.9897, "step": 211400 }, { "epoch": 18.241545893719806, "grad_norm": 1.002432942390442, "learning_rate": 0.001, "loss": 1.9947, "step": 211456 }, { "epoch": 18.246376811594203, "grad_norm": 0.40407782793045044, "learning_rate": 0.001, "loss": 1.9804, "step": 211512 }, { "epoch": 18.2512077294686, "grad_norm": 0.5669754147529602, "learning_rate": 0.001, "loss": 1.9788, "step": 211568 }, { "epoch": 18.256038647342994, "grad_norm": 0.5416087508201599, "learning_rate": 0.001, "loss": 1.9846, "step": 211624 }, { "epoch": 18.26086956521739, "grad_norm": 0.5927562117576599, "learning_rate": 0.001, "loss": 1.9898, "step": 211680 }, { "epoch": 18.265700483091788, "grad_norm": 1.023227334022522, "learning_rate": 0.001, "loss": 1.9987, "step": 211736 }, { "epoch": 18.270531400966185, "grad_norm": 0.4366677701473236, "learning_rate": 0.001, "loss": 2.0027, "step": 211792 }, { "epoch": 18.27536231884058, "grad_norm": 1.4531903266906738, "learning_rate": 0.001, "loss": 1.9961, "step": 211848 }, { "epoch": 18.280193236714975, "grad_norm": 1.572837233543396, "learning_rate": 0.001, "loss": 1.987, "step": 211904 }, { "epoch": 18.285024154589372, "grad_norm": 0.632854700088501, "learning_rate": 0.001, "loss": 1.9851, "step": 211960 }, { "epoch": 18.28985507246377, "grad_norm": 0.6068235635757446, "learning_rate": 0.001, "loss": 1.9804, "step": 212016 }, { "epoch": 18.294685990338163, "grad_norm": 0.43270981311798096, "learning_rate": 0.001, "loss": 1.99, "step": 212072 }, { "epoch": 18.29951690821256, "grad_norm": 0.9107401371002197, "learning_rate": 0.001, "loss": 1.9869, "step": 212128 }, { "epoch": 18.304347826086957, "grad_norm": 1.099626898765564, "learning_rate": 0.001, "loss": 1.9835, "step": 212184 }, { "epoch": 18.309178743961354, "grad_norm": 0.3801335394382477, "learning_rate": 0.001, "loss": 2.0022, "step": 212240 }, { "epoch": 18.314009661835748, "grad_norm": 0.41175681352615356, "learning_rate": 0.001, "loss": 1.9892, "step": 212296 }, { "epoch": 18.318840579710145, "grad_norm": 0.3538585305213928, "learning_rate": 0.001, "loss": 1.9837, "step": 212352 }, { "epoch": 18.32367149758454, "grad_norm": 0.800585150718689, "learning_rate": 0.001, "loss": 1.9781, "step": 212408 }, { "epoch": 18.32850241545894, "grad_norm": 0.6442814469337463, "learning_rate": 0.001, "loss": 1.9916, "step": 212464 }, { "epoch": 18.333333333333332, "grad_norm": 0.3548004925251007, "learning_rate": 0.001, "loss": 1.9816, "step": 212520 }, { "epoch": 18.33816425120773, "grad_norm": 0.3411487340927124, "learning_rate": 0.001, "loss": 1.9878, "step": 212576 }, { "epoch": 18.342995169082126, "grad_norm": 0.3141336441040039, "learning_rate": 0.001, "loss": 1.9908, "step": 212632 }, { "epoch": 18.347826086956523, "grad_norm": 0.3833332657814026, "learning_rate": 0.001, "loss": 1.9755, "step": 212688 }, { "epoch": 18.352657004830917, "grad_norm": 0.4970197081565857, "learning_rate": 0.001, "loss": 1.9793, "step": 212744 }, { "epoch": 18.357487922705314, "grad_norm": 0.47132453322410583, "learning_rate": 0.001, "loss": 1.9706, "step": 212800 }, { "epoch": 18.36231884057971, "grad_norm": 0.47788524627685547, "learning_rate": 0.001, "loss": 1.9742, "step": 212856 }, { "epoch": 18.367149758454108, "grad_norm": 0.5253958702087402, "learning_rate": 0.001, "loss": 1.9786, "step": 212912 }, { "epoch": 18.3719806763285, "grad_norm": 0.6821537613868713, "learning_rate": 0.001, "loss": 1.9747, "step": 212968 }, { "epoch": 18.3768115942029, "grad_norm": 0.5410891175270081, "learning_rate": 0.001, "loss": 1.9659, "step": 213024 }, { "epoch": 18.381642512077295, "grad_norm": 1.0737476348876953, "learning_rate": 0.001, "loss": 1.9721, "step": 213080 }, { "epoch": 18.386473429951693, "grad_norm": 5.27506160736084, "learning_rate": 0.001, "loss": 1.9784, "step": 213136 }, { "epoch": 18.391304347826086, "grad_norm": 1.2847074270248413, "learning_rate": 0.001, "loss": 1.975, "step": 213192 }, { "epoch": 18.396135265700483, "grad_norm": 1.0320801734924316, "learning_rate": 0.001, "loss": 1.9833, "step": 213248 }, { "epoch": 18.40096618357488, "grad_norm": 1.0841103792190552, "learning_rate": 0.001, "loss": 1.9793, "step": 213304 }, { "epoch": 18.405797101449274, "grad_norm": 0.36594316363334656, "learning_rate": 0.001, "loss": 1.976, "step": 213360 }, { "epoch": 18.41062801932367, "grad_norm": 0.32960349321365356, "learning_rate": 0.001, "loss": 1.9807, "step": 213416 }, { "epoch": 18.415458937198068, "grad_norm": 3.309676170349121, "learning_rate": 0.001, "loss": 1.98, "step": 213472 }, { "epoch": 18.420289855072465, "grad_norm": 0.3207552134990692, "learning_rate": 0.001, "loss": 1.9925, "step": 213528 }, { "epoch": 18.42512077294686, "grad_norm": 0.5544682741165161, "learning_rate": 0.001, "loss": 1.9801, "step": 213584 }, { "epoch": 18.429951690821255, "grad_norm": 0.3301583230495453, "learning_rate": 0.001, "loss": 1.9813, "step": 213640 }, { "epoch": 18.434782608695652, "grad_norm": 0.29986175894737244, "learning_rate": 0.001, "loss": 1.9918, "step": 213696 }, { "epoch": 18.43961352657005, "grad_norm": 1.6060774326324463, "learning_rate": 0.001, "loss": 1.9851, "step": 213752 }, { "epoch": 18.444444444444443, "grad_norm": 0.30713963508605957, "learning_rate": 0.001, "loss": 1.9937, "step": 213808 }, { "epoch": 18.44927536231884, "grad_norm": 0.3522479236125946, "learning_rate": 0.001, "loss": 1.9925, "step": 213864 }, { "epoch": 18.454106280193237, "grad_norm": 0.8559319376945496, "learning_rate": 0.001, "loss": 1.9847, "step": 213920 }, { "epoch": 18.458937198067634, "grad_norm": 1.3294062614440918, "learning_rate": 0.001, "loss": 1.9785, "step": 213976 }, { "epoch": 18.463768115942027, "grad_norm": 0.6562640070915222, "learning_rate": 0.001, "loss": 1.9873, "step": 214032 }, { "epoch": 18.468599033816425, "grad_norm": 0.3295597732067108, "learning_rate": 0.001, "loss": 1.9839, "step": 214088 }, { "epoch": 18.47342995169082, "grad_norm": 0.5714313983917236, "learning_rate": 0.001, "loss": 1.9792, "step": 214144 }, { "epoch": 18.47826086956522, "grad_norm": 0.48473870754241943, "learning_rate": 0.001, "loss": 1.9837, "step": 214200 }, { "epoch": 18.483091787439612, "grad_norm": 0.3419836163520813, "learning_rate": 0.001, "loss": 1.9836, "step": 214256 }, { "epoch": 18.48792270531401, "grad_norm": 0.3923450708389282, "learning_rate": 0.001, "loss": 1.9809, "step": 214312 }, { "epoch": 18.492753623188406, "grad_norm": 0.30418074131011963, "learning_rate": 0.001, "loss": 1.973, "step": 214368 }, { "epoch": 18.497584541062803, "grad_norm": 0.814293622970581, "learning_rate": 0.001, "loss": 1.9661, "step": 214424 }, { "epoch": 18.502415458937197, "grad_norm": 0.6015337109565735, "learning_rate": 0.001, "loss": 1.9937, "step": 214480 }, { "epoch": 18.507246376811594, "grad_norm": 2.4349822998046875, "learning_rate": 0.001, "loss": 2.0089, "step": 214536 }, { "epoch": 18.51207729468599, "grad_norm": 1.044708490371704, "learning_rate": 0.001, "loss": 2.0097, "step": 214592 }, { "epoch": 18.516908212560388, "grad_norm": 1.8226161003112793, "learning_rate": 0.001, "loss": 1.9983, "step": 214648 }, { "epoch": 18.52173913043478, "grad_norm": 0.6571477651596069, "learning_rate": 0.001, "loss": 1.991, "step": 214704 }, { "epoch": 18.52657004830918, "grad_norm": 1.1567578315734863, "learning_rate": 0.001, "loss": 2.011, "step": 214760 }, { "epoch": 18.531400966183575, "grad_norm": 2.9129598140716553, "learning_rate": 0.001, "loss": 2.0363, "step": 214816 }, { "epoch": 18.536231884057973, "grad_norm": 3.226778030395508, "learning_rate": 0.001, "loss": 2.0383, "step": 214872 }, { "epoch": 18.541062801932366, "grad_norm": 1.9838179349899292, "learning_rate": 0.001, "loss": 2.0341, "step": 214928 }, { "epoch": 18.545893719806763, "grad_norm": 2.919928550720215, "learning_rate": 0.001, "loss": 2.0382, "step": 214984 }, { "epoch": 18.55072463768116, "grad_norm": 1.4649657011032104, "learning_rate": 0.001, "loss": 2.0575, "step": 215040 }, { "epoch": 18.555555555555557, "grad_norm": 0.7409532070159912, "learning_rate": 0.001, "loss": 2.0387, "step": 215096 }, { "epoch": 18.56038647342995, "grad_norm": 0.48125436902046204, "learning_rate": 0.001, "loss": 2.0192, "step": 215152 }, { "epoch": 18.565217391304348, "grad_norm": 0.8807508945465088, "learning_rate": 0.001, "loss": 2.0104, "step": 215208 }, { "epoch": 18.570048309178745, "grad_norm": 0.9692062735557556, "learning_rate": 0.001, "loss": 2.0036, "step": 215264 }, { "epoch": 18.57487922705314, "grad_norm": 2.3533763885498047, "learning_rate": 0.001, "loss": 2.0039, "step": 215320 }, { "epoch": 18.579710144927535, "grad_norm": 0.8640643954277039, "learning_rate": 0.001, "loss": 1.9904, "step": 215376 }, { "epoch": 18.584541062801932, "grad_norm": 1.8725852966308594, "learning_rate": 0.001, "loss": 1.9856, "step": 215432 }, { "epoch": 18.58937198067633, "grad_norm": 1.1481963396072388, "learning_rate": 0.001, "loss": 1.9968, "step": 215488 }, { "epoch": 18.594202898550726, "grad_norm": 0.7930685877799988, "learning_rate": 0.001, "loss": 1.9909, "step": 215544 }, { "epoch": 18.59903381642512, "grad_norm": 1.2087026834487915, "learning_rate": 0.001, "loss": 1.9948, "step": 215600 }, { "epoch": 18.603864734299517, "grad_norm": 2.5625784397125244, "learning_rate": 0.001, "loss": 2.0023, "step": 215656 }, { "epoch": 18.608695652173914, "grad_norm": 1.762405514717102, "learning_rate": 0.001, "loss": 2.0036, "step": 215712 }, { "epoch": 18.613526570048307, "grad_norm": 1.0530073642730713, "learning_rate": 0.001, "loss": 2.0122, "step": 215768 }, { "epoch": 18.618357487922705, "grad_norm": 0.4137307107448578, "learning_rate": 0.001, "loss": 2.0064, "step": 215824 }, { "epoch": 18.6231884057971, "grad_norm": 0.5080470442771912, "learning_rate": 0.001, "loss": 2.0149, "step": 215880 }, { "epoch": 18.6280193236715, "grad_norm": 1.3162708282470703, "learning_rate": 0.001, "loss": 2.0165, "step": 215936 }, { "epoch": 18.632850241545892, "grad_norm": 1.289952278137207, "learning_rate": 0.001, "loss": 2.0119, "step": 215992 }, { "epoch": 18.63768115942029, "grad_norm": 0.5914528965950012, "learning_rate": 0.001, "loss": 2.0223, "step": 216048 }, { "epoch": 18.642512077294686, "grad_norm": 0.5511854887008667, "learning_rate": 0.001, "loss": 2.0098, "step": 216104 }, { "epoch": 18.647342995169083, "grad_norm": 0.6083223819732666, "learning_rate": 0.001, "loss": 2.0021, "step": 216160 }, { "epoch": 18.652173913043477, "grad_norm": 3.420048475265503, "learning_rate": 0.001, "loss": 2.0013, "step": 216216 }, { "epoch": 18.657004830917874, "grad_norm": 0.49880850315093994, "learning_rate": 0.001, "loss": 2.0075, "step": 216272 }, { "epoch": 18.66183574879227, "grad_norm": 0.8125979900360107, "learning_rate": 0.001, "loss": 1.9906, "step": 216328 }, { "epoch": 18.666666666666668, "grad_norm": 0.44447895884513855, "learning_rate": 0.001, "loss": 1.9937, "step": 216384 }, { "epoch": 18.67149758454106, "grad_norm": 0.40350720286369324, "learning_rate": 0.001, "loss": 1.9888, "step": 216440 }, { "epoch": 18.67632850241546, "grad_norm": 0.4602723717689514, "learning_rate": 0.001, "loss": 1.9904, "step": 216496 }, { "epoch": 18.681159420289855, "grad_norm": 2.628710985183716, "learning_rate": 0.001, "loss": 1.9891, "step": 216552 }, { "epoch": 18.685990338164252, "grad_norm": 0.44171974062919617, "learning_rate": 0.001, "loss": 1.9961, "step": 216608 }, { "epoch": 18.690821256038646, "grad_norm": 0.3909667432308197, "learning_rate": 0.001, "loss": 1.9891, "step": 216664 }, { "epoch": 18.695652173913043, "grad_norm": 0.6817483901977539, "learning_rate": 0.001, "loss": 1.9829, "step": 216720 }, { "epoch": 18.70048309178744, "grad_norm": 0.40724992752075195, "learning_rate": 0.001, "loss": 1.9892, "step": 216776 }, { "epoch": 18.705314009661837, "grad_norm": 0.5674595236778259, "learning_rate": 0.001, "loss": 1.985, "step": 216832 }, { "epoch": 18.71014492753623, "grad_norm": 0.7696330547332764, "learning_rate": 0.001, "loss": 1.994, "step": 216888 }, { "epoch": 18.714975845410628, "grad_norm": 0.3742004334926605, "learning_rate": 0.001, "loss": 1.9831, "step": 216944 }, { "epoch": 18.719806763285025, "grad_norm": 0.936351478099823, "learning_rate": 0.001, "loss": 1.9892, "step": 217000 }, { "epoch": 18.72463768115942, "grad_norm": 0.37267982959747314, "learning_rate": 0.001, "loss": 1.9833, "step": 217056 }, { "epoch": 18.729468599033815, "grad_norm": 2.975992441177368, "learning_rate": 0.001, "loss": 1.9766, "step": 217112 }, { "epoch": 18.734299516908212, "grad_norm": 1.151590347290039, "learning_rate": 0.001, "loss": 1.9813, "step": 217168 }, { "epoch": 18.73913043478261, "grad_norm": 1.8585960865020752, "learning_rate": 0.001, "loss": 1.9814, "step": 217224 }, { "epoch": 18.743961352657006, "grad_norm": 8.291177749633789, "learning_rate": 0.001, "loss": 1.9981, "step": 217280 }, { "epoch": 18.7487922705314, "grad_norm": 2.0655674934387207, "learning_rate": 0.001, "loss": 1.9948, "step": 217336 }, { "epoch": 18.753623188405797, "grad_norm": 1.350785493850708, "learning_rate": 0.001, "loss": 2.0186, "step": 217392 }, { "epoch": 18.758454106280194, "grad_norm": 1.0436420440673828, "learning_rate": 0.001, "loss": 2.0351, "step": 217448 }, { "epoch": 18.76328502415459, "grad_norm": 2.8785197734832764, "learning_rate": 0.001, "loss": 2.0694, "step": 217504 }, { "epoch": 18.768115942028984, "grad_norm": 1.2601258754730225, "learning_rate": 0.001, "loss": 2.0833, "step": 217560 }, { "epoch": 18.77294685990338, "grad_norm": 1.5567734241485596, "learning_rate": 0.001, "loss": 2.0833, "step": 217616 }, { "epoch": 18.77777777777778, "grad_norm": 0.930168628692627, "learning_rate": 0.001, "loss": 2.061, "step": 217672 }, { "epoch": 18.782608695652176, "grad_norm": 2.3632185459136963, "learning_rate": 0.001, "loss": 2.0468, "step": 217728 }, { "epoch": 18.78743961352657, "grad_norm": 1.0484144687652588, "learning_rate": 0.001, "loss": 2.0458, "step": 217784 }, { "epoch": 18.792270531400966, "grad_norm": 0.4987468719482422, "learning_rate": 0.001, "loss": 2.0512, "step": 217840 }, { "epoch": 18.797101449275363, "grad_norm": 1.6167337894439697, "learning_rate": 0.001, "loss": 2.0459, "step": 217896 }, { "epoch": 18.80193236714976, "grad_norm": 0.5412013530731201, "learning_rate": 0.001, "loss": 2.0521, "step": 217952 }, { "epoch": 18.806763285024154, "grad_norm": 1.7752583026885986, "learning_rate": 0.001, "loss": 2.0304, "step": 218008 }, { "epoch": 18.81159420289855, "grad_norm": 0.9795591831207275, "learning_rate": 0.001, "loss": 2.0247, "step": 218064 }, { "epoch": 18.816425120772948, "grad_norm": 1.9375684261322021, "learning_rate": 0.001, "loss": 2.0383, "step": 218120 }, { "epoch": 18.82125603864734, "grad_norm": 10.737184524536133, "learning_rate": 0.001, "loss": 2.0358, "step": 218176 }, { "epoch": 18.82608695652174, "grad_norm": 0.7466415762901306, "learning_rate": 0.001, "loss": 2.0255, "step": 218232 }, { "epoch": 18.830917874396135, "grad_norm": 2.184471845626831, "learning_rate": 0.001, "loss": 2.0099, "step": 218288 }, { "epoch": 18.835748792270532, "grad_norm": 0.9999092817306519, "learning_rate": 0.001, "loss": 2.0176, "step": 218344 }, { "epoch": 18.840579710144926, "grad_norm": 1.2198060750961304, "learning_rate": 0.001, "loss": 2.0186, "step": 218400 }, { "epoch": 18.845410628019323, "grad_norm": 0.639378011226654, "learning_rate": 0.001, "loss": 2.0248, "step": 218456 }, { "epoch": 18.85024154589372, "grad_norm": 1.5673160552978516, "learning_rate": 0.001, "loss": 2.0217, "step": 218512 }, { "epoch": 18.855072463768117, "grad_norm": 0.9663440585136414, "learning_rate": 0.001, "loss": 2.0384, "step": 218568 }, { "epoch": 18.85990338164251, "grad_norm": 1.1632825136184692, "learning_rate": 0.001, "loss": 2.0388, "step": 218624 }, { "epoch": 18.864734299516908, "grad_norm": 0.5182122588157654, "learning_rate": 0.001, "loss": 2.0331, "step": 218680 }, { "epoch": 18.869565217391305, "grad_norm": 1.1156498193740845, "learning_rate": 0.001, "loss": 2.0213, "step": 218736 }, { "epoch": 18.8743961352657, "grad_norm": 1.1048035621643066, "learning_rate": 0.001, "loss": 2.0165, "step": 218792 }, { "epoch": 18.879227053140095, "grad_norm": 2.802283525466919, "learning_rate": 0.001, "loss": 2.0173, "step": 218848 }, { "epoch": 18.884057971014492, "grad_norm": 2.455322027206421, "learning_rate": 0.001, "loss": 2.0135, "step": 218904 }, { "epoch": 18.88888888888889, "grad_norm": 1.1782455444335938, "learning_rate": 0.001, "loss": 2.0298, "step": 218960 }, { "epoch": 18.893719806763286, "grad_norm": 0.7688833475112915, "learning_rate": 0.001, "loss": 2.0232, "step": 219016 }, { "epoch": 18.89855072463768, "grad_norm": 1.4346656799316406, "learning_rate": 0.001, "loss": 2.032, "step": 219072 }, { "epoch": 18.903381642512077, "grad_norm": 1.6698538064956665, "learning_rate": 0.001, "loss": 2.0304, "step": 219128 }, { "epoch": 18.908212560386474, "grad_norm": 1.014288306236267, "learning_rate": 0.001, "loss": 2.0281, "step": 219184 }, { "epoch": 18.91304347826087, "grad_norm": 0.6735665202140808, "learning_rate": 0.001, "loss": 2.0311, "step": 219240 }, { "epoch": 18.917874396135264, "grad_norm": 2.1199779510498047, "learning_rate": 0.001, "loss": 2.0265, "step": 219296 }, { "epoch": 18.92270531400966, "grad_norm": 0.9367926120758057, "learning_rate": 0.001, "loss": 2.0268, "step": 219352 }, { "epoch": 18.92753623188406, "grad_norm": 1.0031538009643555, "learning_rate": 0.001, "loss": 2.0132, "step": 219408 }, { "epoch": 18.932367149758456, "grad_norm": 1.0376001596450806, "learning_rate": 0.001, "loss": 2.0098, "step": 219464 }, { "epoch": 18.93719806763285, "grad_norm": 2.2143495082855225, "learning_rate": 0.001, "loss": 2.0227, "step": 219520 }, { "epoch": 18.942028985507246, "grad_norm": 3.0990049839019775, "learning_rate": 0.001, "loss": 2.0217, "step": 219576 }, { "epoch": 18.946859903381643, "grad_norm": 1.3573864698410034, "learning_rate": 0.001, "loss": 2.0215, "step": 219632 }, { "epoch": 18.95169082125604, "grad_norm": 4.626236438751221, "learning_rate": 0.001, "loss": 2.0355, "step": 219688 }, { "epoch": 18.956521739130434, "grad_norm": 1.6406254768371582, "learning_rate": 0.001, "loss": 2.0343, "step": 219744 }, { "epoch": 18.96135265700483, "grad_norm": 3.6270246505737305, "learning_rate": 0.001, "loss": 2.0283, "step": 219800 }, { "epoch": 18.966183574879228, "grad_norm": 1.5768861770629883, "learning_rate": 0.001, "loss": 2.0334, "step": 219856 }, { "epoch": 18.971014492753625, "grad_norm": 1.299428939819336, "learning_rate": 0.001, "loss": 2.0309, "step": 219912 }, { "epoch": 18.97584541062802, "grad_norm": 1.4760308265686035, "learning_rate": 0.001, "loss": 2.0302, "step": 219968 }, { "epoch": 18.980676328502415, "grad_norm": 2.3277783393859863, "learning_rate": 0.001, "loss": 2.0248, "step": 220024 }, { "epoch": 18.985507246376812, "grad_norm": 2.1314172744750977, "learning_rate": 0.001, "loss": 2.0104, "step": 220080 }, { "epoch": 18.990338164251206, "grad_norm": 0.6804816722869873, "learning_rate": 0.001, "loss": 2.0114, "step": 220136 }, { "epoch": 18.995169082125603, "grad_norm": 0.5855560302734375, "learning_rate": 0.001, "loss": 2.0091, "step": 220192 }, { "epoch": 19.0, "grad_norm": 0.9299209117889404, "learning_rate": 0.001, "loss": 2.0251, "step": 220248 }, { "epoch": 19.004830917874397, "grad_norm": 1.3240443468093872, "learning_rate": 0.001, "loss": 1.9814, "step": 220304 }, { "epoch": 19.00966183574879, "grad_norm": 1.9409055709838867, "learning_rate": 0.001, "loss": 1.9854, "step": 220360 }, { "epoch": 19.014492753623188, "grad_norm": 1.02378249168396, "learning_rate": 0.001, "loss": 1.9874, "step": 220416 }, { "epoch": 19.019323671497585, "grad_norm": 0.5449873805046082, "learning_rate": 0.001, "loss": 1.9794, "step": 220472 }, { "epoch": 19.02415458937198, "grad_norm": 0.6264786124229431, "learning_rate": 0.001, "loss": 1.9753, "step": 220528 }, { "epoch": 19.028985507246375, "grad_norm": 0.6267743110656738, "learning_rate": 0.001, "loss": 1.9767, "step": 220584 }, { "epoch": 19.033816425120772, "grad_norm": 0.39243680238723755, "learning_rate": 0.001, "loss": 1.9774, "step": 220640 }, { "epoch": 19.03864734299517, "grad_norm": 1.022716760635376, "learning_rate": 0.001, "loss": 1.9687, "step": 220696 }, { "epoch": 19.043478260869566, "grad_norm": 0.38020774722099304, "learning_rate": 0.001, "loss": 1.9724, "step": 220752 }, { "epoch": 19.04830917874396, "grad_norm": 1.5430448055267334, "learning_rate": 0.001, "loss": 1.9738, "step": 220808 }, { "epoch": 19.053140096618357, "grad_norm": 2.1182010173797607, "learning_rate": 0.001, "loss": 1.9766, "step": 220864 }, { "epoch": 19.057971014492754, "grad_norm": 0.8641088008880615, "learning_rate": 0.001, "loss": 1.9746, "step": 220920 }, { "epoch": 19.06280193236715, "grad_norm": 3.1122047901153564, "learning_rate": 0.001, "loss": 1.9763, "step": 220976 }, { "epoch": 19.067632850241544, "grad_norm": 0.40802857279777527, "learning_rate": 0.001, "loss": 1.971, "step": 221032 }, { "epoch": 19.07246376811594, "grad_norm": 2.81142258644104, "learning_rate": 0.001, "loss": 1.9685, "step": 221088 }, { "epoch": 19.07729468599034, "grad_norm": 1.2277580499649048, "learning_rate": 0.001, "loss": 1.9762, "step": 221144 }, { "epoch": 19.082125603864736, "grad_norm": 0.9606290459632874, "learning_rate": 0.001, "loss": 1.9672, "step": 221200 }, { "epoch": 19.08695652173913, "grad_norm": 0.7883738875389099, "learning_rate": 0.001, "loss": 1.9714, "step": 221256 }, { "epoch": 19.091787439613526, "grad_norm": 1.73380446434021, "learning_rate": 0.001, "loss": 1.9706, "step": 221312 }, { "epoch": 19.096618357487923, "grad_norm": 0.3786293566226959, "learning_rate": 0.001, "loss": 1.9771, "step": 221368 }, { "epoch": 19.10144927536232, "grad_norm": 1.057479739189148, "learning_rate": 0.001, "loss": 1.9771, "step": 221424 }, { "epoch": 19.106280193236714, "grad_norm": 0.7734542489051819, "learning_rate": 0.001, "loss": 1.9751, "step": 221480 }, { "epoch": 19.11111111111111, "grad_norm": 0.5324559211730957, "learning_rate": 0.001, "loss": 1.9838, "step": 221536 }, { "epoch": 19.115942028985508, "grad_norm": 1.2888987064361572, "learning_rate": 0.001, "loss": 1.9709, "step": 221592 }, { "epoch": 19.120772946859905, "grad_norm": 0.9148516654968262, "learning_rate": 0.001, "loss": 1.9747, "step": 221648 }, { "epoch": 19.1256038647343, "grad_norm": 0.7805019021034241, "learning_rate": 0.001, "loss": 1.9767, "step": 221704 }, { "epoch": 19.130434782608695, "grad_norm": 2.173671007156372, "learning_rate": 0.001, "loss": 1.9752, "step": 221760 }, { "epoch": 19.135265700483092, "grad_norm": 0.40187910199165344, "learning_rate": 0.001, "loss": 1.9741, "step": 221816 }, { "epoch": 19.14009661835749, "grad_norm": 1.976532220840454, "learning_rate": 0.001, "loss": 1.9676, "step": 221872 }, { "epoch": 19.144927536231883, "grad_norm": 0.28142642974853516, "learning_rate": 0.001, "loss": 1.9613, "step": 221928 }, { "epoch": 19.14975845410628, "grad_norm": 0.4090527594089508, "learning_rate": 0.001, "loss": 1.9603, "step": 221984 }, { "epoch": 19.154589371980677, "grad_norm": 1.0258517265319824, "learning_rate": 0.001, "loss": 1.9692, "step": 222040 }, { "epoch": 19.159420289855074, "grad_norm": 1.1290998458862305, "learning_rate": 0.001, "loss": 1.9646, "step": 222096 }, { "epoch": 19.164251207729468, "grad_norm": 0.6550341248512268, "learning_rate": 0.001, "loss": 1.964, "step": 222152 }, { "epoch": 19.169082125603865, "grad_norm": 0.31408998370170593, "learning_rate": 0.001, "loss": 1.9712, "step": 222208 }, { "epoch": 19.17391304347826, "grad_norm": 0.6046551465988159, "learning_rate": 0.001, "loss": 1.9812, "step": 222264 }, { "epoch": 19.17874396135266, "grad_norm": 0.9274047017097473, "learning_rate": 0.001, "loss": 1.9724, "step": 222320 }, { "epoch": 19.183574879227052, "grad_norm": 0.6885830760002136, "learning_rate": 0.001, "loss": 1.974, "step": 222376 }, { "epoch": 19.18840579710145, "grad_norm": 1.0630031824111938, "learning_rate": 0.001, "loss": 1.9751, "step": 222432 }, { "epoch": 19.193236714975846, "grad_norm": 0.6759706735610962, "learning_rate": 0.001, "loss": 1.9776, "step": 222488 }, { "epoch": 19.19806763285024, "grad_norm": 1.002660870552063, "learning_rate": 0.001, "loss": 1.9781, "step": 222544 }, { "epoch": 19.202898550724637, "grad_norm": 0.9634578227996826, "learning_rate": 0.001, "loss": 1.9772, "step": 222600 }, { "epoch": 19.207729468599034, "grad_norm": 0.9043963551521301, "learning_rate": 0.001, "loss": 1.9781, "step": 222656 }, { "epoch": 19.21256038647343, "grad_norm": 0.9274563193321228, "learning_rate": 0.001, "loss": 1.9903, "step": 222712 }, { "epoch": 19.217391304347824, "grad_norm": 0.5402976274490356, "learning_rate": 0.001, "loss": 1.9973, "step": 222768 }, { "epoch": 19.22222222222222, "grad_norm": 0.6415870785713196, "learning_rate": 0.001, "loss": 1.9761, "step": 222824 }, { "epoch": 19.22705314009662, "grad_norm": 0.6730472445487976, "learning_rate": 0.001, "loss": 1.9795, "step": 222880 }, { "epoch": 19.231884057971016, "grad_norm": 0.574005126953125, "learning_rate": 0.001, "loss": 1.9758, "step": 222936 }, { "epoch": 19.23671497584541, "grad_norm": 0.5289649367332458, "learning_rate": 0.001, "loss": 1.9769, "step": 222992 }, { "epoch": 19.241545893719806, "grad_norm": 0.5496167540550232, "learning_rate": 0.001, "loss": 1.9719, "step": 223048 }, { "epoch": 19.246376811594203, "grad_norm": 0.5212436318397522, "learning_rate": 0.001, "loss": 1.9758, "step": 223104 }, { "epoch": 19.2512077294686, "grad_norm": 0.7783918380737305, "learning_rate": 0.001, "loss": 1.9636, "step": 223160 }, { "epoch": 19.256038647342994, "grad_norm": 0.45680466294288635, "learning_rate": 0.001, "loss": 1.9701, "step": 223216 }, { "epoch": 19.26086956521739, "grad_norm": 0.4279637932777405, "learning_rate": 0.001, "loss": 1.9632, "step": 223272 }, { "epoch": 19.265700483091788, "grad_norm": 0.3881438672542572, "learning_rate": 0.001, "loss": 1.9787, "step": 223328 }, { "epoch": 19.270531400966185, "grad_norm": 4.5589799880981445, "learning_rate": 0.001, "loss": 1.9653, "step": 223384 }, { "epoch": 19.27536231884058, "grad_norm": 1.1751387119293213, "learning_rate": 0.001, "loss": 1.9645, "step": 223440 }, { "epoch": 19.280193236714975, "grad_norm": 0.8862648606300354, "learning_rate": 0.001, "loss": 1.9549, "step": 223496 }, { "epoch": 19.285024154589372, "grad_norm": 0.8736650347709656, "learning_rate": 0.001, "loss": 1.9618, "step": 223552 }, { "epoch": 19.28985507246377, "grad_norm": 1.2138983011245728, "learning_rate": 0.001, "loss": 1.9639, "step": 223608 }, { "epoch": 19.294685990338163, "grad_norm": 1.3407403230667114, "learning_rate": 0.001, "loss": 1.9616, "step": 223664 }, { "epoch": 19.29951690821256, "grad_norm": 1.4980202913284302, "learning_rate": 0.001, "loss": 1.9703, "step": 223720 }, { "epoch": 19.304347826086957, "grad_norm": 1.0528651475906372, "learning_rate": 0.001, "loss": 1.9699, "step": 223776 }, { "epoch": 19.309178743961354, "grad_norm": 4.161685943603516, "learning_rate": 0.001, "loss": 1.9678, "step": 223832 }, { "epoch": 19.314009661835748, "grad_norm": 0.678322434425354, "learning_rate": 0.001, "loss": 1.9811, "step": 223888 }, { "epoch": 19.318840579710145, "grad_norm": 0.9019632935523987, "learning_rate": 0.001, "loss": 1.9781, "step": 223944 }, { "epoch": 19.32367149758454, "grad_norm": 0.7608224749565125, "learning_rate": 0.001, "loss": 1.9721, "step": 224000 }, { "epoch": 19.32850241545894, "grad_norm": 0.6512464284896851, "learning_rate": 0.001, "loss": 1.9797, "step": 224056 }, { "epoch": 19.333333333333332, "grad_norm": 1.1273747682571411, "learning_rate": 0.001, "loss": 1.9773, "step": 224112 }, { "epoch": 19.33816425120773, "grad_norm": 3.248605966567993, "learning_rate": 0.001, "loss": 1.9793, "step": 224168 }, { "epoch": 19.342995169082126, "grad_norm": 0.8719470500946045, "learning_rate": 0.001, "loss": 1.9844, "step": 224224 }, { "epoch": 19.347826086956523, "grad_norm": 0.774939775466919, "learning_rate": 0.001, "loss": 1.9964, "step": 224280 }, { "epoch": 19.352657004830917, "grad_norm": 0.5201621651649475, "learning_rate": 0.001, "loss": 1.9824, "step": 224336 }, { "epoch": 19.357487922705314, "grad_norm": 1.3088537454605103, "learning_rate": 0.001, "loss": 1.9977, "step": 224392 }, { "epoch": 19.36231884057971, "grad_norm": 2.41510272026062, "learning_rate": 0.001, "loss": 1.9913, "step": 224448 }, { "epoch": 19.367149758454108, "grad_norm": 1.0621293783187866, "learning_rate": 0.001, "loss": 1.9842, "step": 224504 }, { "epoch": 19.3719806763285, "grad_norm": 0.48479539155960083, "learning_rate": 0.001, "loss": 1.9843, "step": 224560 }, { "epoch": 19.3768115942029, "grad_norm": 0.7369958758354187, "learning_rate": 0.001, "loss": 1.9773, "step": 224616 }, { "epoch": 19.381642512077295, "grad_norm": 0.6733920574188232, "learning_rate": 0.001, "loss": 1.9798, "step": 224672 }, { "epoch": 19.386473429951693, "grad_norm": 0.717187762260437, "learning_rate": 0.001, "loss": 1.9765, "step": 224728 }, { "epoch": 19.391304347826086, "grad_norm": 1.4908002614974976, "learning_rate": 0.001, "loss": 1.9828, "step": 224784 }, { "epoch": 19.396135265700483, "grad_norm": 0.478899210691452, "learning_rate": 0.001, "loss": 1.9796, "step": 224840 }, { "epoch": 19.40096618357488, "grad_norm": 1.7314268350601196, "learning_rate": 0.001, "loss": 1.9762, "step": 224896 }, { "epoch": 19.405797101449274, "grad_norm": 0.9426409006118774, "learning_rate": 0.001, "loss": 1.98, "step": 224952 }, { "epoch": 19.41062801932367, "grad_norm": 1.0732628107070923, "learning_rate": 0.001, "loss": 1.9746, "step": 225008 }, { "epoch": 19.415458937198068, "grad_norm": 0.4627336263656616, "learning_rate": 0.001, "loss": 1.9871, "step": 225064 }, { "epoch": 19.420289855072465, "grad_norm": 2.255784749984741, "learning_rate": 0.001, "loss": 1.9789, "step": 225120 }, { "epoch": 19.42512077294686, "grad_norm": 1.3085148334503174, "learning_rate": 0.001, "loss": 1.9982, "step": 225176 }, { "epoch": 19.429951690821255, "grad_norm": 1.4280967712402344, "learning_rate": 0.001, "loss": 2.0059, "step": 225232 }, { "epoch": 19.434782608695652, "grad_norm": 1.0377458333969116, "learning_rate": 0.001, "loss": 1.9999, "step": 225288 }, { "epoch": 19.43961352657005, "grad_norm": 0.5367633700370789, "learning_rate": 0.001, "loss": 2.0083, "step": 225344 }, { "epoch": 19.444444444444443, "grad_norm": 1.6878571510314941, "learning_rate": 0.001, "loss": 2.0079, "step": 225400 }, { "epoch": 19.44927536231884, "grad_norm": 0.8240814208984375, "learning_rate": 0.001, "loss": 1.9977, "step": 225456 }, { "epoch": 19.454106280193237, "grad_norm": 1.278629183769226, "learning_rate": 0.001, "loss": 1.9974, "step": 225512 }, { "epoch": 19.458937198067634, "grad_norm": 2.1589179039001465, "learning_rate": 0.001, "loss": 1.9868, "step": 225568 }, { "epoch": 19.463768115942027, "grad_norm": 0.6857483983039856, "learning_rate": 0.001, "loss": 1.9954, "step": 225624 }, { "epoch": 19.468599033816425, "grad_norm": 0.6987135410308838, "learning_rate": 0.001, "loss": 1.9865, "step": 225680 }, { "epoch": 19.47342995169082, "grad_norm": 0.6640617847442627, "learning_rate": 0.001, "loss": 1.9869, "step": 225736 }, { "epoch": 19.47826086956522, "grad_norm": 0.5606048107147217, "learning_rate": 0.001, "loss": 1.99, "step": 225792 }, { "epoch": 19.483091787439612, "grad_norm": 0.8694623112678528, "learning_rate": 0.001, "loss": 1.9795, "step": 225848 }, { "epoch": 19.48792270531401, "grad_norm": 1.1421536207199097, "learning_rate": 0.001, "loss": 1.9801, "step": 225904 }, { "epoch": 19.492753623188406, "grad_norm": 4.96950626373291, "learning_rate": 0.001, "loss": 1.9882, "step": 225960 }, { "epoch": 19.497584541062803, "grad_norm": 1.1328555345535278, "learning_rate": 0.001, "loss": 1.9839, "step": 226016 }, { "epoch": 19.502415458937197, "grad_norm": 0.5209496021270752, "learning_rate": 0.001, "loss": 1.9797, "step": 226072 }, { "epoch": 19.507246376811594, "grad_norm": 0.528617262840271, "learning_rate": 0.001, "loss": 1.9836, "step": 226128 }, { "epoch": 19.51207729468599, "grad_norm": 0.7308517098426819, "learning_rate": 0.001, "loss": 1.9707, "step": 226184 }, { "epoch": 19.516908212560388, "grad_norm": 0.6073090434074402, "learning_rate": 0.001, "loss": 1.9784, "step": 226240 }, { "epoch": 19.52173913043478, "grad_norm": 0.6104119420051575, "learning_rate": 0.001, "loss": 1.9792, "step": 226296 }, { "epoch": 19.52657004830918, "grad_norm": 1.0912226438522339, "learning_rate": 0.001, "loss": 1.9781, "step": 226352 }, { "epoch": 19.531400966183575, "grad_norm": 1.078590989112854, "learning_rate": 0.001, "loss": 1.9759, "step": 226408 }, { "epoch": 19.536231884057973, "grad_norm": 0.5421713590621948, "learning_rate": 0.001, "loss": 1.9919, "step": 226464 }, { "epoch": 19.541062801932366, "grad_norm": 0.5305871367454529, "learning_rate": 0.001, "loss": 1.9841, "step": 226520 }, { "epoch": 19.545893719806763, "grad_norm": 0.7110370397567749, "learning_rate": 0.001, "loss": 1.9936, "step": 226576 }, { "epoch": 19.55072463768116, "grad_norm": 1.0523451566696167, "learning_rate": 0.001, "loss": 1.9974, "step": 226632 }, { "epoch": 19.555555555555557, "grad_norm": 0.5616431832313538, "learning_rate": 0.001, "loss": 1.9781, "step": 226688 }, { "epoch": 19.56038647342995, "grad_norm": 1.0167936086654663, "learning_rate": 0.001, "loss": 1.9798, "step": 226744 }, { "epoch": 19.565217391304348, "grad_norm": 0.645546019077301, "learning_rate": 0.001, "loss": 1.9773, "step": 226800 }, { "epoch": 19.570048309178745, "grad_norm": 0.3923267424106598, "learning_rate": 0.001, "loss": 1.9725, "step": 226856 }, { "epoch": 19.57487922705314, "grad_norm": 0.5731530785560608, "learning_rate": 0.001, "loss": 1.9824, "step": 226912 }, { "epoch": 19.579710144927535, "grad_norm": 0.44377586245536804, "learning_rate": 0.001, "loss": 1.979, "step": 226968 }, { "epoch": 19.584541062801932, "grad_norm": 1.0439039468765259, "learning_rate": 0.001, "loss": 1.9757, "step": 227024 }, { "epoch": 19.58937198067633, "grad_norm": 0.7364981770515442, "learning_rate": 0.001, "loss": 1.9745, "step": 227080 }, { "epoch": 19.594202898550726, "grad_norm": 1.5196747779846191, "learning_rate": 0.001, "loss": 1.977, "step": 227136 }, { "epoch": 19.59903381642512, "grad_norm": 2.876661777496338, "learning_rate": 0.001, "loss": 1.9834, "step": 227192 }, { "epoch": 19.603864734299517, "grad_norm": 0.5320111513137817, "learning_rate": 0.001, "loss": 1.978, "step": 227248 }, { "epoch": 19.608695652173914, "grad_norm": 2.156527519226074, "learning_rate": 0.001, "loss": 1.9792, "step": 227304 }, { "epoch": 19.613526570048307, "grad_norm": 0.59865802526474, "learning_rate": 0.001, "loss": 1.9736, "step": 227360 }, { "epoch": 19.618357487922705, "grad_norm": 0.9262649416923523, "learning_rate": 0.001, "loss": 1.9847, "step": 227416 }, { "epoch": 19.6231884057971, "grad_norm": 1.1218934059143066, "learning_rate": 0.001, "loss": 1.9805, "step": 227472 }, { "epoch": 19.6280193236715, "grad_norm": 1.2085765600204468, "learning_rate": 0.001, "loss": 1.9778, "step": 227528 }, { "epoch": 19.632850241545892, "grad_norm": 0.8585699796676636, "learning_rate": 0.001, "loss": 1.9848, "step": 227584 }, { "epoch": 19.63768115942029, "grad_norm": 1.0675084590911865, "learning_rate": 0.001, "loss": 1.9894, "step": 227640 }, { "epoch": 19.642512077294686, "grad_norm": 1.194161057472229, "learning_rate": 0.001, "loss": 1.9752, "step": 227696 }, { "epoch": 19.647342995169083, "grad_norm": 0.6294888257980347, "learning_rate": 0.001, "loss": 1.9892, "step": 227752 }, { "epoch": 19.652173913043477, "grad_norm": 0.8624576926231384, "learning_rate": 0.001, "loss": 1.9978, "step": 227808 }, { "epoch": 19.657004830917874, "grad_norm": 0.6598075032234192, "learning_rate": 0.001, "loss": 1.9911, "step": 227864 }, { "epoch": 19.66183574879227, "grad_norm": 0.6938336491584778, "learning_rate": 0.001, "loss": 1.9744, "step": 227920 }, { "epoch": 19.666666666666668, "grad_norm": 0.7940035462379456, "learning_rate": 0.001, "loss": 1.9842, "step": 227976 }, { "epoch": 19.67149758454106, "grad_norm": 1.3795771598815918, "learning_rate": 0.001, "loss": 1.9897, "step": 228032 }, { "epoch": 19.67632850241546, "grad_norm": 0.7683830261230469, "learning_rate": 0.001, "loss": 1.9829, "step": 228088 }, { "epoch": 19.681159420289855, "grad_norm": 0.514057457447052, "learning_rate": 0.001, "loss": 1.9816, "step": 228144 }, { "epoch": 19.685990338164252, "grad_norm": 0.6565588712692261, "learning_rate": 0.001, "loss": 1.9878, "step": 228200 }, { "epoch": 19.690821256038646, "grad_norm": 0.8971356749534607, "learning_rate": 0.001, "loss": 1.9893, "step": 228256 }, { "epoch": 19.695652173913043, "grad_norm": 0.7112553715705872, "learning_rate": 0.001, "loss": 1.9878, "step": 228312 }, { "epoch": 19.70048309178744, "grad_norm": 0.7366810441017151, "learning_rate": 0.001, "loss": 1.9869, "step": 228368 }, { "epoch": 19.705314009661837, "grad_norm": 1.141973614692688, "learning_rate": 0.001, "loss": 1.9899, "step": 228424 }, { "epoch": 19.71014492753623, "grad_norm": 1.2263668775558472, "learning_rate": 0.001, "loss": 1.9937, "step": 228480 }, { "epoch": 19.714975845410628, "grad_norm": 0.36490365862846375, "learning_rate": 0.001, "loss": 1.9935, "step": 228536 }, { "epoch": 19.719806763285025, "grad_norm": 7.048654556274414, "learning_rate": 0.001, "loss": 1.9904, "step": 228592 }, { "epoch": 19.72463768115942, "grad_norm": 0.6913021802902222, "learning_rate": 0.001, "loss": 1.9791, "step": 228648 }, { "epoch": 19.729468599033815, "grad_norm": 0.5232803821563721, "learning_rate": 0.001, "loss": 1.9716, "step": 228704 }, { "epoch": 19.734299516908212, "grad_norm": 2.327164888381958, "learning_rate": 0.001, "loss": 1.9808, "step": 228760 }, { "epoch": 19.73913043478261, "grad_norm": 0.7183934450149536, "learning_rate": 0.001, "loss": 1.9811, "step": 228816 }, { "epoch": 19.743961352657006, "grad_norm": 0.7957116365432739, "learning_rate": 0.001, "loss": 1.9875, "step": 228872 }, { "epoch": 19.7487922705314, "grad_norm": 0.9885707497596741, "learning_rate": 0.001, "loss": 1.9767, "step": 228928 }, { "epoch": 19.753623188405797, "grad_norm": 0.3768506348133087, "learning_rate": 0.001, "loss": 1.9796, "step": 228984 }, { "epoch": 19.758454106280194, "grad_norm": 0.7667130827903748, "learning_rate": 0.001, "loss": 1.9828, "step": 229040 }, { "epoch": 19.76328502415459, "grad_norm": 1.3434653282165527, "learning_rate": 0.001, "loss": 1.9828, "step": 229096 }, { "epoch": 19.768115942028984, "grad_norm": 1.129123568534851, "learning_rate": 0.001, "loss": 2.0026, "step": 229152 }, { "epoch": 19.77294685990338, "grad_norm": 1.3615796566009521, "learning_rate": 0.001, "loss": 1.9888, "step": 229208 }, { "epoch": 19.77777777777778, "grad_norm": 0.6908249855041504, "learning_rate": 0.001, "loss": 1.9866, "step": 229264 }, { "epoch": 19.782608695652176, "grad_norm": 0.659037709236145, "learning_rate": 0.001, "loss": 1.9931, "step": 229320 }, { "epoch": 19.78743961352657, "grad_norm": 1.0920194387435913, "learning_rate": 0.001, "loss": 1.9833, "step": 229376 }, { "epoch": 19.792270531400966, "grad_norm": 0.835077702999115, "learning_rate": 0.001, "loss": 1.9785, "step": 229432 }, { "epoch": 19.797101449275363, "grad_norm": 0.8240960240364075, "learning_rate": 0.001, "loss": 1.9823, "step": 229488 }, { "epoch": 19.80193236714976, "grad_norm": 0.6170571446418762, "learning_rate": 0.001, "loss": 1.9864, "step": 229544 }, { "epoch": 19.806763285024154, "grad_norm": 0.672433614730835, "learning_rate": 0.001, "loss": 1.9819, "step": 229600 }, { "epoch": 19.81159420289855, "grad_norm": 0.631607711315155, "learning_rate": 0.001, "loss": 1.9789, "step": 229656 }, { "epoch": 19.816425120772948, "grad_norm": 0.7736732959747314, "learning_rate": 0.001, "loss": 1.9868, "step": 229712 }, { "epoch": 19.82125603864734, "grad_norm": 1.0211807489395142, "learning_rate": 0.001, "loss": 1.9879, "step": 229768 }, { "epoch": 19.82608695652174, "grad_norm": 0.4449273645877838, "learning_rate": 0.001, "loss": 1.9927, "step": 229824 }, { "epoch": 19.830917874396135, "grad_norm": 2.2995989322662354, "learning_rate": 0.001, "loss": 1.9864, "step": 229880 }, { "epoch": 19.835748792270532, "grad_norm": 0.6107759475708008, "learning_rate": 0.001, "loss": 1.9815, "step": 229936 }, { "epoch": 19.840579710144926, "grad_norm": 0.7510660290718079, "learning_rate": 0.001, "loss": 1.9988, "step": 229992 }, { "epoch": 19.845410628019323, "grad_norm": 0.4979003667831421, "learning_rate": 0.001, "loss": 1.9925, "step": 230048 }, { "epoch": 19.85024154589372, "grad_norm": 0.8143007755279541, "learning_rate": 0.001, "loss": 1.9949, "step": 230104 }, { "epoch": 19.855072463768117, "grad_norm": 0.7308885455131531, "learning_rate": 0.001, "loss": 2.0326, "step": 230160 }, { "epoch": 19.85990338164251, "grad_norm": 2.2266623973846436, "learning_rate": 0.001, "loss": 2.0238, "step": 230216 }, { "epoch": 19.864734299516908, "grad_norm": 0.8096334338188171, "learning_rate": 0.001, "loss": 2.0284, "step": 230272 }, { "epoch": 19.869565217391305, "grad_norm": 2.0409088134765625, "learning_rate": 0.001, "loss": 2.0419, "step": 230328 }, { "epoch": 19.8743961352657, "grad_norm": 1.359377145767212, "learning_rate": 0.001, "loss": 2.0245, "step": 230384 }, { "epoch": 19.879227053140095, "grad_norm": 1.4059951305389404, "learning_rate": 0.001, "loss": 2.0202, "step": 230440 }, { "epoch": 19.884057971014492, "grad_norm": 0.5972735285758972, "learning_rate": 0.001, "loss": 1.9967, "step": 230496 }, { "epoch": 19.88888888888889, "grad_norm": 1.6779500246047974, "learning_rate": 0.001, "loss": 2.0037, "step": 230552 }, { "epoch": 19.893719806763286, "grad_norm": 1.0977379083633423, "learning_rate": 0.001, "loss": 2.0117, "step": 230608 }, { "epoch": 19.89855072463768, "grad_norm": 0.6582438945770264, "learning_rate": 0.001, "loss": 2.0048, "step": 230664 }, { "epoch": 19.903381642512077, "grad_norm": 1.4949947595596313, "learning_rate": 0.001, "loss": 2.0004, "step": 230720 }, { "epoch": 19.908212560386474, "grad_norm": 2.100862979888916, "learning_rate": 0.001, "loss": 1.9998, "step": 230776 }, { "epoch": 19.91304347826087, "grad_norm": 0.6234824657440186, "learning_rate": 0.001, "loss": 1.9959, "step": 230832 }, { "epoch": 19.917874396135264, "grad_norm": 0.7279660105705261, "learning_rate": 0.001, "loss": 1.9978, "step": 230888 }, { "epoch": 19.92270531400966, "grad_norm": 0.8285762071609497, "learning_rate": 0.001, "loss": 2.0119, "step": 230944 }, { "epoch": 19.92753623188406, "grad_norm": 1.867323875427246, "learning_rate": 0.001, "loss": 2.0022, "step": 231000 }, { "epoch": 19.932367149758456, "grad_norm": 0.9133573174476624, "learning_rate": 0.001, "loss": 1.9993, "step": 231056 }, { "epoch": 19.93719806763285, "grad_norm": 0.6855758428573608, "learning_rate": 0.001, "loss": 2.0008, "step": 231112 }, { "epoch": 19.942028985507246, "grad_norm": 0.9852924346923828, "learning_rate": 0.001, "loss": 1.9971, "step": 231168 }, { "epoch": 19.946859903381643, "grad_norm": 1.0809262990951538, "learning_rate": 0.001, "loss": 2.0023, "step": 231224 }, { "epoch": 19.95169082125604, "grad_norm": 1.0367103815078735, "learning_rate": 0.001, "loss": 1.9968, "step": 231280 }, { "epoch": 19.956521739130434, "grad_norm": 1.608024001121521, "learning_rate": 0.001, "loss": 1.9999, "step": 231336 }, { "epoch": 19.96135265700483, "grad_norm": 1.5402884483337402, "learning_rate": 0.001, "loss": 1.9969, "step": 231392 }, { "epoch": 19.966183574879228, "grad_norm": 1.115765929222107, "learning_rate": 0.001, "loss": 2.0021, "step": 231448 }, { "epoch": 19.971014492753625, "grad_norm": 0.7538047432899475, "learning_rate": 0.001, "loss": 1.9897, "step": 231504 }, { "epoch": 19.97584541062802, "grad_norm": 2.4690496921539307, "learning_rate": 0.001, "loss": 1.9988, "step": 231560 }, { "epoch": 19.980676328502415, "grad_norm": 0.557518482208252, "learning_rate": 0.001, "loss": 1.9967, "step": 231616 }, { "epoch": 19.985507246376812, "grad_norm": 1.1611177921295166, "learning_rate": 0.001, "loss": 1.9929, "step": 231672 }, { "epoch": 19.990338164251206, "grad_norm": 3.2506847381591797, "learning_rate": 0.001, "loss": 1.992, "step": 231728 }, { "epoch": 19.995169082125603, "grad_norm": 0.4481503665447235, "learning_rate": 0.001, "loss": 1.9838, "step": 231784 }, { "epoch": 20.0, "grad_norm": 0.9680782556533813, "learning_rate": 0.001, "loss": 1.9729, "step": 231840 }, { "epoch": 20.004830917874397, "grad_norm": 0.9590798616409302, "learning_rate": 0.001, "loss": 1.9479, "step": 231896 }, { "epoch": 20.00966183574879, "grad_norm": 0.9135307669639587, "learning_rate": 0.001, "loss": 1.9511, "step": 231952 }, { "epoch": 20.014492753623188, "grad_norm": 1.6131869554519653, "learning_rate": 0.001, "loss": 1.9446, "step": 232008 }, { "epoch": 20.019323671497585, "grad_norm": 0.6807425618171692, "learning_rate": 0.001, "loss": 1.9399, "step": 232064 }, { "epoch": 20.02415458937198, "grad_norm": 0.7212848663330078, "learning_rate": 0.001, "loss": 1.9534, "step": 232120 }, { "epoch": 20.028985507246375, "grad_norm": 4.037197113037109, "learning_rate": 0.001, "loss": 1.9684, "step": 232176 }, { "epoch": 20.033816425120772, "grad_norm": 0.7982797622680664, "learning_rate": 0.001, "loss": 1.9605, "step": 232232 }, { "epoch": 20.03864734299517, "grad_norm": 0.7451841235160828, "learning_rate": 0.001, "loss": 1.9602, "step": 232288 }, { "epoch": 20.043478260869566, "grad_norm": 1.8921542167663574, "learning_rate": 0.001, "loss": 1.9628, "step": 232344 }, { "epoch": 20.04830917874396, "grad_norm": 1.2120712995529175, "learning_rate": 0.001, "loss": 1.9679, "step": 232400 }, { "epoch": 20.053140096618357, "grad_norm": 0.7218134999275208, "learning_rate": 0.001, "loss": 1.9679, "step": 232456 }, { "epoch": 20.057971014492754, "grad_norm": 0.7021347880363464, "learning_rate": 0.001, "loss": 1.9639, "step": 232512 }, { "epoch": 20.06280193236715, "grad_norm": 0.43417754769325256, "learning_rate": 0.001, "loss": 1.946, "step": 232568 }, { "epoch": 20.067632850241544, "grad_norm": 0.8222406506538391, "learning_rate": 0.001, "loss": 1.9543, "step": 232624 }, { "epoch": 20.07246376811594, "grad_norm": 3.986311435699463, "learning_rate": 0.001, "loss": 1.9451, "step": 232680 }, { "epoch": 20.07729468599034, "grad_norm": 0.6204794645309448, "learning_rate": 0.001, "loss": 1.9466, "step": 232736 }, { "epoch": 20.082125603864736, "grad_norm": 1.3899047374725342, "learning_rate": 0.001, "loss": 1.9551, "step": 232792 }, { "epoch": 20.08695652173913, "grad_norm": 1.5048880577087402, "learning_rate": 0.001, "loss": 1.9655, "step": 232848 }, { "epoch": 20.091787439613526, "grad_norm": 0.8524477481842041, "learning_rate": 0.001, "loss": 1.9634, "step": 232904 }, { "epoch": 20.096618357487923, "grad_norm": 0.9918443560600281, "learning_rate": 0.001, "loss": 1.9755, "step": 232960 }, { "epoch": 20.10144927536232, "grad_norm": 1.0246857404708862, "learning_rate": 0.001, "loss": 1.9691, "step": 233016 }, { "epoch": 20.106280193236714, "grad_norm": 0.8825823068618774, "learning_rate": 0.001, "loss": 1.981, "step": 233072 }, { "epoch": 20.11111111111111, "grad_norm": 1.254522681236267, "learning_rate": 0.001, "loss": 1.9884, "step": 233128 }, { "epoch": 20.115942028985508, "grad_norm": 1.3309866189956665, "learning_rate": 0.001, "loss": 1.9841, "step": 233184 }, { "epoch": 20.120772946859905, "grad_norm": 0.4176846444606781, "learning_rate": 0.001, "loss": 1.987, "step": 233240 }, { "epoch": 20.1256038647343, "grad_norm": 1.0715160369873047, "learning_rate": 0.001, "loss": 1.9659, "step": 233296 }, { "epoch": 20.130434782608695, "grad_norm": 0.9458504319190979, "learning_rate": 0.001, "loss": 1.9786, "step": 233352 }, { "epoch": 20.135265700483092, "grad_norm": 0.8436697125434875, "learning_rate": 0.001, "loss": 1.9877, "step": 233408 }, { "epoch": 20.14009661835749, "grad_norm": 2.2962377071380615, "learning_rate": 0.001, "loss": 1.9741, "step": 233464 }, { "epoch": 20.144927536231883, "grad_norm": 1.7697343826293945, "learning_rate": 0.001, "loss": 1.9631, "step": 233520 }, { "epoch": 20.14975845410628, "grad_norm": 0.9942611455917358, "learning_rate": 0.001, "loss": 1.9645, "step": 233576 }, { "epoch": 20.154589371980677, "grad_norm": 1.7412433624267578, "learning_rate": 0.001, "loss": 1.9755, "step": 233632 }, { "epoch": 20.159420289855074, "grad_norm": 0.9654859304428101, "learning_rate": 0.001, "loss": 1.971, "step": 233688 }, { "epoch": 20.164251207729468, "grad_norm": 2.0905511379241943, "learning_rate": 0.001, "loss": 1.9869, "step": 233744 }, { "epoch": 20.169082125603865, "grad_norm": 8.522933006286621, "learning_rate": 0.001, "loss": 1.9756, "step": 233800 }, { "epoch": 20.17391304347826, "grad_norm": 2.926584243774414, "learning_rate": 0.001, "loss": 1.9718, "step": 233856 }, { "epoch": 20.17874396135266, "grad_norm": 0.5673848986625671, "learning_rate": 0.001, "loss": 1.9786, "step": 233912 }, { "epoch": 20.183574879227052, "grad_norm": 0.9512795209884644, "learning_rate": 0.001, "loss": 1.9703, "step": 233968 }, { "epoch": 20.18840579710145, "grad_norm": 0.8473700881004333, "learning_rate": 0.001, "loss": 1.9734, "step": 234024 }, { "epoch": 20.193236714975846, "grad_norm": 1.9674136638641357, "learning_rate": 0.001, "loss": 1.9703, "step": 234080 }, { "epoch": 20.19806763285024, "grad_norm": 4.291630268096924, "learning_rate": 0.001, "loss": 1.973, "step": 234136 }, { "epoch": 20.202898550724637, "grad_norm": 0.6540539264678955, "learning_rate": 0.001, "loss": 1.964, "step": 234192 }, { "epoch": 20.207729468599034, "grad_norm": 2.7743849754333496, "learning_rate": 0.001, "loss": 1.9725, "step": 234248 }, { "epoch": 20.21256038647343, "grad_norm": 1.1074353456497192, "learning_rate": 0.001, "loss": 1.9837, "step": 234304 }, { "epoch": 20.217391304347824, "grad_norm": 0.7145338654518127, "learning_rate": 0.001, "loss": 1.9923, "step": 234360 }, { "epoch": 20.22222222222222, "grad_norm": 1.6077476739883423, "learning_rate": 0.001, "loss": 1.9757, "step": 234416 }, { "epoch": 20.22705314009662, "grad_norm": 1.3347930908203125, "learning_rate": 0.001, "loss": 1.9694, "step": 234472 }, { "epoch": 20.231884057971016, "grad_norm": 1.9627556800842285, "learning_rate": 0.001, "loss": 1.9697, "step": 234528 }, { "epoch": 20.23671497584541, "grad_norm": 0.6664007902145386, "learning_rate": 0.001, "loss": 1.973, "step": 234584 }, { "epoch": 20.241545893719806, "grad_norm": 2.4399492740631104, "learning_rate": 0.001, "loss": 1.9724, "step": 234640 }, { "epoch": 20.246376811594203, "grad_norm": 1.4547888040542603, "learning_rate": 0.001, "loss": 1.963, "step": 234696 }, { "epoch": 20.2512077294686, "grad_norm": 1.2826889753341675, "learning_rate": 0.001, "loss": 1.9709, "step": 234752 }, { "epoch": 20.256038647342994, "grad_norm": 1.1072330474853516, "learning_rate": 0.001, "loss": 1.9797, "step": 234808 }, { "epoch": 20.26086956521739, "grad_norm": 1.7266050577163696, "learning_rate": 0.001, "loss": 1.976, "step": 234864 }, { "epoch": 20.265700483091788, "grad_norm": 0.5628601908683777, "learning_rate": 0.001, "loss": 1.9668, "step": 234920 }, { "epoch": 20.270531400966185, "grad_norm": 1.725822925567627, "learning_rate": 0.001, "loss": 1.9673, "step": 234976 }, { "epoch": 20.27536231884058, "grad_norm": 0.7686460614204407, "learning_rate": 0.001, "loss": 1.9683, "step": 235032 }, { "epoch": 20.280193236714975, "grad_norm": 0.58089280128479, "learning_rate": 0.001, "loss": 1.97, "step": 235088 }, { "epoch": 20.285024154589372, "grad_norm": 0.7374380230903625, "learning_rate": 0.001, "loss": 1.9712, "step": 235144 }, { "epoch": 20.28985507246377, "grad_norm": 0.4292374849319458, "learning_rate": 0.001, "loss": 1.9669, "step": 235200 }, { "epoch": 20.294685990338163, "grad_norm": 0.5511637926101685, "learning_rate": 0.001, "loss": 1.9654, "step": 235256 }, { "epoch": 20.29951690821256, "grad_norm": 0.9945524334907532, "learning_rate": 0.001, "loss": 1.9639, "step": 235312 }, { "epoch": 20.304347826086957, "grad_norm": 0.37212470173835754, "learning_rate": 0.001, "loss": 1.9681, "step": 235368 }, { "epoch": 20.309178743961354, "grad_norm": 0.699050784111023, "learning_rate": 0.001, "loss": 1.9586, "step": 235424 }, { "epoch": 20.314009661835748, "grad_norm": 0.4967105984687805, "learning_rate": 0.001, "loss": 1.9574, "step": 235480 }, { "epoch": 20.318840579710145, "grad_norm": 1.2617794275283813, "learning_rate": 0.001, "loss": 1.9528, "step": 235536 }, { "epoch": 20.32367149758454, "grad_norm": 3.291747570037842, "learning_rate": 0.001, "loss": 1.9599, "step": 235592 }, { "epoch": 20.32850241545894, "grad_norm": 3.361870527267456, "learning_rate": 0.001, "loss": 1.9768, "step": 235648 }, { "epoch": 20.333333333333332, "grad_norm": 1.6715673208236694, "learning_rate": 0.001, "loss": 1.9608, "step": 235704 }, { "epoch": 20.33816425120773, "grad_norm": 1.1638327836990356, "learning_rate": 0.001, "loss": 1.9639, "step": 235760 }, { "epoch": 20.342995169082126, "grad_norm": 1.3658504486083984, "learning_rate": 0.001, "loss": 1.9694, "step": 235816 }, { "epoch": 20.347826086956523, "grad_norm": 1.2266860008239746, "learning_rate": 0.001, "loss": 1.9647, "step": 235872 }, { "epoch": 20.352657004830917, "grad_norm": 0.7702404856681824, "learning_rate": 0.001, "loss": 1.9693, "step": 235928 }, { "epoch": 20.357487922705314, "grad_norm": 0.6060357093811035, "learning_rate": 0.001, "loss": 1.9668, "step": 235984 }, { "epoch": 20.36231884057971, "grad_norm": 2.484067916870117, "learning_rate": 0.001, "loss": 1.9574, "step": 236040 }, { "epoch": 20.367149758454108, "grad_norm": 0.48463204503059387, "learning_rate": 0.001, "loss": 1.966, "step": 236096 }, { "epoch": 20.3719806763285, "grad_norm": 0.6289322972297668, "learning_rate": 0.001, "loss": 1.9767, "step": 236152 }, { "epoch": 20.3768115942029, "grad_norm": 1.8797649145126343, "learning_rate": 0.001, "loss": 1.9703, "step": 236208 }, { "epoch": 20.381642512077295, "grad_norm": 0.6994513869285583, "learning_rate": 0.001, "loss": 1.9644, "step": 236264 }, { "epoch": 20.386473429951693, "grad_norm": 1.5874524116516113, "learning_rate": 0.001, "loss": 1.9574, "step": 236320 }, { "epoch": 20.391304347826086, "grad_norm": 0.9595794677734375, "learning_rate": 0.001, "loss": 1.9543, "step": 236376 }, { "epoch": 20.396135265700483, "grad_norm": 0.7710196375846863, "learning_rate": 0.001, "loss": 1.9564, "step": 236432 }, { "epoch": 20.40096618357488, "grad_norm": 1.320600986480713, "learning_rate": 0.001, "loss": 1.9473, "step": 236488 }, { "epoch": 20.405797101449274, "grad_norm": 1.52142333984375, "learning_rate": 0.001, "loss": 1.959, "step": 236544 }, { "epoch": 20.41062801932367, "grad_norm": 0.9746045470237732, "learning_rate": 0.001, "loss": 1.9591, "step": 236600 }, { "epoch": 20.415458937198068, "grad_norm": 0.8647395968437195, "learning_rate": 0.001, "loss": 1.9562, "step": 236656 }, { "epoch": 20.420289855072465, "grad_norm": 0.9446902871131897, "learning_rate": 0.001, "loss": 1.947, "step": 236712 }, { "epoch": 20.42512077294686, "grad_norm": 1.2758747339248657, "learning_rate": 0.001, "loss": 1.9627, "step": 236768 }, { "epoch": 20.429951690821255, "grad_norm": 0.8882046937942505, "learning_rate": 0.001, "loss": 1.9611, "step": 236824 }, { "epoch": 20.434782608695652, "grad_norm": 1.239133596420288, "learning_rate": 0.001, "loss": 1.971, "step": 236880 }, { "epoch": 20.43961352657005, "grad_norm": 0.73228520154953, "learning_rate": 0.001, "loss": 1.9678, "step": 236936 }, { "epoch": 20.444444444444443, "grad_norm": 1.2003484964370728, "learning_rate": 0.001, "loss": 1.9661, "step": 236992 }, { "epoch": 20.44927536231884, "grad_norm": 0.7513173222541809, "learning_rate": 0.001, "loss": 1.9508, "step": 237048 }, { "epoch": 20.454106280193237, "grad_norm": 0.9872759580612183, "learning_rate": 0.001, "loss": 1.9552, "step": 237104 }, { "epoch": 20.458937198067634, "grad_norm": 1.145910382270813, "learning_rate": 0.001, "loss": 1.9623, "step": 237160 }, { "epoch": 20.463768115942027, "grad_norm": 0.9457828402519226, "learning_rate": 0.001, "loss": 1.9656, "step": 237216 }, { "epoch": 20.468599033816425, "grad_norm": 0.5922992825508118, "learning_rate": 0.001, "loss": 1.9683, "step": 237272 }, { "epoch": 20.47342995169082, "grad_norm": 0.8686397671699524, "learning_rate": 0.001, "loss": 1.9672, "step": 237328 }, { "epoch": 20.47826086956522, "grad_norm": 0.43666476011276245, "learning_rate": 0.001, "loss": 1.9717, "step": 237384 }, { "epoch": 20.483091787439612, "grad_norm": 0.813663899898529, "learning_rate": 0.001, "loss": 1.9803, "step": 237440 }, { "epoch": 20.48792270531401, "grad_norm": 0.9560878872871399, "learning_rate": 0.001, "loss": 1.9589, "step": 237496 }, { "epoch": 20.492753623188406, "grad_norm": 1.1921910047531128, "learning_rate": 0.001, "loss": 1.956, "step": 237552 }, { "epoch": 20.497584541062803, "grad_norm": 0.5158993601799011, "learning_rate": 0.001, "loss": 1.948, "step": 237608 }, { "epoch": 20.502415458937197, "grad_norm": 0.5725904107093811, "learning_rate": 0.001, "loss": 1.9586, "step": 237664 }, { "epoch": 20.507246376811594, "grad_norm": 0.49894335865974426, "learning_rate": 0.001, "loss": 1.9687, "step": 237720 }, { "epoch": 20.51207729468599, "grad_norm": 0.7635971903800964, "learning_rate": 0.001, "loss": 1.9532, "step": 237776 }, { "epoch": 20.516908212560388, "grad_norm": 0.48177558183670044, "learning_rate": 0.001, "loss": 1.9567, "step": 237832 }, { "epoch": 20.52173913043478, "grad_norm": 0.34677308797836304, "learning_rate": 0.001, "loss": 1.9485, "step": 237888 }, { "epoch": 20.52657004830918, "grad_norm": 1.500851035118103, "learning_rate": 0.001, "loss": 1.9579, "step": 237944 }, { "epoch": 20.531400966183575, "grad_norm": 0.6042265295982361, "learning_rate": 0.001, "loss": 1.9487, "step": 238000 }, { "epoch": 20.536231884057973, "grad_norm": 0.9513376355171204, "learning_rate": 0.001, "loss": 1.9542, "step": 238056 }, { "epoch": 20.541062801932366, "grad_norm": 0.5707837343215942, "learning_rate": 0.001, "loss": 1.9597, "step": 238112 }, { "epoch": 20.545893719806763, "grad_norm": 1.4689085483551025, "learning_rate": 0.001, "loss": 1.9625, "step": 238168 }, { "epoch": 20.55072463768116, "grad_norm": 0.47666293382644653, "learning_rate": 0.001, "loss": 1.9573, "step": 238224 }, { "epoch": 20.555555555555557, "grad_norm": 1.582378625869751, "learning_rate": 0.001, "loss": 1.9551, "step": 238280 }, { "epoch": 20.56038647342995, "grad_norm": 0.7956146597862244, "learning_rate": 0.001, "loss": 1.9573, "step": 238336 }, { "epoch": 20.565217391304348, "grad_norm": 3.0987062454223633, "learning_rate": 0.001, "loss": 1.9573, "step": 238392 }, { "epoch": 20.570048309178745, "grad_norm": 2.240318775177002, "learning_rate": 0.001, "loss": 1.9525, "step": 238448 }, { "epoch": 20.57487922705314, "grad_norm": 3.2296688556671143, "learning_rate": 0.001, "loss": 1.9641, "step": 238504 }, { "epoch": 20.579710144927535, "grad_norm": 12.198625564575195, "learning_rate": 0.001, "loss": 1.9738, "step": 238560 }, { "epoch": 20.584541062801932, "grad_norm": 1.043789267539978, "learning_rate": 0.001, "loss": 1.9538, "step": 238616 }, { "epoch": 20.58937198067633, "grad_norm": 1.2483795881271362, "learning_rate": 0.001, "loss": 1.9599, "step": 238672 }, { "epoch": 20.594202898550726, "grad_norm": 0.42853376269340515, "learning_rate": 0.001, "loss": 1.9625, "step": 238728 }, { "epoch": 20.59903381642512, "grad_norm": 1.8072736263275146, "learning_rate": 0.001, "loss": 1.9561, "step": 238784 }, { "epoch": 20.603864734299517, "grad_norm": 1.2119892835617065, "learning_rate": 0.001, "loss": 1.9563, "step": 238840 }, { "epoch": 20.608695652173914, "grad_norm": 0.8437949419021606, "learning_rate": 0.001, "loss": 1.9599, "step": 238896 }, { "epoch": 20.613526570048307, "grad_norm": 0.5938432216644287, "learning_rate": 0.001, "loss": 1.9531, "step": 238952 }, { "epoch": 20.618357487922705, "grad_norm": 0.5191885232925415, "learning_rate": 0.001, "loss": 1.9477, "step": 239008 }, { "epoch": 20.6231884057971, "grad_norm": 0.5827580690383911, "learning_rate": 0.001, "loss": 1.9444, "step": 239064 }, { "epoch": 20.6280193236715, "grad_norm": 0.575282871723175, "learning_rate": 0.001, "loss": 1.9457, "step": 239120 }, { "epoch": 20.632850241545892, "grad_norm": 1.3534448146820068, "learning_rate": 0.001, "loss": 1.9528, "step": 239176 }, { "epoch": 20.63768115942029, "grad_norm": 0.29908227920532227, "learning_rate": 0.001, "loss": 1.9446, "step": 239232 }, { "epoch": 20.642512077294686, "grad_norm": 0.4297437369823456, "learning_rate": 0.001, "loss": 1.9578, "step": 239288 }, { "epoch": 20.647342995169083, "grad_norm": 9.909271240234375, "learning_rate": 0.001, "loss": 1.9516, "step": 239344 }, { "epoch": 20.652173913043477, "grad_norm": 2.4474244117736816, "learning_rate": 0.001, "loss": 1.9469, "step": 239400 }, { "epoch": 20.657004830917874, "grad_norm": 1.4169622659683228, "learning_rate": 0.001, "loss": 1.9467, "step": 239456 }, { "epoch": 20.66183574879227, "grad_norm": 0.5401244759559631, "learning_rate": 0.001, "loss": 1.9458, "step": 239512 }, { "epoch": 20.666666666666668, "grad_norm": 0.5860778093338013, "learning_rate": 0.001, "loss": 1.9592, "step": 239568 }, { "epoch": 20.67149758454106, "grad_norm": 0.3608378767967224, "learning_rate": 0.001, "loss": 1.961, "step": 239624 }, { "epoch": 20.67632850241546, "grad_norm": 1.0052027702331543, "learning_rate": 0.001, "loss": 1.9602, "step": 239680 }, { "epoch": 20.681159420289855, "grad_norm": 0.9896047711372375, "learning_rate": 0.001, "loss": 1.9541, "step": 239736 }, { "epoch": 20.685990338164252, "grad_norm": 0.6349100470542908, "learning_rate": 0.001, "loss": 1.9628, "step": 239792 }, { "epoch": 20.690821256038646, "grad_norm": 0.674525260925293, "learning_rate": 0.001, "loss": 1.9598, "step": 239848 }, { "epoch": 20.695652173913043, "grad_norm": 0.47772717475891113, "learning_rate": 0.001, "loss": 1.9625, "step": 239904 }, { "epoch": 20.70048309178744, "grad_norm": 1.067716360092163, "learning_rate": 0.001, "loss": 1.9568, "step": 239960 }, { "epoch": 20.705314009661837, "grad_norm": 0.8412366509437561, "learning_rate": 0.001, "loss": 1.9475, "step": 240016 }, { "epoch": 20.71014492753623, "grad_norm": 2.141857385635376, "learning_rate": 0.001, "loss": 1.947, "step": 240072 }, { "epoch": 20.714975845410628, "grad_norm": 0.4832453429698944, "learning_rate": 0.001, "loss": 1.9451, "step": 240128 }, { "epoch": 20.719806763285025, "grad_norm": 0.8958045840263367, "learning_rate": 0.001, "loss": 1.934, "step": 240184 }, { "epoch": 20.72463768115942, "grad_norm": 0.7899490594863892, "learning_rate": 0.001, "loss": 1.9443, "step": 240240 }, { "epoch": 20.729468599033815, "grad_norm": 0.6653137803077698, "learning_rate": 0.001, "loss": 1.967, "step": 240296 }, { "epoch": 20.734299516908212, "grad_norm": 0.5967240929603577, "learning_rate": 0.001, "loss": 1.9642, "step": 240352 }, { "epoch": 20.73913043478261, "grad_norm": 0.7364872097969055, "learning_rate": 0.001, "loss": 1.9585, "step": 240408 }, { "epoch": 20.743961352657006, "grad_norm": 0.5708608031272888, "learning_rate": 0.001, "loss": 1.9609, "step": 240464 }, { "epoch": 20.7487922705314, "grad_norm": 0.9739353656768799, "learning_rate": 0.001, "loss": 1.9587, "step": 240520 }, { "epoch": 20.753623188405797, "grad_norm": 0.5648186802864075, "learning_rate": 0.001, "loss": 1.9648, "step": 240576 }, { "epoch": 20.758454106280194, "grad_norm": 0.45446285605430603, "learning_rate": 0.001, "loss": 1.9644, "step": 240632 }, { "epoch": 20.76328502415459, "grad_norm": 0.7312005162239075, "learning_rate": 0.001, "loss": 1.9563, "step": 240688 }, { "epoch": 20.768115942028984, "grad_norm": 0.9654141664505005, "learning_rate": 0.001, "loss": 1.9631, "step": 240744 }, { "epoch": 20.77294685990338, "grad_norm": 0.8596492409706116, "learning_rate": 0.001, "loss": 1.9565, "step": 240800 }, { "epoch": 20.77777777777778, "grad_norm": 1.0629878044128418, "learning_rate": 0.001, "loss": 1.9608, "step": 240856 }, { "epoch": 20.782608695652176, "grad_norm": 0.4907311797142029, "learning_rate": 0.001, "loss": 1.9584, "step": 240912 }, { "epoch": 20.78743961352657, "grad_norm": 3.250532627105713, "learning_rate": 0.001, "loss": 1.949, "step": 240968 }, { "epoch": 20.792270531400966, "grad_norm": 4.6710309982299805, "learning_rate": 0.001, "loss": 1.9441, "step": 241024 }, { "epoch": 20.797101449275363, "grad_norm": 1.580987572669983, "learning_rate": 0.001, "loss": 1.9583, "step": 241080 }, { "epoch": 20.80193236714976, "grad_norm": 2.200958490371704, "learning_rate": 0.001, "loss": 1.9699, "step": 241136 }, { "epoch": 20.806763285024154, "grad_norm": 1.9953570365905762, "learning_rate": 0.001, "loss": 1.9696, "step": 241192 }, { "epoch": 20.81159420289855, "grad_norm": 0.43790823221206665, "learning_rate": 0.001, "loss": 1.9647, "step": 241248 }, { "epoch": 20.816425120772948, "grad_norm": 0.3859184980392456, "learning_rate": 0.001, "loss": 1.9552, "step": 241304 }, { "epoch": 20.82125603864734, "grad_norm": 0.6125442981719971, "learning_rate": 0.001, "loss": 1.9596, "step": 241360 }, { "epoch": 20.82608695652174, "grad_norm": 1.0904134511947632, "learning_rate": 0.001, "loss": 1.9697, "step": 241416 }, { "epoch": 20.830917874396135, "grad_norm": 1.3848557472229004, "learning_rate": 0.001, "loss": 1.9709, "step": 241472 }, { "epoch": 20.835748792270532, "grad_norm": 1.8763023614883423, "learning_rate": 0.001, "loss": 1.9597, "step": 241528 }, { "epoch": 20.840579710144926, "grad_norm": 1.2586201429367065, "learning_rate": 0.001, "loss": 1.9677, "step": 241584 }, { "epoch": 20.845410628019323, "grad_norm": 1.1125701665878296, "learning_rate": 0.001, "loss": 1.9712, "step": 241640 }, { "epoch": 20.85024154589372, "grad_norm": 1.9199594259262085, "learning_rate": 0.001, "loss": 1.9586, "step": 241696 }, { "epoch": 20.855072463768117, "grad_norm": 1.488676905632019, "learning_rate": 0.001, "loss": 1.9612, "step": 241752 }, { "epoch": 20.85990338164251, "grad_norm": 0.9288828372955322, "learning_rate": 0.001, "loss": 1.9496, "step": 241808 }, { "epoch": 20.864734299516908, "grad_norm": 1.688301682472229, "learning_rate": 0.001, "loss": 1.9491, "step": 241864 }, { "epoch": 20.869565217391305, "grad_norm": 0.3567260801792145, "learning_rate": 0.001, "loss": 1.9582, "step": 241920 }, { "epoch": 20.8743961352657, "grad_norm": 1.4549705982208252, "learning_rate": 0.001, "loss": 1.9629, "step": 241976 }, { "epoch": 20.879227053140095, "grad_norm": 1.4151066541671753, "learning_rate": 0.001, "loss": 1.9586, "step": 242032 }, { "epoch": 20.884057971014492, "grad_norm": 0.5660560727119446, "learning_rate": 0.001, "loss": 1.9616, "step": 242088 }, { "epoch": 20.88888888888889, "grad_norm": 1.82489013671875, "learning_rate": 0.001, "loss": 1.9505, "step": 242144 }, { "epoch": 20.893719806763286, "grad_norm": 4.623912334442139, "learning_rate": 0.001, "loss": 1.9553, "step": 242200 }, { "epoch": 20.89855072463768, "grad_norm": 0.346300333738327, "learning_rate": 0.001, "loss": 1.9493, "step": 242256 }, { "epoch": 20.903381642512077, "grad_norm": 0.7645660638809204, "learning_rate": 0.001, "loss": 1.9508, "step": 242312 }, { "epoch": 20.908212560386474, "grad_norm": 0.4117395281791687, "learning_rate": 0.001, "loss": 1.9501, "step": 242368 }, { "epoch": 20.91304347826087, "grad_norm": 1.3760991096496582, "learning_rate": 0.001, "loss": 1.9573, "step": 242424 }, { "epoch": 20.917874396135264, "grad_norm": 1.501476526260376, "learning_rate": 0.001, "loss": 1.985, "step": 242480 }, { "epoch": 20.92270531400966, "grad_norm": 0.8489803075790405, "learning_rate": 0.001, "loss": 1.9632, "step": 242536 }, { "epoch": 20.92753623188406, "grad_norm": 0.6838896870613098, "learning_rate": 0.001, "loss": 1.9537, "step": 242592 }, { "epoch": 20.932367149758456, "grad_norm": 1.043967843055725, "learning_rate": 0.001, "loss": 1.9525, "step": 242648 }, { "epoch": 20.93719806763285, "grad_norm": 0.951227605342865, "learning_rate": 0.001, "loss": 1.9512, "step": 242704 }, { "epoch": 20.942028985507246, "grad_norm": 1.2392356395721436, "learning_rate": 0.001, "loss": 1.9531, "step": 242760 }, { "epoch": 20.946859903381643, "grad_norm": 1.1150872707366943, "learning_rate": 0.001, "loss": 1.9594, "step": 242816 }, { "epoch": 20.95169082125604, "grad_norm": 1.5052411556243896, "learning_rate": 0.001, "loss": 1.9732, "step": 242872 }, { "epoch": 20.956521739130434, "grad_norm": 1.1393407583236694, "learning_rate": 0.001, "loss": 1.9759, "step": 242928 }, { "epoch": 20.96135265700483, "grad_norm": 0.6628881692886353, "learning_rate": 0.001, "loss": 1.9674, "step": 242984 }, { "epoch": 20.966183574879228, "grad_norm": 0.44781407713890076, "learning_rate": 0.001, "loss": 1.972, "step": 243040 }, { "epoch": 20.971014492753625, "grad_norm": 2.7506120204925537, "learning_rate": 0.001, "loss": 1.9802, "step": 243096 }, { "epoch": 20.97584541062802, "grad_norm": 1.3479582071304321, "learning_rate": 0.001, "loss": 1.9959, "step": 243152 }, { "epoch": 20.980676328502415, "grad_norm": 1.218040108680725, "learning_rate": 0.001, "loss": 1.9829, "step": 243208 }, { "epoch": 20.985507246376812, "grad_norm": 0.7947763204574585, "learning_rate": 0.001, "loss": 1.9793, "step": 243264 }, { "epoch": 20.990338164251206, "grad_norm": 1.330209493637085, "learning_rate": 0.001, "loss": 1.9712, "step": 243320 }, { "epoch": 20.995169082125603, "grad_norm": 1.218634009361267, "learning_rate": 0.001, "loss": 1.9714, "step": 243376 }, { "epoch": 21.0, "grad_norm": 0.7437846064567566, "learning_rate": 0.001, "loss": 1.9746, "step": 243432 }, { "epoch": 21.004830917874397, "grad_norm": 1.6389663219451904, "learning_rate": 0.001, "loss": 1.9342, "step": 243488 }, { "epoch": 21.00966183574879, "grad_norm": 1.472157597541809, "learning_rate": 0.001, "loss": 1.9357, "step": 243544 }, { "epoch": 21.014492753623188, "grad_norm": 0.6316161751747131, "learning_rate": 0.001, "loss": 1.9418, "step": 243600 }, { "epoch": 21.019323671497585, "grad_norm": 0.4211271107196808, "learning_rate": 0.001, "loss": 1.9396, "step": 243656 }, { "epoch": 21.02415458937198, "grad_norm": 1.4315630197525024, "learning_rate": 0.001, "loss": 1.9387, "step": 243712 }, { "epoch": 21.028985507246375, "grad_norm": 0.746213436126709, "learning_rate": 0.001, "loss": 1.9381, "step": 243768 }, { "epoch": 21.033816425120772, "grad_norm": 0.7639557123184204, "learning_rate": 0.001, "loss": 1.9325, "step": 243824 }, { "epoch": 21.03864734299517, "grad_norm": 4.488649845123291, "learning_rate": 0.001, "loss": 1.9366, "step": 243880 }, { "epoch": 21.043478260869566, "grad_norm": 0.8225687742233276, "learning_rate": 0.001, "loss": 1.9344, "step": 243936 }, { "epoch": 21.04830917874396, "grad_norm": 1.2400037050247192, "learning_rate": 0.001, "loss": 1.9347, "step": 243992 }, { "epoch": 21.053140096618357, "grad_norm": 1.25209641456604, "learning_rate": 0.001, "loss": 1.9313, "step": 244048 }, { "epoch": 21.057971014492754, "grad_norm": 1.6570799350738525, "learning_rate": 0.001, "loss": 1.9266, "step": 244104 }, { "epoch": 21.06280193236715, "grad_norm": 2.0417397022247314, "learning_rate": 0.001, "loss": 1.9246, "step": 244160 }, { "epoch": 21.067632850241544, "grad_norm": 0.47704049944877625, "learning_rate": 0.001, "loss": 1.9384, "step": 244216 }, { "epoch": 21.07246376811594, "grad_norm": 0.8360207676887512, "learning_rate": 0.001, "loss": 1.9381, "step": 244272 }, { "epoch": 21.07729468599034, "grad_norm": 0.41122159361839294, "learning_rate": 0.001, "loss": 1.937, "step": 244328 }, { "epoch": 21.082125603864736, "grad_norm": 0.5890462398529053, "learning_rate": 0.001, "loss": 1.9341, "step": 244384 }, { "epoch": 21.08695652173913, "grad_norm": 0.4236384630203247, "learning_rate": 0.001, "loss": 1.9342, "step": 244440 }, { "epoch": 21.091787439613526, "grad_norm": 0.4738198518753052, "learning_rate": 0.001, "loss": 1.9301, "step": 244496 }, { "epoch": 21.096618357487923, "grad_norm": 0.9215365052223206, "learning_rate": 0.001, "loss": 1.9248, "step": 244552 }, { "epoch": 21.10144927536232, "grad_norm": 0.7103099822998047, "learning_rate": 0.001, "loss": 1.9308, "step": 244608 }, { "epoch": 21.106280193236714, "grad_norm": 0.7142167687416077, "learning_rate": 0.001, "loss": 1.9262, "step": 244664 }, { "epoch": 21.11111111111111, "grad_norm": 0.5004721879959106, "learning_rate": 0.001, "loss": 1.9236, "step": 244720 }, { "epoch": 21.115942028985508, "grad_norm": 0.8549935817718506, "learning_rate": 0.001, "loss": 1.9362, "step": 244776 }, { "epoch": 21.120772946859905, "grad_norm": 0.7482916712760925, "learning_rate": 0.001, "loss": 1.9389, "step": 244832 }, { "epoch": 21.1256038647343, "grad_norm": 0.8914834856987, "learning_rate": 0.001, "loss": 1.9333, "step": 244888 }, { "epoch": 21.130434782608695, "grad_norm": 1.3610881567001343, "learning_rate": 0.001, "loss": 1.9311, "step": 244944 }, { "epoch": 21.135265700483092, "grad_norm": 2.021111011505127, "learning_rate": 0.001, "loss": 1.9482, "step": 245000 }, { "epoch": 21.14009661835749, "grad_norm": 1.15712571144104, "learning_rate": 0.001, "loss": 1.9554, "step": 245056 }, { "epoch": 21.144927536231883, "grad_norm": 1.3414568901062012, "learning_rate": 0.001, "loss": 1.9551, "step": 245112 }, { "epoch": 21.14975845410628, "grad_norm": 1.4819480180740356, "learning_rate": 0.001, "loss": 1.95, "step": 245168 }, { "epoch": 21.154589371980677, "grad_norm": 3.9842798709869385, "learning_rate": 0.001, "loss": 1.9694, "step": 245224 }, { "epoch": 21.159420289855074, "grad_norm": 0.8567904233932495, "learning_rate": 0.001, "loss": 1.9619, "step": 245280 }, { "epoch": 21.164251207729468, "grad_norm": 1.3237590789794922, "learning_rate": 0.001, "loss": 1.9512, "step": 245336 }, { "epoch": 21.169082125603865, "grad_norm": 0.42718395590782166, "learning_rate": 0.001, "loss": 1.9642, "step": 245392 }, { "epoch": 21.17391304347826, "grad_norm": 0.46414899826049805, "learning_rate": 0.001, "loss": 1.956, "step": 245448 }, { "epoch": 21.17874396135266, "grad_norm": 0.48734915256500244, "learning_rate": 0.001, "loss": 1.9487, "step": 245504 }, { "epoch": 21.183574879227052, "grad_norm": 2.552711248397827, "learning_rate": 0.001, "loss": 1.9428, "step": 245560 }, { "epoch": 21.18840579710145, "grad_norm": 5.905545711517334, "learning_rate": 0.001, "loss": 1.9305, "step": 245616 }, { "epoch": 21.193236714975846, "grad_norm": 0.7736048102378845, "learning_rate": 0.001, "loss": 1.9291, "step": 245672 }, { "epoch": 21.19806763285024, "grad_norm": 0.5431773662567139, "learning_rate": 0.001, "loss": 1.9357, "step": 245728 }, { "epoch": 21.202898550724637, "grad_norm": 0.8890239000320435, "learning_rate": 0.001, "loss": 1.9248, "step": 245784 }, { "epoch": 21.207729468599034, "grad_norm": 0.8334510326385498, "learning_rate": 0.001, "loss": 1.9355, "step": 245840 }, { "epoch": 21.21256038647343, "grad_norm": 0.6951484680175781, "learning_rate": 0.001, "loss": 1.9376, "step": 245896 }, { "epoch": 21.217391304347824, "grad_norm": 0.33767789602279663, "learning_rate": 0.001, "loss": 1.9379, "step": 245952 }, { "epoch": 21.22222222222222, "grad_norm": 0.296726256608963, "learning_rate": 0.001, "loss": 1.9389, "step": 246008 }, { "epoch": 21.22705314009662, "grad_norm": 0.6288598775863647, "learning_rate": 0.001, "loss": 1.9383, "step": 246064 }, { "epoch": 21.231884057971016, "grad_norm": 3.858222246170044, "learning_rate": 0.001, "loss": 1.931, "step": 246120 }, { "epoch": 21.23671497584541, "grad_norm": 9.175567626953125, "learning_rate": 0.001, "loss": 1.9343, "step": 246176 }, { "epoch": 21.241545893719806, "grad_norm": 0.35068923234939575, "learning_rate": 0.001, "loss": 1.9267, "step": 246232 }, { "epoch": 21.246376811594203, "grad_norm": 0.40852609276771545, "learning_rate": 0.001, "loss": 1.9379, "step": 246288 }, { "epoch": 21.2512077294686, "grad_norm": 0.8824296593666077, "learning_rate": 0.001, "loss": 1.9357, "step": 246344 }, { "epoch": 21.256038647342994, "grad_norm": 0.507612407207489, "learning_rate": 0.001, "loss": 1.9396, "step": 246400 }, { "epoch": 21.26086956521739, "grad_norm": 0.4482591450214386, "learning_rate": 0.001, "loss": 1.9308, "step": 246456 }, { "epoch": 21.265700483091788, "grad_norm": 1.2142528295516968, "learning_rate": 0.001, "loss": 1.9399, "step": 246512 }, { "epoch": 21.270531400966185, "grad_norm": 0.3617037832736969, "learning_rate": 0.001, "loss": 1.9418, "step": 246568 }, { "epoch": 21.27536231884058, "grad_norm": 1.38692045211792, "learning_rate": 0.001, "loss": 1.9378, "step": 246624 }, { "epoch": 21.280193236714975, "grad_norm": 0.7056344747543335, "learning_rate": 0.001, "loss": 1.9331, "step": 246680 }, { "epoch": 21.285024154589372, "grad_norm": 0.641124963760376, "learning_rate": 0.001, "loss": 1.9351, "step": 246736 }, { "epoch": 21.28985507246377, "grad_norm": 0.4981977939605713, "learning_rate": 0.001, "loss": 1.9279, "step": 246792 }, { "epoch": 21.294685990338163, "grad_norm": 2.1799654960632324, "learning_rate": 0.001, "loss": 1.9383, "step": 246848 }, { "epoch": 21.29951690821256, "grad_norm": 0.5045608878135681, "learning_rate": 0.001, "loss": 1.9342, "step": 246904 }, { "epoch": 21.304347826086957, "grad_norm": 0.5069738030433655, "learning_rate": 0.001, "loss": 1.9353, "step": 246960 }, { "epoch": 21.309178743961354, "grad_norm": 2.1662988662719727, "learning_rate": 0.001, "loss": 1.9326, "step": 247016 }, { "epoch": 21.314009661835748, "grad_norm": 1.150823712348938, "learning_rate": 0.001, "loss": 1.9466, "step": 247072 }, { "epoch": 21.318840579710145, "grad_norm": 0.6821958422660828, "learning_rate": 0.001, "loss": 1.9352, "step": 247128 }, { "epoch": 21.32367149758454, "grad_norm": 0.4901876747608185, "learning_rate": 0.001, "loss": 1.9294, "step": 247184 }, { "epoch": 21.32850241545894, "grad_norm": 0.48851287364959717, "learning_rate": 0.001, "loss": 1.935, "step": 247240 }, { "epoch": 21.333333333333332, "grad_norm": 1.0849577188491821, "learning_rate": 0.001, "loss": 1.9513, "step": 247296 }, { "epoch": 21.33816425120773, "grad_norm": 1.6245534420013428, "learning_rate": 0.001, "loss": 1.9428, "step": 247352 }, { "epoch": 21.342995169082126, "grad_norm": 1.0170176029205322, "learning_rate": 0.001, "loss": 1.9521, "step": 247408 }, { "epoch": 21.347826086956523, "grad_norm": 0.8316085934638977, "learning_rate": 0.001, "loss": 1.9505, "step": 247464 }, { "epoch": 21.352657004830917, "grad_norm": 0.5427383780479431, "learning_rate": 0.001, "loss": 1.9648, "step": 247520 }, { "epoch": 21.357487922705314, "grad_norm": 0.48038238286972046, "learning_rate": 0.001, "loss": 1.9459, "step": 247576 }, { "epoch": 21.36231884057971, "grad_norm": 0.5514872074127197, "learning_rate": 0.001, "loss": 1.9428, "step": 247632 }, { "epoch": 21.367149758454108, "grad_norm": 0.5876020193099976, "learning_rate": 0.001, "loss": 1.9514, "step": 247688 }, { "epoch": 21.3719806763285, "grad_norm": 0.559477686882019, "learning_rate": 0.001, "loss": 1.9504, "step": 247744 }, { "epoch": 21.3768115942029, "grad_norm": 0.9388629198074341, "learning_rate": 0.001, "loss": 1.9457, "step": 247800 }, { "epoch": 21.381642512077295, "grad_norm": 0.5193232893943787, "learning_rate": 0.001, "loss": 1.9406, "step": 247856 }, { "epoch": 21.386473429951693, "grad_norm": 0.466985821723938, "learning_rate": 0.001, "loss": 1.9601, "step": 247912 }, { "epoch": 21.391304347826086, "grad_norm": 0.5548844933509827, "learning_rate": 0.001, "loss": 1.9647, "step": 247968 }, { "epoch": 21.396135265700483, "grad_norm": 0.7892430424690247, "learning_rate": 0.001, "loss": 1.9664, "step": 248024 }, { "epoch": 21.40096618357488, "grad_norm": 2.712905168533325, "learning_rate": 0.001, "loss": 1.9643, "step": 248080 }, { "epoch": 21.405797101449274, "grad_norm": 1.1068408489227295, "learning_rate": 0.001, "loss": 1.96, "step": 248136 }, { "epoch": 21.41062801932367, "grad_norm": 1.3370461463928223, "learning_rate": 0.001, "loss": 1.9575, "step": 248192 }, { "epoch": 21.415458937198068, "grad_norm": 1.0986648797988892, "learning_rate": 0.001, "loss": 1.9567, "step": 248248 }, { "epoch": 21.420289855072465, "grad_norm": 0.8226195573806763, "learning_rate": 0.001, "loss": 1.9541, "step": 248304 }, { "epoch": 21.42512077294686, "grad_norm": 0.7474024891853333, "learning_rate": 0.001, "loss": 1.9416, "step": 248360 }, { "epoch": 21.429951690821255, "grad_norm": 0.8005509376525879, "learning_rate": 0.001, "loss": 1.9543, "step": 248416 }, { "epoch": 21.434782608695652, "grad_norm": 0.5598627328872681, "learning_rate": 0.001, "loss": 1.9454, "step": 248472 }, { "epoch": 21.43961352657005, "grad_norm": 0.8114826679229736, "learning_rate": 0.001, "loss": 1.9524, "step": 248528 }, { "epoch": 21.444444444444443, "grad_norm": 0.8691081404685974, "learning_rate": 0.001, "loss": 1.948, "step": 248584 }, { "epoch": 21.44927536231884, "grad_norm": 6.244614601135254, "learning_rate": 0.001, "loss": 1.9491, "step": 248640 }, { "epoch": 21.454106280193237, "grad_norm": 0.8425182104110718, "learning_rate": 0.001, "loss": 1.9522, "step": 248696 }, { "epoch": 21.458937198067634, "grad_norm": 1.651371717453003, "learning_rate": 0.001, "loss": 1.9539, "step": 248752 }, { "epoch": 21.463768115942027, "grad_norm": 2.101410388946533, "learning_rate": 0.001, "loss": 1.9477, "step": 248808 }, { "epoch": 21.468599033816425, "grad_norm": 0.5928142070770264, "learning_rate": 0.001, "loss": 1.9579, "step": 248864 }, { "epoch": 21.47342995169082, "grad_norm": 0.8492199778556824, "learning_rate": 0.001, "loss": 1.9586, "step": 248920 }, { "epoch": 21.47826086956522, "grad_norm": 0.7929604053497314, "learning_rate": 0.001, "loss": 1.9696, "step": 248976 }, { "epoch": 21.483091787439612, "grad_norm": 0.4867192804813385, "learning_rate": 0.001, "loss": 1.9625, "step": 249032 }, { "epoch": 21.48792270531401, "grad_norm": 0.45051974058151245, "learning_rate": 0.001, "loss": 1.957, "step": 249088 }, { "epoch": 21.492753623188406, "grad_norm": 5.42129373550415, "learning_rate": 0.001, "loss": 1.9661, "step": 249144 }, { "epoch": 21.497584541062803, "grad_norm": 0.7890036702156067, "learning_rate": 0.001, "loss": 1.9513, "step": 249200 }, { "epoch": 21.502415458937197, "grad_norm": 1.4707787036895752, "learning_rate": 0.001, "loss": 1.9629, "step": 249256 }, { "epoch": 21.507246376811594, "grad_norm": 1.2665071487426758, "learning_rate": 0.001, "loss": 1.9633, "step": 249312 }, { "epoch": 21.51207729468599, "grad_norm": 0.975257158279419, "learning_rate": 0.001, "loss": 1.9693, "step": 249368 }, { "epoch": 21.516908212560388, "grad_norm": 0.6110181212425232, "learning_rate": 0.001, "loss": 1.9626, "step": 249424 }, { "epoch": 21.52173913043478, "grad_norm": 1.4485918283462524, "learning_rate": 0.001, "loss": 1.964, "step": 249480 }, { "epoch": 21.52657004830918, "grad_norm": 1.9796454906463623, "learning_rate": 0.001, "loss": 1.97, "step": 249536 }, { "epoch": 21.531400966183575, "grad_norm": 0.555401086807251, "learning_rate": 0.001, "loss": 1.9702, "step": 249592 }, { "epoch": 21.536231884057973, "grad_norm": 1.062963604927063, "learning_rate": 0.001, "loss": 1.9654, "step": 249648 }, { "epoch": 21.541062801932366, "grad_norm": 0.538298487663269, "learning_rate": 0.001, "loss": 1.9558, "step": 249704 }, { "epoch": 21.545893719806763, "grad_norm": 0.4900556802749634, "learning_rate": 0.001, "loss": 1.9608, "step": 249760 }, { "epoch": 21.55072463768116, "grad_norm": 0.5375021696090698, "learning_rate": 0.001, "loss": 1.9531, "step": 249816 }, { "epoch": 21.555555555555557, "grad_norm": 3.2594358921051025, "learning_rate": 0.001, "loss": 1.9561, "step": 249872 }, { "epoch": 21.56038647342995, "grad_norm": 1.2410610914230347, "learning_rate": 0.001, "loss": 1.966, "step": 249928 }, { "epoch": 21.565217391304348, "grad_norm": 1.7509667873382568, "learning_rate": 0.001, "loss": 1.9605, "step": 249984 }, { "epoch": 21.570048309178745, "grad_norm": 0.5602964162826538, "learning_rate": 0.001, "loss": 1.9669, "step": 250040 }, { "epoch": 21.57487922705314, "grad_norm": 0.9668024182319641, "learning_rate": 0.001, "loss": 1.9668, "step": 250096 }, { "epoch": 21.579710144927535, "grad_norm": 1.0046262741088867, "learning_rate": 0.001, "loss": 1.964, "step": 250152 }, { "epoch": 21.584541062801932, "grad_norm": 0.8888593912124634, "learning_rate": 0.001, "loss": 1.9575, "step": 250208 }, { "epoch": 21.58937198067633, "grad_norm": 0.644279956817627, "learning_rate": 0.001, "loss": 1.9601, "step": 250264 }, { "epoch": 21.594202898550726, "grad_norm": 0.8787930011749268, "learning_rate": 0.001, "loss": 1.9654, "step": 250320 }, { "epoch": 21.59903381642512, "grad_norm": 6.46877384185791, "learning_rate": 0.001, "loss": 1.9708, "step": 250376 }, { "epoch": 21.603864734299517, "grad_norm": 8.018998146057129, "learning_rate": 0.001, "loss": 1.958, "step": 250432 }, { "epoch": 21.608695652173914, "grad_norm": 0.4972819983959198, "learning_rate": 0.001, "loss": 1.9457, "step": 250488 }, { "epoch": 21.613526570048307, "grad_norm": 0.5577712059020996, "learning_rate": 0.001, "loss": 1.9596, "step": 250544 }, { "epoch": 21.618357487922705, "grad_norm": 0.8427213430404663, "learning_rate": 0.001, "loss": 1.955, "step": 250600 }, { "epoch": 21.6231884057971, "grad_norm": 0.662353515625, "learning_rate": 0.001, "loss": 1.9601, "step": 250656 }, { "epoch": 21.6280193236715, "grad_norm": 0.8701677322387695, "learning_rate": 0.001, "loss": 1.9579, "step": 250712 }, { "epoch": 21.632850241545892, "grad_norm": 0.6668132543563843, "learning_rate": 0.001, "loss": 1.9521, "step": 250768 }, { "epoch": 21.63768115942029, "grad_norm": 0.5875878930091858, "learning_rate": 0.001, "loss": 1.9385, "step": 250824 }, { "epoch": 21.642512077294686, "grad_norm": 1.148645281791687, "learning_rate": 0.001, "loss": 1.9497, "step": 250880 }, { "epoch": 21.647342995169083, "grad_norm": 0.7564665079116821, "learning_rate": 0.001, "loss": 1.96, "step": 250936 }, { "epoch": 21.652173913043477, "grad_norm": 2.311861276626587, "learning_rate": 0.001, "loss": 1.9512, "step": 250992 }, { "epoch": 21.657004830917874, "grad_norm": 1.6183760166168213, "learning_rate": 0.001, "loss": 1.9551, "step": 251048 }, { "epoch": 21.66183574879227, "grad_norm": 0.9762577414512634, "learning_rate": 0.001, "loss": 1.9565, "step": 251104 }, { "epoch": 21.666666666666668, "grad_norm": 0.5730143189430237, "learning_rate": 0.001, "loss": 1.9685, "step": 251160 }, { "epoch": 21.67149758454106, "grad_norm": 0.5153380632400513, "learning_rate": 0.001, "loss": 1.9565, "step": 251216 }, { "epoch": 21.67632850241546, "grad_norm": 0.9978387355804443, "learning_rate": 0.001, "loss": 1.9589, "step": 251272 }, { "epoch": 21.681159420289855, "grad_norm": 1.0492725372314453, "learning_rate": 0.001, "loss": 1.9601, "step": 251328 }, { "epoch": 21.685990338164252, "grad_norm": 0.42663252353668213, "learning_rate": 0.001, "loss": 1.9624, "step": 251384 }, { "epoch": 21.690821256038646, "grad_norm": 0.9065369367599487, "learning_rate": 0.001, "loss": 1.9574, "step": 251440 }, { "epoch": 21.695652173913043, "grad_norm": 0.8139636516571045, "learning_rate": 0.001, "loss": 1.9693, "step": 251496 }, { "epoch": 21.70048309178744, "grad_norm": 0.4350810647010803, "learning_rate": 0.001, "loss": 1.9842, "step": 251552 }, { "epoch": 21.705314009661837, "grad_norm": 0.7480289340019226, "learning_rate": 0.001, "loss": 1.9733, "step": 251608 }, { "epoch": 21.71014492753623, "grad_norm": 0.6209117770195007, "learning_rate": 0.001, "loss": 1.9764, "step": 251664 }, { "epoch": 21.714975845410628, "grad_norm": 1.1292963027954102, "learning_rate": 0.001, "loss": 1.9683, "step": 251720 }, { "epoch": 21.719806763285025, "grad_norm": 0.5321400165557861, "learning_rate": 0.001, "loss": 1.9562, "step": 251776 }, { "epoch": 21.72463768115942, "grad_norm": 1.6908754110336304, "learning_rate": 0.001, "loss": 1.9479, "step": 251832 }, { "epoch": 21.729468599033815, "grad_norm": 4.08009672164917, "learning_rate": 0.001, "loss": 1.9612, "step": 251888 }, { "epoch": 21.734299516908212, "grad_norm": 0.5517629384994507, "learning_rate": 0.001, "loss": 1.9542, "step": 251944 }, { "epoch": 21.73913043478261, "grad_norm": 0.5395469069480896, "learning_rate": 0.001, "loss": 1.9501, "step": 252000 }, { "epoch": 21.743961352657006, "grad_norm": 8.448577880859375, "learning_rate": 0.001, "loss": 1.9488, "step": 252056 }, { "epoch": 21.7487922705314, "grad_norm": 0.4472997784614563, "learning_rate": 0.001, "loss": 1.9465, "step": 252112 }, { "epoch": 21.753623188405797, "grad_norm": 0.576563835144043, "learning_rate": 0.001, "loss": 1.9431, "step": 252168 }, { "epoch": 21.758454106280194, "grad_norm": 0.43651366233825684, "learning_rate": 0.001, "loss": 1.938, "step": 252224 }, { "epoch": 21.76328502415459, "grad_norm": 0.3002920150756836, "learning_rate": 0.001, "loss": 1.9533, "step": 252280 }, { "epoch": 21.768115942028984, "grad_norm": 0.4790380001068115, "learning_rate": 0.001, "loss": 1.9446, "step": 252336 }, { "epoch": 21.77294685990338, "grad_norm": 1.0764104127883911, "learning_rate": 0.001, "loss": 1.9644, "step": 252392 }, { "epoch": 21.77777777777778, "grad_norm": 0.5034885406494141, "learning_rate": 0.001, "loss": 1.9535, "step": 252448 }, { "epoch": 21.782608695652176, "grad_norm": 0.7224101424217224, "learning_rate": 0.001, "loss": 1.9495, "step": 252504 }, { "epoch": 21.78743961352657, "grad_norm": 1.0009119510650635, "learning_rate": 0.001, "loss": 1.947, "step": 252560 }, { "epoch": 21.792270531400966, "grad_norm": 0.9348419308662415, "learning_rate": 0.001, "loss": 1.9512, "step": 252616 }, { "epoch": 21.797101449275363, "grad_norm": 10.893620491027832, "learning_rate": 0.001, "loss": 1.9488, "step": 252672 }, { "epoch": 21.80193236714976, "grad_norm": 1.8768951892852783, "learning_rate": 0.001, "loss": 1.9554, "step": 252728 }, { "epoch": 21.806763285024154, "grad_norm": 0.6475788354873657, "learning_rate": 0.001, "loss": 1.9549, "step": 252784 }, { "epoch": 21.81159420289855, "grad_norm": 0.5225663781166077, "learning_rate": 0.001, "loss": 1.954, "step": 252840 }, { "epoch": 21.816425120772948, "grad_norm": 0.8639033436775208, "learning_rate": 0.001, "loss": 1.9581, "step": 252896 }, { "epoch": 21.82125603864734, "grad_norm": 0.6297056674957275, "learning_rate": 0.001, "loss": 1.9534, "step": 252952 }, { "epoch": 21.82608695652174, "grad_norm": 0.7101808786392212, "learning_rate": 0.001, "loss": 1.9752, "step": 253008 }, { "epoch": 21.830917874396135, "grad_norm": 0.9261828660964966, "learning_rate": 0.001, "loss": 1.9769, "step": 253064 }, { "epoch": 21.835748792270532, "grad_norm": 0.5968183279037476, "learning_rate": 0.001, "loss": 1.9633, "step": 253120 }, { "epoch": 21.840579710144926, "grad_norm": 0.34197914600372314, "learning_rate": 0.001, "loss": 1.9627, "step": 253176 }, { "epoch": 21.845410628019323, "grad_norm": 0.5174090266227722, "learning_rate": 0.001, "loss": 1.9664, "step": 253232 }, { "epoch": 21.85024154589372, "grad_norm": 0.4941200017929077, "learning_rate": 0.001, "loss": 1.962, "step": 253288 }, { "epoch": 21.855072463768117, "grad_norm": 0.7915153503417969, "learning_rate": 0.001, "loss": 1.9569, "step": 253344 }, { "epoch": 21.85990338164251, "grad_norm": 1.2240104675292969, "learning_rate": 0.001, "loss": 1.952, "step": 253400 }, { "epoch": 21.864734299516908, "grad_norm": 0.4947909414768219, "learning_rate": 0.001, "loss": 1.9553, "step": 253456 }, { "epoch": 21.869565217391305, "grad_norm": 1.1203914880752563, "learning_rate": 0.001, "loss": 1.9581, "step": 253512 }, { "epoch": 21.8743961352657, "grad_norm": 0.45310690999031067, "learning_rate": 0.001, "loss": 1.9558, "step": 253568 }, { "epoch": 21.879227053140095, "grad_norm": 0.7869296073913574, "learning_rate": 0.001, "loss": 1.9506, "step": 253624 }, { "epoch": 21.884057971014492, "grad_norm": 0.7560972571372986, "learning_rate": 0.001, "loss": 1.9458, "step": 253680 }, { "epoch": 21.88888888888889, "grad_norm": 0.4116303026676178, "learning_rate": 0.001, "loss": 1.9573, "step": 253736 }, { "epoch": 21.893719806763286, "grad_norm": 0.6125767230987549, "learning_rate": 0.001, "loss": 1.9526, "step": 253792 }, { "epoch": 21.89855072463768, "grad_norm": 0.7014297246932983, "learning_rate": 0.001, "loss": 1.9817, "step": 253848 }, { "epoch": 21.903381642512077, "grad_norm": 0.861863374710083, "learning_rate": 0.001, "loss": 1.9777, "step": 253904 }, { "epoch": 21.908212560386474, "grad_norm": 0.4163946509361267, "learning_rate": 0.001, "loss": 1.965, "step": 253960 }, { "epoch": 21.91304347826087, "grad_norm": 1.5239109992980957, "learning_rate": 0.001, "loss": 1.9494, "step": 254016 }, { "epoch": 21.917874396135264, "grad_norm": 0.559363067150116, "learning_rate": 0.001, "loss": 1.9656, "step": 254072 }, { "epoch": 21.92270531400966, "grad_norm": 2.498509407043457, "learning_rate": 0.001, "loss": 1.9598, "step": 254128 }, { "epoch": 21.92753623188406, "grad_norm": 0.9224415421485901, "learning_rate": 0.001, "loss": 1.9677, "step": 254184 }, { "epoch": 21.932367149758456, "grad_norm": 6.18450403213501, "learning_rate": 0.001, "loss": 2.0191, "step": 254240 }, { "epoch": 21.93719806763285, "grad_norm": 2.443880319595337, "learning_rate": 0.001, "loss": 1.9948, "step": 254296 }, { "epoch": 21.942028985507246, "grad_norm": 0.8769632577896118, "learning_rate": 0.001, "loss": 1.986, "step": 254352 }, { "epoch": 21.946859903381643, "grad_norm": 1.0776855945587158, "learning_rate": 0.001, "loss": 1.9765, "step": 254408 }, { "epoch": 21.95169082125604, "grad_norm": 1.7792994976043701, "learning_rate": 0.001, "loss": 1.9878, "step": 254464 }, { "epoch": 21.956521739130434, "grad_norm": 2.6442644596099854, "learning_rate": 0.001, "loss": 1.9823, "step": 254520 }, { "epoch": 21.96135265700483, "grad_norm": 1.9945226907730103, "learning_rate": 0.001, "loss": 1.99, "step": 254576 }, { "epoch": 21.966183574879228, "grad_norm": 0.5213829278945923, "learning_rate": 0.001, "loss": 2.0043, "step": 254632 }, { "epoch": 21.971014492753625, "grad_norm": 0.6495558023452759, "learning_rate": 0.001, "loss": 1.9943, "step": 254688 }, { "epoch": 21.97584541062802, "grad_norm": 3.5094666481018066, "learning_rate": 0.001, "loss": 1.9883, "step": 254744 }, { "epoch": 21.980676328502415, "grad_norm": 0.7619023323059082, "learning_rate": 0.001, "loss": 2.0011, "step": 254800 }, { "epoch": 21.985507246376812, "grad_norm": 0.5413810610771179, "learning_rate": 0.001, "loss": 1.997, "step": 254856 }, { "epoch": 21.990338164251206, "grad_norm": 0.38880395889282227, "learning_rate": 0.001, "loss": 1.9819, "step": 254912 }, { "epoch": 21.995169082125603, "grad_norm": 0.6126607656478882, "learning_rate": 0.001, "loss": 1.9962, "step": 254968 }, { "epoch": 22.0, "grad_norm": 0.2876018285751343, "learning_rate": 0.001, "loss": 1.9782, "step": 255024 }, { "epoch": 22.004830917874397, "grad_norm": 1.6907565593719482, "learning_rate": 0.001, "loss": 1.9427, "step": 255080 }, { "epoch": 22.00966183574879, "grad_norm": 2.5868358612060547, "learning_rate": 0.001, "loss": 1.9524, "step": 255136 }, { "epoch": 22.014492753623188, "grad_norm": 0.373887836933136, "learning_rate": 0.001, "loss": 1.9462, "step": 255192 }, { "epoch": 22.019323671497585, "grad_norm": 0.6144969463348389, "learning_rate": 0.001, "loss": 1.9499, "step": 255248 }, { "epoch": 22.02415458937198, "grad_norm": 2.028465986251831, "learning_rate": 0.001, "loss": 1.9426, "step": 255304 }, { "epoch": 22.028985507246375, "grad_norm": 0.5481564998626709, "learning_rate": 0.001, "loss": 1.9414, "step": 255360 }, { "epoch": 22.033816425120772, "grad_norm": 0.5983251929283142, "learning_rate": 0.001, "loss": 1.9317, "step": 255416 }, { "epoch": 22.03864734299517, "grad_norm": 2.5769171714782715, "learning_rate": 0.001, "loss": 1.9324, "step": 255472 }, { "epoch": 22.043478260869566, "grad_norm": 1.0366284847259521, "learning_rate": 0.001, "loss": 1.9367, "step": 255528 }, { "epoch": 22.04830917874396, "grad_norm": 1.253514051437378, "learning_rate": 0.001, "loss": 1.9357, "step": 255584 }, { "epoch": 22.053140096618357, "grad_norm": 2.0568759441375732, "learning_rate": 0.001, "loss": 1.9293, "step": 255640 }, { "epoch": 22.057971014492754, "grad_norm": 0.457313597202301, "learning_rate": 0.001, "loss": 1.9292, "step": 255696 }, { "epoch": 22.06280193236715, "grad_norm": 1.003413200378418, "learning_rate": 0.001, "loss": 1.9358, "step": 255752 }, { "epoch": 22.067632850241544, "grad_norm": 1.9449431896209717, "learning_rate": 0.001, "loss": 1.9521, "step": 255808 }, { "epoch": 22.07246376811594, "grad_norm": 2.135148525238037, "learning_rate": 0.001, "loss": 1.944, "step": 255864 }, { "epoch": 22.07729468599034, "grad_norm": 1.2061859369277954, "learning_rate": 0.001, "loss": 1.9501, "step": 255920 }, { "epoch": 22.082125603864736, "grad_norm": 0.8198530673980713, "learning_rate": 0.001, "loss": 1.9509, "step": 255976 }, { "epoch": 22.08695652173913, "grad_norm": 1.3472731113433838, "learning_rate": 0.001, "loss": 1.9412, "step": 256032 }, { "epoch": 22.091787439613526, "grad_norm": 1.099510908126831, "learning_rate": 0.001, "loss": 1.9385, "step": 256088 }, { "epoch": 22.096618357487923, "grad_norm": 0.6818681955337524, "learning_rate": 0.001, "loss": 1.9387, "step": 256144 }, { "epoch": 22.10144927536232, "grad_norm": 1.3436921834945679, "learning_rate": 0.001, "loss": 1.9472, "step": 256200 }, { "epoch": 22.106280193236714, "grad_norm": 0.5090356469154358, "learning_rate": 0.001, "loss": 1.9653, "step": 256256 }, { "epoch": 22.11111111111111, "grad_norm": 0.6917729377746582, "learning_rate": 0.001, "loss": 1.9651, "step": 256312 }, { "epoch": 22.115942028985508, "grad_norm": 0.6725999712944031, "learning_rate": 0.001, "loss": 1.954, "step": 256368 }, { "epoch": 22.120772946859905, "grad_norm": 2.0450620651245117, "learning_rate": 0.001, "loss": 1.9479, "step": 256424 }, { "epoch": 22.1256038647343, "grad_norm": 1.9453144073486328, "learning_rate": 0.001, "loss": 1.9493, "step": 256480 }, { "epoch": 22.130434782608695, "grad_norm": 1.6416852474212646, "learning_rate": 0.001, "loss": 1.9627, "step": 256536 }, { "epoch": 22.135265700483092, "grad_norm": 3.5912067890167236, "learning_rate": 0.001, "loss": 1.973, "step": 256592 }, { "epoch": 22.14009661835749, "grad_norm": 0.9328228831291199, "learning_rate": 0.001, "loss": 1.9733, "step": 256648 }, { "epoch": 22.144927536231883, "grad_norm": 1.6226860284805298, "learning_rate": 0.001, "loss": 1.9663, "step": 256704 }, { "epoch": 22.14975845410628, "grad_norm": 9.201728820800781, "learning_rate": 0.001, "loss": 1.9643, "step": 256760 }, { "epoch": 22.154589371980677, "grad_norm": 0.9437302947044373, "learning_rate": 0.001, "loss": 1.9568, "step": 256816 }, { "epoch": 22.159420289855074, "grad_norm": 2.7350730895996094, "learning_rate": 0.001, "loss": 1.9589, "step": 256872 }, { "epoch": 22.164251207729468, "grad_norm": 3.4525701999664307, "learning_rate": 0.001, "loss": 1.95, "step": 256928 }, { "epoch": 22.169082125603865, "grad_norm": 0.9513868689537048, "learning_rate": 0.001, "loss": 1.9371, "step": 256984 }, { "epoch": 22.17391304347826, "grad_norm": 1.0627455711364746, "learning_rate": 0.001, "loss": 1.9447, "step": 257040 }, { "epoch": 22.17874396135266, "grad_norm": 2.0969691276550293, "learning_rate": 0.001, "loss": 1.9483, "step": 257096 }, { "epoch": 22.183574879227052, "grad_norm": 3.5297787189483643, "learning_rate": 0.001, "loss": 1.9683, "step": 257152 }, { "epoch": 22.18840579710145, "grad_norm": 2.0619828701019287, "learning_rate": 0.001, "loss": 1.9726, "step": 257208 }, { "epoch": 22.193236714975846, "grad_norm": 1.5788389444351196, "learning_rate": 0.001, "loss": 1.9595, "step": 257264 }, { "epoch": 22.19806763285024, "grad_norm": 2.6914665699005127, "learning_rate": 0.001, "loss": 1.9776, "step": 257320 }, { "epoch": 22.202898550724637, "grad_norm": 3.4024970531463623, "learning_rate": 0.001, "loss": 1.976, "step": 257376 }, { "epoch": 22.207729468599034, "grad_norm": 1.668730616569519, "learning_rate": 0.001, "loss": 1.9785, "step": 257432 }, { "epoch": 22.21256038647343, "grad_norm": 1.8232885599136353, "learning_rate": 0.001, "loss": 1.974, "step": 257488 }, { "epoch": 22.217391304347824, "grad_norm": 1.1327075958251953, "learning_rate": 0.001, "loss": 1.9736, "step": 257544 }, { "epoch": 22.22222222222222, "grad_norm": 1.664980173110962, "learning_rate": 0.001, "loss": 1.9579, "step": 257600 }, { "epoch": 22.22705314009662, "grad_norm": 2.429081678390503, "learning_rate": 0.001, "loss": 1.9636, "step": 257656 }, { "epoch": 22.231884057971016, "grad_norm": 2.0256426334381104, "learning_rate": 0.001, "loss": 1.9682, "step": 257712 }, { "epoch": 22.23671497584541, "grad_norm": 1.3792738914489746, "learning_rate": 0.001, "loss": 1.9635, "step": 257768 }, { "epoch": 22.241545893719806, "grad_norm": 0.6907938122749329, "learning_rate": 0.001, "loss": 1.9657, "step": 257824 }, { "epoch": 22.246376811594203, "grad_norm": 2.450981855392456, "learning_rate": 0.001, "loss": 1.9755, "step": 257880 }, { "epoch": 22.2512077294686, "grad_norm": 0.929191529750824, "learning_rate": 0.001, "loss": 1.9717, "step": 257936 }, { "epoch": 22.256038647342994, "grad_norm": 0.8927347660064697, "learning_rate": 0.001, "loss": 1.9687, "step": 257992 }, { "epoch": 22.26086956521739, "grad_norm": 1.0469293594360352, "learning_rate": 0.001, "loss": 1.9681, "step": 258048 }, { "epoch": 22.265700483091788, "grad_norm": 1.4159148931503296, "learning_rate": 0.001, "loss": 1.9693, "step": 258104 }, { "epoch": 22.270531400966185, "grad_norm": 2.8150036334991455, "learning_rate": 0.001, "loss": 1.9659, "step": 258160 }, { "epoch": 22.27536231884058, "grad_norm": 1.4112862348556519, "learning_rate": 0.001, "loss": 1.9685, "step": 258216 }, { "epoch": 22.280193236714975, "grad_norm": 1.6838390827178955, "learning_rate": 0.001, "loss": 1.9568, "step": 258272 }, { "epoch": 22.285024154589372, "grad_norm": 0.8449746370315552, "learning_rate": 0.001, "loss": 1.9663, "step": 258328 }, { "epoch": 22.28985507246377, "grad_norm": 1.397044062614441, "learning_rate": 0.001, "loss": 1.9595, "step": 258384 }, { "epoch": 22.294685990338163, "grad_norm": 0.6615034937858582, "learning_rate": 0.001, "loss": 1.9577, "step": 258440 }, { "epoch": 22.29951690821256, "grad_norm": 1.102160096168518, "learning_rate": 0.001, "loss": 1.9608, "step": 258496 }, { "epoch": 22.304347826086957, "grad_norm": 6.598901748657227, "learning_rate": 0.001, "loss": 1.971, "step": 258552 }, { "epoch": 22.309178743961354, "grad_norm": 0.7988231182098389, "learning_rate": 0.001, "loss": 1.973, "step": 258608 }, { "epoch": 22.314009661835748, "grad_norm": 1.2946897745132446, "learning_rate": 0.001, "loss": 1.9623, "step": 258664 }, { "epoch": 22.318840579710145, "grad_norm": 0.6789493560791016, "learning_rate": 0.001, "loss": 1.9583, "step": 258720 }, { "epoch": 22.32367149758454, "grad_norm": 1.1809608936309814, "learning_rate": 0.001, "loss": 1.9578, "step": 258776 }, { "epoch": 22.32850241545894, "grad_norm": 0.37459585070610046, "learning_rate": 0.001, "loss": 1.952, "step": 258832 }, { "epoch": 22.333333333333332, "grad_norm": 1.11725914478302, "learning_rate": 0.001, "loss": 1.957, "step": 258888 }, { "epoch": 22.33816425120773, "grad_norm": 1.155311942100525, "learning_rate": 0.001, "loss": 1.9567, "step": 258944 }, { "epoch": 22.342995169082126, "grad_norm": 0.4662001132965088, "learning_rate": 0.001, "loss": 1.9639, "step": 259000 }, { "epoch": 22.347826086956523, "grad_norm": 1.0789437294006348, "learning_rate": 0.001, "loss": 1.9549, "step": 259056 }, { "epoch": 22.352657004830917, "grad_norm": 0.4004863202571869, "learning_rate": 0.001, "loss": 1.9528, "step": 259112 }, { "epoch": 22.357487922705314, "grad_norm": 1.6848139762878418, "learning_rate": 0.001, "loss": 1.9478, "step": 259168 }, { "epoch": 22.36231884057971, "grad_norm": 0.5138862133026123, "learning_rate": 0.001, "loss": 1.943, "step": 259224 }, { "epoch": 22.367149758454108, "grad_norm": 0.48951512575149536, "learning_rate": 0.001, "loss": 1.9462, "step": 259280 }, { "epoch": 22.3719806763285, "grad_norm": 1.8375505208969116, "learning_rate": 0.001, "loss": 1.9468, "step": 259336 }, { "epoch": 22.3768115942029, "grad_norm": 3.8329737186431885, "learning_rate": 0.001, "loss": 1.9509, "step": 259392 }, { "epoch": 22.381642512077295, "grad_norm": 3.703517436981201, "learning_rate": 0.001, "loss": 1.9579, "step": 259448 }, { "epoch": 22.386473429951693, "grad_norm": 1.425474762916565, "learning_rate": 0.001, "loss": 1.9575, "step": 259504 }, { "epoch": 22.391304347826086, "grad_norm": 0.7898656129837036, "learning_rate": 0.001, "loss": 1.9562, "step": 259560 }, { "epoch": 22.396135265700483, "grad_norm": 0.5097714066505432, "learning_rate": 0.001, "loss": 1.9487, "step": 259616 }, { "epoch": 22.40096618357488, "grad_norm": 1.0579745769500732, "learning_rate": 0.001, "loss": 1.9608, "step": 259672 }, { "epoch": 22.405797101449274, "grad_norm": 2.4833602905273438, "learning_rate": 0.001, "loss": 1.9549, "step": 259728 }, { "epoch": 22.41062801932367, "grad_norm": 0.5513945817947388, "learning_rate": 0.001, "loss": 1.9462, "step": 259784 }, { "epoch": 22.415458937198068, "grad_norm": 0.6396842002868652, "learning_rate": 0.001, "loss": 1.9553, "step": 259840 }, { "epoch": 22.420289855072465, "grad_norm": 0.5756796002388, "learning_rate": 0.001, "loss": 1.9577, "step": 259896 }, { "epoch": 22.42512077294686, "grad_norm": 1.0407577753067017, "learning_rate": 0.001, "loss": 1.9584, "step": 259952 }, { "epoch": 22.429951690821255, "grad_norm": 0.8441076874732971, "learning_rate": 0.001, "loss": 1.9621, "step": 260008 }, { "epoch": 22.434782608695652, "grad_norm": 0.40860188007354736, "learning_rate": 0.001, "loss": 1.9587, "step": 260064 }, { "epoch": 22.43961352657005, "grad_norm": 1.200321912765503, "learning_rate": 0.001, "loss": 1.9453, "step": 260120 }, { "epoch": 22.444444444444443, "grad_norm": 1.1166417598724365, "learning_rate": 0.001, "loss": 1.9499, "step": 260176 }, { "epoch": 22.44927536231884, "grad_norm": 0.9163259863853455, "learning_rate": 0.001, "loss": 1.949, "step": 260232 }, { "epoch": 22.454106280193237, "grad_norm": 0.7720276713371277, "learning_rate": 0.001, "loss": 1.9493, "step": 260288 }, { "epoch": 22.458937198067634, "grad_norm": 1.336298942565918, "learning_rate": 0.001, "loss": 1.9481, "step": 260344 }, { "epoch": 22.463768115942027, "grad_norm": 1.046801209449768, "learning_rate": 0.001, "loss": 1.9402, "step": 260400 }, { "epoch": 22.468599033816425, "grad_norm": 0.36165645718574524, "learning_rate": 0.001, "loss": 1.9427, "step": 260456 }, { "epoch": 22.47342995169082, "grad_norm": 0.5344457626342773, "learning_rate": 0.001, "loss": 1.931, "step": 260512 }, { "epoch": 22.47826086956522, "grad_norm": 1.3417903184890747, "learning_rate": 0.001, "loss": 1.9395, "step": 260568 }, { "epoch": 22.483091787439612, "grad_norm": 2.8190696239471436, "learning_rate": 0.001, "loss": 1.9437, "step": 260624 }, { "epoch": 22.48792270531401, "grad_norm": 0.7186737656593323, "learning_rate": 0.001, "loss": 1.944, "step": 260680 }, { "epoch": 22.492753623188406, "grad_norm": 1.1330615282058716, "learning_rate": 0.001, "loss": 1.933, "step": 260736 }, { "epoch": 22.497584541062803, "grad_norm": 0.5884013772010803, "learning_rate": 0.001, "loss": 1.9414, "step": 260792 }, { "epoch": 22.502415458937197, "grad_norm": 1.282533884048462, "learning_rate": 0.001, "loss": 1.9434, "step": 260848 }, { "epoch": 22.507246376811594, "grad_norm": 0.92621248960495, "learning_rate": 0.001, "loss": 1.9455, "step": 260904 }, { "epoch": 22.51207729468599, "grad_norm": 0.4475570321083069, "learning_rate": 0.001, "loss": 1.9466, "step": 260960 }, { "epoch": 22.516908212560388, "grad_norm": 0.9518371224403381, "learning_rate": 0.001, "loss": 1.9419, "step": 261016 }, { "epoch": 22.52173913043478, "grad_norm": 0.4265706539154053, "learning_rate": 0.001, "loss": 1.9355, "step": 261072 }, { "epoch": 22.52657004830918, "grad_norm": 0.686202347278595, "learning_rate": 0.001, "loss": 1.9423, "step": 261128 }, { "epoch": 22.531400966183575, "grad_norm": 0.7299894690513611, "learning_rate": 0.001, "loss": 1.9279, "step": 261184 }, { "epoch": 22.536231884057973, "grad_norm": 2.3413619995117188, "learning_rate": 0.001, "loss": 1.9372, "step": 261240 }, { "epoch": 22.541062801932366, "grad_norm": 0.9178498983383179, "learning_rate": 0.001, "loss": 1.9369, "step": 261296 }, { "epoch": 22.545893719806763, "grad_norm": 0.8261169195175171, "learning_rate": 0.001, "loss": 1.9391, "step": 261352 }, { "epoch": 22.55072463768116, "grad_norm": 0.5099338293075562, "learning_rate": 0.001, "loss": 1.9392, "step": 261408 }, { "epoch": 22.555555555555557, "grad_norm": 0.6083644032478333, "learning_rate": 0.001, "loss": 1.9446, "step": 261464 }, { "epoch": 22.56038647342995, "grad_norm": 0.6935392022132874, "learning_rate": 0.001, "loss": 1.9526, "step": 261520 }, { "epoch": 22.565217391304348, "grad_norm": 1.1220214366912842, "learning_rate": 0.001, "loss": 1.947, "step": 261576 }, { "epoch": 22.570048309178745, "grad_norm": 0.49901720881462097, "learning_rate": 0.001, "loss": 1.9426, "step": 261632 }, { "epoch": 22.57487922705314, "grad_norm": 1.1224339008331299, "learning_rate": 0.001, "loss": 1.9494, "step": 261688 }, { "epoch": 22.579710144927535, "grad_norm": 1.5109951496124268, "learning_rate": 0.001, "loss": 1.9571, "step": 261744 }, { "epoch": 22.584541062801932, "grad_norm": 1.7177014350891113, "learning_rate": 0.001, "loss": 1.9471, "step": 261800 }, { "epoch": 22.58937198067633, "grad_norm": 0.31399255990982056, "learning_rate": 0.001, "loss": 1.9452, "step": 261856 }, { "epoch": 22.594202898550726, "grad_norm": 1.3104915618896484, "learning_rate": 0.001, "loss": 1.944, "step": 261912 }, { "epoch": 22.59903381642512, "grad_norm": 0.6545467972755432, "learning_rate": 0.001, "loss": 1.9369, "step": 261968 }, { "epoch": 22.603864734299517, "grad_norm": 0.49555808305740356, "learning_rate": 0.001, "loss": 1.9378, "step": 262024 }, { "epoch": 22.608695652173914, "grad_norm": 0.7857664227485657, "learning_rate": 0.001, "loss": 1.9496, "step": 262080 }, { "epoch": 22.613526570048307, "grad_norm": 0.35892102122306824, "learning_rate": 0.001, "loss": 1.951, "step": 262136 }, { "epoch": 22.618357487922705, "grad_norm": 0.6544508337974548, "learning_rate": 0.001, "loss": 1.9429, "step": 262192 }, { "epoch": 22.6231884057971, "grad_norm": 0.42215535044670105, "learning_rate": 0.001, "loss": 1.9331, "step": 262248 }, { "epoch": 22.6280193236715, "grad_norm": 1.4311772584915161, "learning_rate": 0.001, "loss": 1.9538, "step": 262304 }, { "epoch": 22.632850241545892, "grad_norm": 0.33909595012664795, "learning_rate": 0.001, "loss": 1.9479, "step": 262360 }, { "epoch": 22.63768115942029, "grad_norm": 0.6888494491577148, "learning_rate": 0.001, "loss": 1.9347, "step": 262416 }, { "epoch": 22.642512077294686, "grad_norm": 0.7251987457275391, "learning_rate": 0.001, "loss": 1.9536, "step": 262472 }, { "epoch": 22.647342995169083, "grad_norm": 0.98261958360672, "learning_rate": 0.001, "loss": 1.9581, "step": 262528 }, { "epoch": 22.652173913043477, "grad_norm": 0.4945251941680908, "learning_rate": 0.001, "loss": 1.9555, "step": 262584 }, { "epoch": 22.657004830917874, "grad_norm": 1.670192003250122, "learning_rate": 0.001, "loss": 1.9472, "step": 262640 }, { "epoch": 22.66183574879227, "grad_norm": 1.2051167488098145, "learning_rate": 0.001, "loss": 1.9592, "step": 262696 }, { "epoch": 22.666666666666668, "grad_norm": 2.4427123069763184, "learning_rate": 0.001, "loss": 1.9454, "step": 262752 }, { "epoch": 22.67149758454106, "grad_norm": 1.210001826286316, "learning_rate": 0.001, "loss": 1.9552, "step": 262808 }, { "epoch": 22.67632850241546, "grad_norm": 0.5402510166168213, "learning_rate": 0.001, "loss": 1.9516, "step": 262864 }, { "epoch": 22.681159420289855, "grad_norm": 1.3431830406188965, "learning_rate": 0.001, "loss": 1.9525, "step": 262920 }, { "epoch": 22.685990338164252, "grad_norm": 0.5810667872428894, "learning_rate": 0.001, "loss": 1.9515, "step": 262976 }, { "epoch": 22.690821256038646, "grad_norm": 2.991968870162964, "learning_rate": 0.001, "loss": 1.9561, "step": 263032 }, { "epoch": 22.695652173913043, "grad_norm": 2.019123077392578, "learning_rate": 0.001, "loss": 1.9539, "step": 263088 }, { "epoch": 22.70048309178744, "grad_norm": 4.0230865478515625, "learning_rate": 0.001, "loss": 1.9634, "step": 263144 }, { "epoch": 22.705314009661837, "grad_norm": 5.997570037841797, "learning_rate": 0.001, "loss": 1.9585, "step": 263200 }, { "epoch": 22.71014492753623, "grad_norm": 1.5734467506408691, "learning_rate": 0.001, "loss": 1.9598, "step": 263256 }, { "epoch": 22.714975845410628, "grad_norm": 1.5002771615982056, "learning_rate": 0.001, "loss": 1.955, "step": 263312 }, { "epoch": 22.719806763285025, "grad_norm": 1.072264313697815, "learning_rate": 0.001, "loss": 1.96, "step": 263368 }, { "epoch": 22.72463768115942, "grad_norm": 0.8158003687858582, "learning_rate": 0.001, "loss": 1.964, "step": 263424 }, { "epoch": 22.729468599033815, "grad_norm": 7.6841816902160645, "learning_rate": 0.001, "loss": 1.9657, "step": 263480 }, { "epoch": 22.734299516908212, "grad_norm": 0.35602399706840515, "learning_rate": 0.001, "loss": 1.9505, "step": 263536 }, { "epoch": 22.73913043478261, "grad_norm": 3.834961175918579, "learning_rate": 0.001, "loss": 1.9471, "step": 263592 }, { "epoch": 22.743961352657006, "grad_norm": 1.1503311395645142, "learning_rate": 0.001, "loss": 1.9422, "step": 263648 }, { "epoch": 22.7487922705314, "grad_norm": 2.0164794921875, "learning_rate": 0.001, "loss": 1.9425, "step": 263704 }, { "epoch": 22.753623188405797, "grad_norm": 0.3509223759174347, "learning_rate": 0.001, "loss": 1.9396, "step": 263760 }, { "epoch": 22.758454106280194, "grad_norm": 0.9490211009979248, "learning_rate": 0.001, "loss": 1.9367, "step": 263816 }, { "epoch": 22.76328502415459, "grad_norm": 0.7100268602371216, "learning_rate": 0.001, "loss": 1.9328, "step": 263872 }, { "epoch": 22.768115942028984, "grad_norm": 0.572968065738678, "learning_rate": 0.001, "loss": 1.937, "step": 263928 }, { "epoch": 22.77294685990338, "grad_norm": 0.5126324892044067, "learning_rate": 0.001, "loss": 1.9326, "step": 263984 }, { "epoch": 22.77777777777778, "grad_norm": 2.8755831718444824, "learning_rate": 0.001, "loss": 1.9236, "step": 264040 }, { "epoch": 22.782608695652176, "grad_norm": 0.2994726598262787, "learning_rate": 0.001, "loss": 1.9353, "step": 264096 }, { "epoch": 22.78743961352657, "grad_norm": 0.9157410264015198, "learning_rate": 0.001, "loss": 1.9348, "step": 264152 }, { "epoch": 22.792270531400966, "grad_norm": 0.5256150960922241, "learning_rate": 0.001, "loss": 1.9411, "step": 264208 }, { "epoch": 22.797101449275363, "grad_norm": 1.080043077468872, "learning_rate": 0.001, "loss": 1.9361, "step": 264264 }, { "epoch": 22.80193236714976, "grad_norm": 0.4238528311252594, "learning_rate": 0.001, "loss": 1.9336, "step": 264320 }, { "epoch": 22.806763285024154, "grad_norm": 0.33494430780410767, "learning_rate": 0.001, "loss": 1.9299, "step": 264376 }, { "epoch": 22.81159420289855, "grad_norm": 0.6373468637466431, "learning_rate": 0.001, "loss": 1.931, "step": 264432 }, { "epoch": 22.816425120772948, "grad_norm": 1.9185104370117188, "learning_rate": 0.001, "loss": 1.9452, "step": 264488 }, { "epoch": 22.82125603864734, "grad_norm": 4.179603099822998, "learning_rate": 0.001, "loss": 1.9352, "step": 264544 }, { "epoch": 22.82608695652174, "grad_norm": 16.9693660736084, "learning_rate": 0.001, "loss": 1.9481, "step": 264600 }, { "epoch": 22.830917874396135, "grad_norm": 0.431379497051239, "learning_rate": 0.001, "loss": 1.9401, "step": 264656 }, { "epoch": 22.835748792270532, "grad_norm": 0.48328131437301636, "learning_rate": 0.001, "loss": 1.9415, "step": 264712 }, { "epoch": 22.840579710144926, "grad_norm": 0.3476356267929077, "learning_rate": 0.001, "loss": 1.9343, "step": 264768 }, { "epoch": 22.845410628019323, "grad_norm": 1.6065945625305176, "learning_rate": 0.001, "loss": 1.9359, "step": 264824 }, { "epoch": 22.85024154589372, "grad_norm": 0.8579918742179871, "learning_rate": 0.001, "loss": 1.9473, "step": 264880 }, { "epoch": 22.855072463768117, "grad_norm": 0.678260326385498, "learning_rate": 0.001, "loss": 1.9437, "step": 264936 }, { "epoch": 22.85990338164251, "grad_norm": 1.4892468452453613, "learning_rate": 0.001, "loss": 1.9596, "step": 264992 }, { "epoch": 22.864734299516908, "grad_norm": 0.3541586995124817, "learning_rate": 0.001, "loss": 1.9522, "step": 265048 }, { "epoch": 22.869565217391305, "grad_norm": 0.328909695148468, "learning_rate": 0.001, "loss": 1.9413, "step": 265104 }, { "epoch": 22.8743961352657, "grad_norm": 0.42425069212913513, "learning_rate": 0.001, "loss": 1.9496, "step": 265160 }, { "epoch": 22.879227053140095, "grad_norm": 1.1150788068771362, "learning_rate": 0.001, "loss": 1.9402, "step": 265216 }, { "epoch": 22.884057971014492, "grad_norm": 1.2667944431304932, "learning_rate": 0.001, "loss": 1.9502, "step": 265272 }, { "epoch": 22.88888888888889, "grad_norm": 0.9590685963630676, "learning_rate": 0.001, "loss": 1.9565, "step": 265328 }, { "epoch": 22.893719806763286, "grad_norm": 0.48470574617385864, "learning_rate": 0.001, "loss": 1.9455, "step": 265384 }, { "epoch": 22.89855072463768, "grad_norm": 0.7624456286430359, "learning_rate": 0.001, "loss": 1.9391, "step": 265440 }, { "epoch": 22.903381642512077, "grad_norm": 0.44625183939933777, "learning_rate": 0.001, "loss": 1.9341, "step": 265496 }, { "epoch": 22.908212560386474, "grad_norm": 0.4900270700454712, "learning_rate": 0.001, "loss": 1.9433, "step": 265552 }, { "epoch": 22.91304347826087, "grad_norm": 0.409993052482605, "learning_rate": 0.001, "loss": 1.9526, "step": 265608 }, { "epoch": 22.917874396135264, "grad_norm": 1.2805964946746826, "learning_rate": 0.001, "loss": 1.942, "step": 265664 }, { "epoch": 22.92270531400966, "grad_norm": 0.45714086294174194, "learning_rate": 0.001, "loss": 1.9451, "step": 265720 }, { "epoch": 22.92753623188406, "grad_norm": 0.6697834134101868, "learning_rate": 0.001, "loss": 1.9438, "step": 265776 }, { "epoch": 22.932367149758456, "grad_norm": 1.9860825538635254, "learning_rate": 0.001, "loss": 1.9645, "step": 265832 }, { "epoch": 22.93719806763285, "grad_norm": 0.4236160218715668, "learning_rate": 0.001, "loss": 1.9564, "step": 265888 }, { "epoch": 22.942028985507246, "grad_norm": 11.387377738952637, "learning_rate": 0.001, "loss": 1.9652, "step": 265944 }, { "epoch": 22.946859903381643, "grad_norm": 6.489840507507324, "learning_rate": 0.001, "loss": 1.9927, "step": 266000 }, { "epoch": 22.95169082125604, "grad_norm": 1.2012877464294434, "learning_rate": 0.001, "loss": 1.9726, "step": 266056 }, { "epoch": 22.956521739130434, "grad_norm": 0.5947778820991516, "learning_rate": 0.001, "loss": 1.9632, "step": 266112 }, { "epoch": 22.96135265700483, "grad_norm": 0.6485669016838074, "learning_rate": 0.001, "loss": 1.9597, "step": 266168 }, { "epoch": 22.966183574879228, "grad_norm": 1.7881348133087158, "learning_rate": 0.001, "loss": 1.9629, "step": 266224 }, { "epoch": 22.971014492753625, "grad_norm": 0.38715559244155884, "learning_rate": 0.001, "loss": 1.9528, "step": 266280 }, { "epoch": 22.97584541062802, "grad_norm": 0.6476491689682007, "learning_rate": 0.001, "loss": 1.9587, "step": 266336 }, { "epoch": 22.980676328502415, "grad_norm": 0.9831525683403015, "learning_rate": 0.001, "loss": 1.9766, "step": 266392 }, { "epoch": 22.985507246376812, "grad_norm": 1.6372984647750854, "learning_rate": 0.001, "loss": 1.9687, "step": 266448 }, { "epoch": 22.990338164251206, "grad_norm": 1.201743483543396, "learning_rate": 0.001, "loss": 1.9591, "step": 266504 }, { "epoch": 22.995169082125603, "grad_norm": 0.8657007217407227, "learning_rate": 0.001, "loss": 1.9533, "step": 266560 }, { "epoch": 23.0, "grad_norm": 0.5309742093086243, "learning_rate": 0.001, "loss": 1.9551, "step": 266616 }, { "epoch": 23.004830917874397, "grad_norm": 0.44991835951805115, "learning_rate": 0.001, "loss": 1.9153, "step": 266672 }, { "epoch": 23.00966183574879, "grad_norm": 0.7238132357597351, "learning_rate": 0.001, "loss": 1.9129, "step": 266728 }, { "epoch": 23.014492753623188, "grad_norm": 0.6165079474449158, "learning_rate": 0.001, "loss": 1.9192, "step": 266784 }, { "epoch": 23.019323671497585, "grad_norm": 0.3864997625350952, "learning_rate": 0.001, "loss": 1.9213, "step": 266840 }, { "epoch": 23.02415458937198, "grad_norm": 2.08044171333313, "learning_rate": 0.001, "loss": 1.9079, "step": 266896 }, { "epoch": 23.028985507246375, "grad_norm": 0.41637352108955383, "learning_rate": 0.001, "loss": 1.9034, "step": 266952 }, { "epoch": 23.033816425120772, "grad_norm": 0.2871229648590088, "learning_rate": 0.001, "loss": 1.9032, "step": 267008 }, { "epoch": 23.03864734299517, "grad_norm": 0.48673805594444275, "learning_rate": 0.001, "loss": 1.9013, "step": 267064 }, { "epoch": 23.043478260869566, "grad_norm": 1.3410234451293945, "learning_rate": 0.001, "loss": 1.9032, "step": 267120 }, { "epoch": 23.04830917874396, "grad_norm": 0.5184250473976135, "learning_rate": 0.001, "loss": 1.9099, "step": 267176 }, { "epoch": 23.053140096618357, "grad_norm": 0.3497347831726074, "learning_rate": 0.001, "loss": 1.917, "step": 267232 }, { "epoch": 23.057971014492754, "grad_norm": 1.1664408445358276, "learning_rate": 0.001, "loss": 1.921, "step": 267288 }, { "epoch": 23.06280193236715, "grad_norm": 0.45426231622695923, "learning_rate": 0.001, "loss": 1.9101, "step": 267344 }, { "epoch": 23.067632850241544, "grad_norm": 0.833065927028656, "learning_rate": 0.001, "loss": 1.9098, "step": 267400 }, { "epoch": 23.07246376811594, "grad_norm": 0.6405877470970154, "learning_rate": 0.001, "loss": 1.9242, "step": 267456 }, { "epoch": 23.07729468599034, "grad_norm": 0.3535110652446747, "learning_rate": 0.001, "loss": 1.9143, "step": 267512 }, { "epoch": 23.082125603864736, "grad_norm": 0.7435158491134644, "learning_rate": 0.001, "loss": 1.9063, "step": 267568 }, { "epoch": 23.08695652173913, "grad_norm": 1.4694548845291138, "learning_rate": 0.001, "loss": 1.9115, "step": 267624 }, { "epoch": 23.091787439613526, "grad_norm": 0.5443825721740723, "learning_rate": 0.001, "loss": 1.9243, "step": 267680 }, { "epoch": 23.096618357487923, "grad_norm": 1.9281439781188965, "learning_rate": 0.001, "loss": 1.9108, "step": 267736 }, { "epoch": 23.10144927536232, "grad_norm": 3.498905658721924, "learning_rate": 0.001, "loss": 1.9149, "step": 267792 }, { "epoch": 23.106280193236714, "grad_norm": 0.4698292315006256, "learning_rate": 0.001, "loss": 1.9205, "step": 267848 }, { "epoch": 23.11111111111111, "grad_norm": 0.6591044664382935, "learning_rate": 0.001, "loss": 1.925, "step": 267904 }, { "epoch": 23.115942028985508, "grad_norm": 0.9701511859893799, "learning_rate": 0.001, "loss": 1.9264, "step": 267960 }, { "epoch": 23.120772946859905, "grad_norm": 1.7473798990249634, "learning_rate": 0.001, "loss": 1.9244, "step": 268016 }, { "epoch": 23.1256038647343, "grad_norm": 1.1335879564285278, "learning_rate": 0.001, "loss": 1.9253, "step": 268072 }, { "epoch": 23.130434782608695, "grad_norm": 0.6535305976867676, "learning_rate": 0.001, "loss": 1.9285, "step": 268128 }, { "epoch": 23.135265700483092, "grad_norm": 0.9533494710922241, "learning_rate": 0.001, "loss": 1.9293, "step": 268184 }, { "epoch": 23.14009661835749, "grad_norm": 0.6012714505195618, "learning_rate": 0.001, "loss": 1.9458, "step": 268240 }, { "epoch": 23.144927536231883, "grad_norm": 0.97189861536026, "learning_rate": 0.001, "loss": 1.9405, "step": 268296 }, { "epoch": 23.14975845410628, "grad_norm": 0.41616836190223694, "learning_rate": 0.001, "loss": 1.9363, "step": 268352 }, { "epoch": 23.154589371980677, "grad_norm": 1.1855220794677734, "learning_rate": 0.001, "loss": 1.9197, "step": 268408 }, { "epoch": 23.159420289855074, "grad_norm": 0.9708030223846436, "learning_rate": 0.001, "loss": 1.9225, "step": 268464 }, { "epoch": 23.164251207729468, "grad_norm": 0.7180496454238892, "learning_rate": 0.001, "loss": 1.9173, "step": 268520 }, { "epoch": 23.169082125603865, "grad_norm": 0.44036784768104553, "learning_rate": 0.001, "loss": 1.9196, "step": 268576 }, { "epoch": 23.17391304347826, "grad_norm": 0.8156594038009644, "learning_rate": 0.001, "loss": 1.9118, "step": 268632 }, { "epoch": 23.17874396135266, "grad_norm": 0.5292342901229858, "learning_rate": 0.001, "loss": 1.9125, "step": 268688 }, { "epoch": 23.183574879227052, "grad_norm": 0.8278608918190002, "learning_rate": 0.001, "loss": 1.9213, "step": 268744 }, { "epoch": 23.18840579710145, "grad_norm": 1.8440238237380981, "learning_rate": 0.001, "loss": 1.9168, "step": 268800 }, { "epoch": 23.193236714975846, "grad_norm": 1.2640239000320435, "learning_rate": 0.001, "loss": 1.9111, "step": 268856 }, { "epoch": 23.19806763285024, "grad_norm": 1.6505805253982544, "learning_rate": 0.001, "loss": 1.9142, "step": 268912 }, { "epoch": 23.202898550724637, "grad_norm": 0.7507835626602173, "learning_rate": 0.001, "loss": 1.916, "step": 268968 }, { "epoch": 23.207729468599034, "grad_norm": 0.6968690752983093, "learning_rate": 0.001, "loss": 1.9077, "step": 269024 }, { "epoch": 23.21256038647343, "grad_norm": 0.44710081815719604, "learning_rate": 0.001, "loss": 1.9208, "step": 269080 }, { "epoch": 23.217391304347824, "grad_norm": 2.3278090953826904, "learning_rate": 0.001, "loss": 1.9082, "step": 269136 }, { "epoch": 23.22222222222222, "grad_norm": 0.7018927335739136, "learning_rate": 0.001, "loss": 1.9075, "step": 269192 }, { "epoch": 23.22705314009662, "grad_norm": 0.6166730523109436, "learning_rate": 0.001, "loss": 1.9133, "step": 269248 }, { "epoch": 23.231884057971016, "grad_norm": 2.4484236240386963, "learning_rate": 0.001, "loss": 1.9207, "step": 269304 }, { "epoch": 23.23671497584541, "grad_norm": 0.44285163283348083, "learning_rate": 0.001, "loss": 1.9267, "step": 269360 }, { "epoch": 23.241545893719806, "grad_norm": 0.5581911206245422, "learning_rate": 0.001, "loss": 1.9172, "step": 269416 }, { "epoch": 23.246376811594203, "grad_norm": 1.135427474975586, "learning_rate": 0.001, "loss": 1.9268, "step": 269472 }, { "epoch": 23.2512077294686, "grad_norm": 0.9358544945716858, "learning_rate": 0.001, "loss": 1.926, "step": 269528 }, { "epoch": 23.256038647342994, "grad_norm": 0.7905175089836121, "learning_rate": 0.001, "loss": 1.913, "step": 269584 }, { "epoch": 23.26086956521739, "grad_norm": 1.0659804344177246, "learning_rate": 0.001, "loss": 1.9087, "step": 269640 }, { "epoch": 23.265700483091788, "grad_norm": 0.39708977937698364, "learning_rate": 0.001, "loss": 1.9268, "step": 269696 }, { "epoch": 23.270531400966185, "grad_norm": 2.2125682830810547, "learning_rate": 0.001, "loss": 1.9285, "step": 269752 }, { "epoch": 23.27536231884058, "grad_norm": 0.63042813539505, "learning_rate": 0.001, "loss": 1.9315, "step": 269808 }, { "epoch": 23.280193236714975, "grad_norm": 0.5057905912399292, "learning_rate": 0.001, "loss": 1.9136, "step": 269864 }, { "epoch": 23.285024154589372, "grad_norm": 0.6847552061080933, "learning_rate": 0.001, "loss": 1.9216, "step": 269920 }, { "epoch": 23.28985507246377, "grad_norm": 0.7906085848808289, "learning_rate": 0.001, "loss": 1.9179, "step": 269976 }, { "epoch": 23.294685990338163, "grad_norm": 0.6572842597961426, "learning_rate": 0.001, "loss": 1.925, "step": 270032 }, { "epoch": 23.29951690821256, "grad_norm": 2.6821482181549072, "learning_rate": 0.001, "loss": 1.9338, "step": 270088 }, { "epoch": 23.304347826086957, "grad_norm": 16.653940200805664, "learning_rate": 0.001, "loss": 1.938, "step": 270144 }, { "epoch": 23.309178743961354, "grad_norm": 1.4522290229797363, "learning_rate": 0.001, "loss": 1.9419, "step": 270200 }, { "epoch": 23.314009661835748, "grad_norm": 2.248033046722412, "learning_rate": 0.001, "loss": 1.9318, "step": 270256 }, { "epoch": 23.318840579710145, "grad_norm": 2.474079132080078, "learning_rate": 0.001, "loss": 1.9322, "step": 270312 }, { "epoch": 23.32367149758454, "grad_norm": 0.5638799667358398, "learning_rate": 0.001, "loss": 1.9458, "step": 270368 }, { "epoch": 23.32850241545894, "grad_norm": 0.9347525835037231, "learning_rate": 0.001, "loss": 1.9452, "step": 270424 }, { "epoch": 23.333333333333332, "grad_norm": 0.6353224515914917, "learning_rate": 0.001, "loss": 1.9473, "step": 270480 }, { "epoch": 23.33816425120773, "grad_norm": 13.776043891906738, "learning_rate": 0.001, "loss": 1.9432, "step": 270536 }, { "epoch": 23.342995169082126, "grad_norm": 0.9044269919395447, "learning_rate": 0.001, "loss": 1.9396, "step": 270592 }, { "epoch": 23.347826086956523, "grad_norm": 0.5013958215713501, "learning_rate": 0.001, "loss": 1.9478, "step": 270648 }, { "epoch": 23.352657004830917, "grad_norm": 1.413511037826538, "learning_rate": 0.001, "loss": 1.9481, "step": 270704 }, { "epoch": 23.357487922705314, "grad_norm": 1.0283443927764893, "learning_rate": 0.001, "loss": 1.9455, "step": 270760 }, { "epoch": 23.36231884057971, "grad_norm": 0.4326821565628052, "learning_rate": 0.001, "loss": 1.9305, "step": 270816 }, { "epoch": 23.367149758454108, "grad_norm": 0.513903021812439, "learning_rate": 0.001, "loss": 1.9313, "step": 270872 }, { "epoch": 23.3719806763285, "grad_norm": 1.6875108480453491, "learning_rate": 0.001, "loss": 1.9228, "step": 270928 }, { "epoch": 23.3768115942029, "grad_norm": 0.6795575618743896, "learning_rate": 0.001, "loss": 1.9263, "step": 270984 }, { "epoch": 23.381642512077295, "grad_norm": 0.40820255875587463, "learning_rate": 0.001, "loss": 1.9278, "step": 271040 }, { "epoch": 23.386473429951693, "grad_norm": 0.39137184619903564, "learning_rate": 0.001, "loss": 1.935, "step": 271096 }, { "epoch": 23.391304347826086, "grad_norm": 0.9427878856658936, "learning_rate": 0.001, "loss": 1.9358, "step": 271152 }, { "epoch": 23.396135265700483, "grad_norm": 0.803469717502594, "learning_rate": 0.001, "loss": 1.933, "step": 271208 }, { "epoch": 23.40096618357488, "grad_norm": 0.5574473142623901, "learning_rate": 0.001, "loss": 1.9374, "step": 271264 }, { "epoch": 23.405797101449274, "grad_norm": 0.9779496788978577, "learning_rate": 0.001, "loss": 1.9358, "step": 271320 }, { "epoch": 23.41062801932367, "grad_norm": 0.9210959076881409, "learning_rate": 0.001, "loss": 1.9385, "step": 271376 }, { "epoch": 23.415458937198068, "grad_norm": 1.215304970741272, "learning_rate": 0.001, "loss": 1.9464, "step": 271432 }, { "epoch": 23.420289855072465, "grad_norm": 1.754247784614563, "learning_rate": 0.001, "loss": 1.9331, "step": 271488 }, { "epoch": 23.42512077294686, "grad_norm": 0.5789891481399536, "learning_rate": 0.001, "loss": 1.9337, "step": 271544 }, { "epoch": 23.429951690821255, "grad_norm": 3.837973117828369, "learning_rate": 0.001, "loss": 1.9241, "step": 271600 }, { "epoch": 23.434782608695652, "grad_norm": 0.5541539192199707, "learning_rate": 0.001, "loss": 1.9251, "step": 271656 }, { "epoch": 23.43961352657005, "grad_norm": 0.6401068568229675, "learning_rate": 0.001, "loss": 1.933, "step": 271712 }, { "epoch": 23.444444444444443, "grad_norm": 4.834632873535156, "learning_rate": 0.001, "loss": 1.9261, "step": 271768 }, { "epoch": 23.44927536231884, "grad_norm": 0.6982259750366211, "learning_rate": 0.001, "loss": 1.9233, "step": 271824 }, { "epoch": 23.454106280193237, "grad_norm": 0.6482769846916199, "learning_rate": 0.001, "loss": 1.9293, "step": 271880 }, { "epoch": 23.458937198067634, "grad_norm": 0.29602423310279846, "learning_rate": 0.001, "loss": 1.936, "step": 271936 }, { "epoch": 23.463768115942027, "grad_norm": 0.7366529107093811, "learning_rate": 0.001, "loss": 1.926, "step": 271992 }, { "epoch": 23.468599033816425, "grad_norm": 0.7094994187355042, "learning_rate": 0.001, "loss": 1.9302, "step": 272048 }, { "epoch": 23.47342995169082, "grad_norm": 0.3691755533218384, "learning_rate": 0.001, "loss": 1.9278, "step": 272104 }, { "epoch": 23.47826086956522, "grad_norm": 0.6301421523094177, "learning_rate": 0.001, "loss": 1.9332, "step": 272160 }, { "epoch": 23.483091787439612, "grad_norm": 1.0800213813781738, "learning_rate": 0.001, "loss": 1.9457, "step": 272216 }, { "epoch": 23.48792270531401, "grad_norm": 1.2811214923858643, "learning_rate": 0.001, "loss": 1.9453, "step": 272272 }, { "epoch": 23.492753623188406, "grad_norm": 1.2232860326766968, "learning_rate": 0.001, "loss": 1.94, "step": 272328 }, { "epoch": 23.497584541062803, "grad_norm": 0.7985347509384155, "learning_rate": 0.001, "loss": 1.9425, "step": 272384 }, { "epoch": 23.502415458937197, "grad_norm": 1.2063349485397339, "learning_rate": 0.001, "loss": 1.9311, "step": 272440 }, { "epoch": 23.507246376811594, "grad_norm": 0.3971107304096222, "learning_rate": 0.001, "loss": 1.9282, "step": 272496 }, { "epoch": 23.51207729468599, "grad_norm": 0.5735574960708618, "learning_rate": 0.001, "loss": 1.9155, "step": 272552 }, { "epoch": 23.516908212560388, "grad_norm": 0.613368570804596, "learning_rate": 0.001, "loss": 1.9353, "step": 272608 }, { "epoch": 23.52173913043478, "grad_norm": 0.3817559778690338, "learning_rate": 0.001, "loss": 1.9254, "step": 272664 }, { "epoch": 23.52657004830918, "grad_norm": 2.759641170501709, "learning_rate": 0.001, "loss": 1.9198, "step": 272720 }, { "epoch": 23.531400966183575, "grad_norm": 2.7991578578948975, "learning_rate": 0.001, "loss": 1.934, "step": 272776 }, { "epoch": 23.536231884057973, "grad_norm": 0.6614062190055847, "learning_rate": 0.001, "loss": 1.9448, "step": 272832 }, { "epoch": 23.541062801932366, "grad_norm": 1.376397967338562, "learning_rate": 0.001, "loss": 1.955, "step": 272888 }, { "epoch": 23.545893719806763, "grad_norm": 0.43629536032676697, "learning_rate": 0.001, "loss": 1.9366, "step": 272944 }, { "epoch": 23.55072463768116, "grad_norm": 0.5023860335350037, "learning_rate": 0.001, "loss": 1.9353, "step": 273000 }, { "epoch": 23.555555555555557, "grad_norm": 0.6915550231933594, "learning_rate": 0.001, "loss": 1.9348, "step": 273056 }, { "epoch": 23.56038647342995, "grad_norm": 1.468752145767212, "learning_rate": 0.001, "loss": 1.9475, "step": 273112 }, { "epoch": 23.565217391304348, "grad_norm": 1.9340236186981201, "learning_rate": 0.001, "loss": 1.9348, "step": 273168 }, { "epoch": 23.570048309178745, "grad_norm": 0.5305664539337158, "learning_rate": 0.001, "loss": 1.9357, "step": 273224 }, { "epoch": 23.57487922705314, "grad_norm": 0.859559953212738, "learning_rate": 0.001, "loss": 1.9352, "step": 273280 }, { "epoch": 23.579710144927535, "grad_norm": 0.8822153210639954, "learning_rate": 0.001, "loss": 1.9334, "step": 273336 }, { "epoch": 23.584541062801932, "grad_norm": 0.6296571493148804, "learning_rate": 0.001, "loss": 1.9437, "step": 273392 }, { "epoch": 23.58937198067633, "grad_norm": 1.165420413017273, "learning_rate": 0.001, "loss": 1.9532, "step": 273448 }, { "epoch": 23.594202898550726, "grad_norm": 0.3977389335632324, "learning_rate": 0.001, "loss": 1.9552, "step": 273504 }, { "epoch": 23.59903381642512, "grad_norm": 1.1903618574142456, "learning_rate": 0.001, "loss": 1.9627, "step": 273560 }, { "epoch": 23.603864734299517, "grad_norm": 1.353217363357544, "learning_rate": 0.001, "loss": 1.9471, "step": 273616 }, { "epoch": 23.608695652173914, "grad_norm": 1.2180839776992798, "learning_rate": 0.001, "loss": 1.9364, "step": 273672 }, { "epoch": 23.613526570048307, "grad_norm": 0.4751861095428467, "learning_rate": 0.001, "loss": 1.93, "step": 273728 }, { "epoch": 23.618357487922705, "grad_norm": 0.9146961569786072, "learning_rate": 0.001, "loss": 1.932, "step": 273784 }, { "epoch": 23.6231884057971, "grad_norm": 0.5517374873161316, "learning_rate": 0.001, "loss": 1.9304, "step": 273840 }, { "epoch": 23.6280193236715, "grad_norm": 2.276715040206909, "learning_rate": 0.001, "loss": 1.936, "step": 273896 }, { "epoch": 23.632850241545892, "grad_norm": 0.6137073636054993, "learning_rate": 0.001, "loss": 1.9293, "step": 273952 }, { "epoch": 23.63768115942029, "grad_norm": 0.6306750774383545, "learning_rate": 0.001, "loss": 1.9159, "step": 274008 }, { "epoch": 23.642512077294686, "grad_norm": 1.0202962160110474, "learning_rate": 0.001, "loss": 1.9202, "step": 274064 }, { "epoch": 23.647342995169083, "grad_norm": 0.8184786438941956, "learning_rate": 0.001, "loss": 1.92, "step": 274120 }, { "epoch": 23.652173913043477, "grad_norm": 0.5771294832229614, "learning_rate": 0.001, "loss": 1.921, "step": 274176 }, { "epoch": 23.657004830917874, "grad_norm": 0.999147355556488, "learning_rate": 0.001, "loss": 1.9385, "step": 274232 }, { "epoch": 23.66183574879227, "grad_norm": 1.0157830715179443, "learning_rate": 0.001, "loss": 1.9354, "step": 274288 }, { "epoch": 23.666666666666668, "grad_norm": 1.5097806453704834, "learning_rate": 0.001, "loss": 1.9556, "step": 274344 }, { "epoch": 23.67149758454106, "grad_norm": 0.7526201009750366, "learning_rate": 0.001, "loss": 1.9477, "step": 274400 }, { "epoch": 23.67632850241546, "grad_norm": 2.9513816833496094, "learning_rate": 0.001, "loss": 1.9335, "step": 274456 }, { "epoch": 23.681159420289855, "grad_norm": 1.3187352418899536, "learning_rate": 0.001, "loss": 1.9352, "step": 274512 }, { "epoch": 23.685990338164252, "grad_norm": 1.5674198865890503, "learning_rate": 0.001, "loss": 1.9372, "step": 274568 }, { "epoch": 23.690821256038646, "grad_norm": 2.2266364097595215, "learning_rate": 0.001, "loss": 1.9383, "step": 274624 }, { "epoch": 23.695652173913043, "grad_norm": 1.3555537462234497, "learning_rate": 0.001, "loss": 1.9405, "step": 274680 }, { "epoch": 23.70048309178744, "grad_norm": 1.062801480293274, "learning_rate": 0.001, "loss": 1.9376, "step": 274736 }, { "epoch": 23.705314009661837, "grad_norm": 1.090857744216919, "learning_rate": 0.001, "loss": 1.933, "step": 274792 }, { "epoch": 23.71014492753623, "grad_norm": 1.7963591814041138, "learning_rate": 0.001, "loss": 1.9413, "step": 274848 }, { "epoch": 23.714975845410628, "grad_norm": 0.6484135389328003, "learning_rate": 0.001, "loss": 1.9368, "step": 274904 }, { "epoch": 23.719806763285025, "grad_norm": 0.6178915500640869, "learning_rate": 0.001, "loss": 1.9415, "step": 274960 }, { "epoch": 23.72463768115942, "grad_norm": 1.8334332704544067, "learning_rate": 0.001, "loss": 1.934, "step": 275016 }, { "epoch": 23.729468599033815, "grad_norm": 1.631144404411316, "learning_rate": 0.001, "loss": 1.9416, "step": 275072 }, { "epoch": 23.734299516908212, "grad_norm": 0.9070121049880981, "learning_rate": 0.001, "loss": 1.9406, "step": 275128 }, { "epoch": 23.73913043478261, "grad_norm": 0.8993938565254211, "learning_rate": 0.001, "loss": 1.9447, "step": 275184 }, { "epoch": 23.743961352657006, "grad_norm": 1.5650254487991333, "learning_rate": 0.001, "loss": 1.938, "step": 275240 }, { "epoch": 23.7487922705314, "grad_norm": 2.48445463180542, "learning_rate": 0.001, "loss": 1.9383, "step": 275296 }, { "epoch": 23.753623188405797, "grad_norm": 0.9374467134475708, "learning_rate": 0.001, "loss": 1.9403, "step": 275352 }, { "epoch": 23.758454106280194, "grad_norm": 0.7290564775466919, "learning_rate": 0.001, "loss": 1.931, "step": 275408 }, { "epoch": 23.76328502415459, "grad_norm": 1.9603421688079834, "learning_rate": 0.001, "loss": 1.9355, "step": 275464 }, { "epoch": 23.768115942028984, "grad_norm": 0.5246350169181824, "learning_rate": 0.001, "loss": 1.9287, "step": 275520 }, { "epoch": 23.77294685990338, "grad_norm": 0.40989193320274353, "learning_rate": 0.001, "loss": 1.9327, "step": 275576 }, { "epoch": 23.77777777777778, "grad_norm": 0.46656450629234314, "learning_rate": 0.001, "loss": 1.928, "step": 275632 }, { "epoch": 23.782608695652176, "grad_norm": 1.1532413959503174, "learning_rate": 0.001, "loss": 1.9357, "step": 275688 }, { "epoch": 23.78743961352657, "grad_norm": 0.6365094780921936, "learning_rate": 0.001, "loss": 1.9278, "step": 275744 }, { "epoch": 23.792270531400966, "grad_norm": 1.0296921730041504, "learning_rate": 0.001, "loss": 1.9241, "step": 275800 }, { "epoch": 23.797101449275363, "grad_norm": 0.5454058647155762, "learning_rate": 0.001, "loss": 1.9259, "step": 275856 }, { "epoch": 23.80193236714976, "grad_norm": 0.7904008030891418, "learning_rate": 0.001, "loss": 1.9276, "step": 275912 }, { "epoch": 23.806763285024154, "grad_norm": 1.605818510055542, "learning_rate": 0.001, "loss": 1.9365, "step": 275968 }, { "epoch": 23.81159420289855, "grad_norm": 1.0176571607589722, "learning_rate": 0.001, "loss": 1.9354, "step": 276024 }, { "epoch": 23.816425120772948, "grad_norm": 2.5698883533477783, "learning_rate": 0.001, "loss": 1.9346, "step": 276080 }, { "epoch": 23.82125603864734, "grad_norm": 0.46160316467285156, "learning_rate": 0.001, "loss": 1.9415, "step": 276136 }, { "epoch": 23.82608695652174, "grad_norm": 0.6733005046844482, "learning_rate": 0.001, "loss": 1.9211, "step": 276192 }, { "epoch": 23.830917874396135, "grad_norm": 0.961740255355835, "learning_rate": 0.001, "loss": 1.9296, "step": 276248 }, { "epoch": 23.835748792270532, "grad_norm": 2.839693069458008, "learning_rate": 0.001, "loss": 1.9241, "step": 276304 }, { "epoch": 23.840579710144926, "grad_norm": 0.9255725145339966, "learning_rate": 0.001, "loss": 1.9231, "step": 276360 }, { "epoch": 23.845410628019323, "grad_norm": 1.6812142133712769, "learning_rate": 0.001, "loss": 1.9349, "step": 276416 }, { "epoch": 23.85024154589372, "grad_norm": 0.38448408246040344, "learning_rate": 0.001, "loss": 1.9341, "step": 276472 }, { "epoch": 23.855072463768117, "grad_norm": 1.389699935913086, "learning_rate": 0.001, "loss": 1.9398, "step": 276528 }, { "epoch": 23.85990338164251, "grad_norm": 1.6831470727920532, "learning_rate": 0.001, "loss": 1.9359, "step": 276584 }, { "epoch": 23.864734299516908, "grad_norm": 0.9034839868545532, "learning_rate": 0.001, "loss": 1.9394, "step": 276640 }, { "epoch": 23.869565217391305, "grad_norm": 3.222108840942383, "learning_rate": 0.001, "loss": 1.9352, "step": 276696 }, { "epoch": 23.8743961352657, "grad_norm": 1.3451405763626099, "learning_rate": 0.001, "loss": 1.9248, "step": 276752 }, { "epoch": 23.879227053140095, "grad_norm": 0.7348869442939758, "learning_rate": 0.001, "loss": 1.9359, "step": 276808 }, { "epoch": 23.884057971014492, "grad_norm": 0.6763084530830383, "learning_rate": 0.001, "loss": 1.9272, "step": 276864 }, { "epoch": 23.88888888888889, "grad_norm": 1.1968436241149902, "learning_rate": 0.001, "loss": 1.9454, "step": 276920 }, { "epoch": 23.893719806763286, "grad_norm": 1.2304675579071045, "learning_rate": 0.001, "loss": 1.9486, "step": 276976 }, { "epoch": 23.89855072463768, "grad_norm": 1.362490177154541, "learning_rate": 0.001, "loss": 1.9645, "step": 277032 }, { "epoch": 23.903381642512077, "grad_norm": 1.351412296295166, "learning_rate": 0.001, "loss": 1.9602, "step": 277088 }, { "epoch": 23.908212560386474, "grad_norm": 1.486250400543213, "learning_rate": 0.001, "loss": 1.9793, "step": 277144 }, { "epoch": 23.91304347826087, "grad_norm": 0.759766161441803, "learning_rate": 0.001, "loss": 1.9803, "step": 277200 }, { "epoch": 23.917874396135264, "grad_norm": 1.292884111404419, "learning_rate": 0.001, "loss": 1.9625, "step": 277256 }, { "epoch": 23.92270531400966, "grad_norm": 0.5014551281929016, "learning_rate": 0.001, "loss": 1.9593, "step": 277312 }, { "epoch": 23.92753623188406, "grad_norm": 0.46127235889434814, "learning_rate": 0.001, "loss": 1.9588, "step": 277368 }, { "epoch": 23.932367149758456, "grad_norm": 2.2300260066986084, "learning_rate": 0.001, "loss": 1.944, "step": 277424 }, { "epoch": 23.93719806763285, "grad_norm": 1.0253379344940186, "learning_rate": 0.001, "loss": 1.9501, "step": 277480 }, { "epoch": 23.942028985507246, "grad_norm": 0.87002032995224, "learning_rate": 0.001, "loss": 1.9409, "step": 277536 }, { "epoch": 23.946859903381643, "grad_norm": 1.2491779327392578, "learning_rate": 0.001, "loss": 1.9522, "step": 277592 }, { "epoch": 23.95169082125604, "grad_norm": 0.5064098238945007, "learning_rate": 0.001, "loss": 1.9595, "step": 277648 }, { "epoch": 23.956521739130434, "grad_norm": 1.01318359375, "learning_rate": 0.001, "loss": 1.9589, "step": 277704 }, { "epoch": 23.96135265700483, "grad_norm": 0.4085215628147125, "learning_rate": 0.001, "loss": 1.952, "step": 277760 }, { "epoch": 23.966183574879228, "grad_norm": 3.1914901733398438, "learning_rate": 0.001, "loss": 1.9466, "step": 277816 }, { "epoch": 23.971014492753625, "grad_norm": 1.040095567703247, "learning_rate": 0.001, "loss": 1.937, "step": 277872 }, { "epoch": 23.97584541062802, "grad_norm": 0.6660473942756653, "learning_rate": 0.001, "loss": 1.9384, "step": 277928 }, { "epoch": 23.980676328502415, "grad_norm": 0.9203644394874573, "learning_rate": 0.001, "loss": 1.9511, "step": 277984 }, { "epoch": 23.985507246376812, "grad_norm": 2.4745121002197266, "learning_rate": 0.001, "loss": 1.9384, "step": 278040 }, { "epoch": 23.990338164251206, "grad_norm": 1.9933804273605347, "learning_rate": 0.001, "loss": 1.9472, "step": 278096 }, { "epoch": 23.995169082125603, "grad_norm": 1.2811702489852905, "learning_rate": 0.001, "loss": 1.9364, "step": 278152 }, { "epoch": 24.0, "grad_norm": 1.7240533828735352, "learning_rate": 0.001, "loss": 1.9419, "step": 278208 }, { "epoch": 24.004830917874397, "grad_norm": 0.7032594680786133, "learning_rate": 0.001, "loss": 1.9071, "step": 278264 }, { "epoch": 24.00966183574879, "grad_norm": 0.4524131715297699, "learning_rate": 0.001, "loss": 1.9034, "step": 278320 }, { "epoch": 24.014492753623188, "grad_norm": 0.6271324753761292, "learning_rate": 0.001, "loss": 1.8983, "step": 278376 }, { "epoch": 24.019323671497585, "grad_norm": 5.7993268966674805, "learning_rate": 0.001, "loss": 1.9055, "step": 278432 }, { "epoch": 24.02415458937198, "grad_norm": 0.6311468482017517, "learning_rate": 0.001, "loss": 1.9108, "step": 278488 }, { "epoch": 24.028985507246375, "grad_norm": 1.0532279014587402, "learning_rate": 0.001, "loss": 1.9029, "step": 278544 }, { "epoch": 24.033816425120772, "grad_norm": 0.5748487114906311, "learning_rate": 0.001, "loss": 1.9065, "step": 278600 }, { "epoch": 24.03864734299517, "grad_norm": 0.5513666272163391, "learning_rate": 0.001, "loss": 1.9181, "step": 278656 }, { "epoch": 24.043478260869566, "grad_norm": 0.4437922239303589, "learning_rate": 0.001, "loss": 1.917, "step": 278712 }, { "epoch": 24.04830917874396, "grad_norm": 0.6994848251342773, "learning_rate": 0.001, "loss": 1.9171, "step": 278768 }, { "epoch": 24.053140096618357, "grad_norm": 0.5937466025352478, "learning_rate": 0.001, "loss": 1.9203, "step": 278824 }, { "epoch": 24.057971014492754, "grad_norm": 2.7160017490386963, "learning_rate": 0.001, "loss": 1.9168, "step": 278880 }, { "epoch": 24.06280193236715, "grad_norm": 0.5684502124786377, "learning_rate": 0.001, "loss": 1.9128, "step": 278936 }, { "epoch": 24.067632850241544, "grad_norm": 0.9820815324783325, "learning_rate": 0.001, "loss": 1.9113, "step": 278992 }, { "epoch": 24.07246376811594, "grad_norm": 1.1331963539123535, "learning_rate": 0.001, "loss": 1.901, "step": 279048 }, { "epoch": 24.07729468599034, "grad_norm": 1.1206762790679932, "learning_rate": 0.001, "loss": 1.892, "step": 279104 }, { "epoch": 24.082125603864736, "grad_norm": 1.8506113290786743, "learning_rate": 0.001, "loss": 1.888, "step": 279160 }, { "epoch": 24.08695652173913, "grad_norm": 1.0762802362442017, "learning_rate": 0.001, "loss": 1.8909, "step": 279216 }, { "epoch": 24.091787439613526, "grad_norm": 2.2971434593200684, "learning_rate": 0.001, "loss": 1.8876, "step": 279272 }, { "epoch": 24.096618357487923, "grad_norm": 2.967075824737549, "learning_rate": 0.001, "loss": 1.9016, "step": 279328 }, { "epoch": 24.10144927536232, "grad_norm": 0.9901528358459473, "learning_rate": 0.001, "loss": 1.8964, "step": 279384 }, { "epoch": 24.106280193236714, "grad_norm": 1.8009085655212402, "learning_rate": 0.001, "loss": 1.9047, "step": 279440 }, { "epoch": 24.11111111111111, "grad_norm": 0.6187200546264648, "learning_rate": 0.001, "loss": 1.8976, "step": 279496 }, { "epoch": 24.115942028985508, "grad_norm": 0.43210309743881226, "learning_rate": 0.001, "loss": 1.906, "step": 279552 }, { "epoch": 24.120772946859905, "grad_norm": 0.5662825703620911, "learning_rate": 0.001, "loss": 1.9059, "step": 279608 }, { "epoch": 24.1256038647343, "grad_norm": 0.3383672535419464, "learning_rate": 0.001, "loss": 1.9169, "step": 279664 }, { "epoch": 24.130434782608695, "grad_norm": 0.38147589564323425, "learning_rate": 0.001, "loss": 1.9167, "step": 279720 }, { "epoch": 24.135265700483092, "grad_norm": 1.9904968738555908, "learning_rate": 0.001, "loss": 1.9143, "step": 279776 }, { "epoch": 24.14009661835749, "grad_norm": 0.4460856318473816, "learning_rate": 0.001, "loss": 1.9122, "step": 279832 }, { "epoch": 24.144927536231883, "grad_norm": 0.43119677901268005, "learning_rate": 0.001, "loss": 1.9156, "step": 279888 }, { "epoch": 24.14975845410628, "grad_norm": 1.5527560710906982, "learning_rate": 0.001, "loss": 1.9021, "step": 279944 }, { "epoch": 24.154589371980677, "grad_norm": 0.27000075578689575, "learning_rate": 0.001, "loss": 1.9093, "step": 280000 }, { "epoch": 24.159420289855074, "grad_norm": 3.288224697113037, "learning_rate": 0.001, "loss": 1.9068, "step": 280056 }, { "epoch": 24.164251207729468, "grad_norm": 0.6336233615875244, "learning_rate": 0.001, "loss": 1.9056, "step": 280112 }, { "epoch": 24.169082125603865, "grad_norm": 2.2870900630950928, "learning_rate": 0.001, "loss": 1.9096, "step": 280168 }, { "epoch": 24.17391304347826, "grad_norm": 0.7440868020057678, "learning_rate": 0.001, "loss": 1.9276, "step": 280224 }, { "epoch": 24.17874396135266, "grad_norm": 7.545788288116455, "learning_rate": 0.001, "loss": 1.9186, "step": 280280 }, { "epoch": 24.183574879227052, "grad_norm": 2.4397919178009033, "learning_rate": 0.001, "loss": 1.912, "step": 280336 }, { "epoch": 24.18840579710145, "grad_norm": 0.5629308223724365, "learning_rate": 0.001, "loss": 1.9282, "step": 280392 }, { "epoch": 24.193236714975846, "grad_norm": 2.577620506286621, "learning_rate": 0.001, "loss": 1.9249, "step": 280448 }, { "epoch": 24.19806763285024, "grad_norm": 1.189853310585022, "learning_rate": 0.001, "loss": 1.9211, "step": 280504 }, { "epoch": 24.202898550724637, "grad_norm": 0.8739112019538879, "learning_rate": 0.001, "loss": 1.9149, "step": 280560 }, { "epoch": 24.207729468599034, "grad_norm": 0.6673154830932617, "learning_rate": 0.001, "loss": 1.9188, "step": 280616 }, { "epoch": 24.21256038647343, "grad_norm": 0.6294225454330444, "learning_rate": 0.001, "loss": 1.9261, "step": 280672 }, { "epoch": 24.217391304347824, "grad_norm": 1.6908996105194092, "learning_rate": 0.001, "loss": 1.9277, "step": 280728 }, { "epoch": 24.22222222222222, "grad_norm": 1.5422178506851196, "learning_rate": 0.001, "loss": 1.9265, "step": 280784 }, { "epoch": 24.22705314009662, "grad_norm": 0.3957167863845825, "learning_rate": 0.001, "loss": 1.9215, "step": 280840 }, { "epoch": 24.231884057971016, "grad_norm": 1.4532999992370605, "learning_rate": 0.001, "loss": 1.923, "step": 280896 }, { "epoch": 24.23671497584541, "grad_norm": 3.4129905700683594, "learning_rate": 0.001, "loss": 1.9234, "step": 280952 }, { "epoch": 24.241545893719806, "grad_norm": 0.7897972464561462, "learning_rate": 0.001, "loss": 1.9227, "step": 281008 }, { "epoch": 24.246376811594203, "grad_norm": 0.6017054915428162, "learning_rate": 0.001, "loss": 1.9234, "step": 281064 }, { "epoch": 24.2512077294686, "grad_norm": 1.723306655883789, "learning_rate": 0.001, "loss": 1.9249, "step": 281120 }, { "epoch": 24.256038647342994, "grad_norm": 0.5447843670845032, "learning_rate": 0.001, "loss": 1.9186, "step": 281176 }, { "epoch": 24.26086956521739, "grad_norm": 0.3186738193035126, "learning_rate": 0.001, "loss": 1.9232, "step": 281232 }, { "epoch": 24.265700483091788, "grad_norm": 0.7094520926475525, "learning_rate": 0.001, "loss": 1.9104, "step": 281288 }, { "epoch": 24.270531400966185, "grad_norm": 0.804150402545929, "learning_rate": 0.001, "loss": 1.9097, "step": 281344 }, { "epoch": 24.27536231884058, "grad_norm": 1.580153226852417, "learning_rate": 0.001, "loss": 1.9146, "step": 281400 }, { "epoch": 24.280193236714975, "grad_norm": 1.3783105611801147, "learning_rate": 0.001, "loss": 1.9236, "step": 281456 }, { "epoch": 24.285024154589372, "grad_norm": 1.4467170238494873, "learning_rate": 0.001, "loss": 1.9224, "step": 281512 }, { "epoch": 24.28985507246377, "grad_norm": 1.048298954963684, "learning_rate": 0.001, "loss": 1.9332, "step": 281568 }, { "epoch": 24.294685990338163, "grad_norm": 1.2644479274749756, "learning_rate": 0.001, "loss": 1.9327, "step": 281624 }, { "epoch": 24.29951690821256, "grad_norm": 0.5416079163551331, "learning_rate": 0.001, "loss": 1.9278, "step": 281680 }, { "epoch": 24.304347826086957, "grad_norm": 1.05036461353302, "learning_rate": 0.001, "loss": 1.9206, "step": 281736 }, { "epoch": 24.309178743961354, "grad_norm": 0.428485631942749, "learning_rate": 0.001, "loss": 1.9107, "step": 281792 }, { "epoch": 24.314009661835748, "grad_norm": 0.8467603325843811, "learning_rate": 0.001, "loss": 1.9152, "step": 281848 }, { "epoch": 24.318840579710145, "grad_norm": 1.9940674304962158, "learning_rate": 0.001, "loss": 1.9133, "step": 281904 }, { "epoch": 24.32367149758454, "grad_norm": 0.5763525366783142, "learning_rate": 0.001, "loss": 1.9231, "step": 281960 }, { "epoch": 24.32850241545894, "grad_norm": 3.374732732772827, "learning_rate": 0.001, "loss": 1.9131, "step": 282016 }, { "epoch": 24.333333333333332, "grad_norm": 1.113442063331604, "learning_rate": 0.001, "loss": 1.9148, "step": 282072 }, { "epoch": 24.33816425120773, "grad_norm": 1.1274182796478271, "learning_rate": 0.001, "loss": 1.9087, "step": 282128 }, { "epoch": 24.342995169082126, "grad_norm": 0.6083889603614807, "learning_rate": 0.001, "loss": 1.9101, "step": 282184 }, { "epoch": 24.347826086956523, "grad_norm": 1.5237759351730347, "learning_rate": 0.001, "loss": 1.9221, "step": 282240 }, { "epoch": 24.352657004830917, "grad_norm": 0.906283974647522, "learning_rate": 0.001, "loss": 1.9193, "step": 282296 }, { "epoch": 24.357487922705314, "grad_norm": 1.0883668661117554, "learning_rate": 0.001, "loss": 1.9191, "step": 282352 }, { "epoch": 24.36231884057971, "grad_norm": 2.2442147731781006, "learning_rate": 0.001, "loss": 1.9095, "step": 282408 }, { "epoch": 24.367149758454108, "grad_norm": 2.4565682411193848, "learning_rate": 0.001, "loss": 1.9167, "step": 282464 }, { "epoch": 24.3719806763285, "grad_norm": 0.7146531343460083, "learning_rate": 0.001, "loss": 1.916, "step": 282520 }, { "epoch": 24.3768115942029, "grad_norm": 0.7050408720970154, "learning_rate": 0.001, "loss": 1.9241, "step": 282576 }, { "epoch": 24.381642512077295, "grad_norm": 4.046645164489746, "learning_rate": 0.001, "loss": 1.9323, "step": 282632 }, { "epoch": 24.386473429951693, "grad_norm": 2.13010573387146, "learning_rate": 0.001, "loss": 1.9221, "step": 282688 }, { "epoch": 24.391304347826086, "grad_norm": 0.566875696182251, "learning_rate": 0.001, "loss": 1.9288, "step": 282744 }, { "epoch": 24.396135265700483, "grad_norm": 2.4855856895446777, "learning_rate": 0.001, "loss": 1.9182, "step": 282800 }, { "epoch": 24.40096618357488, "grad_norm": 0.4139580726623535, "learning_rate": 0.001, "loss": 1.9184, "step": 282856 }, { "epoch": 24.405797101449274, "grad_norm": 1.4570844173431396, "learning_rate": 0.001, "loss": 1.9331, "step": 282912 }, { "epoch": 24.41062801932367, "grad_norm": 4.85396671295166, "learning_rate": 0.001, "loss": 1.9447, "step": 282968 }, { "epoch": 24.415458937198068, "grad_norm": 1.0623300075531006, "learning_rate": 0.001, "loss": 1.9351, "step": 283024 }, { "epoch": 24.420289855072465, "grad_norm": 3.9003710746765137, "learning_rate": 0.001, "loss": 1.9182, "step": 283080 }, { "epoch": 24.42512077294686, "grad_norm": 1.6495261192321777, "learning_rate": 0.001, "loss": 1.9318, "step": 283136 }, { "epoch": 24.429951690821255, "grad_norm": 0.9540917277336121, "learning_rate": 0.001, "loss": 1.9456, "step": 283192 }, { "epoch": 24.434782608695652, "grad_norm": 1.2446115016937256, "learning_rate": 0.001, "loss": 1.9324, "step": 283248 }, { "epoch": 24.43961352657005, "grad_norm": 0.6882824301719666, "learning_rate": 0.001, "loss": 1.9393, "step": 283304 }, { "epoch": 24.444444444444443, "grad_norm": 0.6733536720275879, "learning_rate": 0.001, "loss": 1.9462, "step": 283360 }, { "epoch": 24.44927536231884, "grad_norm": 1.2083462476730347, "learning_rate": 0.001, "loss": 1.9673, "step": 283416 }, { "epoch": 24.454106280193237, "grad_norm": 1.1044378280639648, "learning_rate": 0.001, "loss": 1.9504, "step": 283472 }, { "epoch": 24.458937198067634, "grad_norm": 2.113809108734131, "learning_rate": 0.001, "loss": 1.9504, "step": 283528 }, { "epoch": 24.463768115942027, "grad_norm": 0.47570937871932983, "learning_rate": 0.001, "loss": 1.9392, "step": 283584 }, { "epoch": 24.468599033816425, "grad_norm": 1.5000444650650024, "learning_rate": 0.001, "loss": 1.938, "step": 283640 }, { "epoch": 24.47342995169082, "grad_norm": 1.0407072305679321, "learning_rate": 0.001, "loss": 1.931, "step": 283696 }, { "epoch": 24.47826086956522, "grad_norm": 6.89382266998291, "learning_rate": 0.001, "loss": 1.9351, "step": 283752 }, { "epoch": 24.483091787439612, "grad_norm": 6.6253790855407715, "learning_rate": 0.001, "loss": 1.9307, "step": 283808 }, { "epoch": 24.48792270531401, "grad_norm": 2.150550365447998, "learning_rate": 0.001, "loss": 1.9321, "step": 283864 }, { "epoch": 24.492753623188406, "grad_norm": 1.5275256633758545, "learning_rate": 0.001, "loss": 1.9355, "step": 283920 }, { "epoch": 24.497584541062803, "grad_norm": 0.5016854405403137, "learning_rate": 0.001, "loss": 1.9506, "step": 283976 }, { "epoch": 24.502415458937197, "grad_norm": 0.7780137658119202, "learning_rate": 0.001, "loss": 1.9549, "step": 284032 }, { "epoch": 24.507246376811594, "grad_norm": 1.1209287643432617, "learning_rate": 0.001, "loss": 1.948, "step": 284088 }, { "epoch": 24.51207729468599, "grad_norm": 1.2572706937789917, "learning_rate": 0.001, "loss": 1.9412, "step": 284144 }, { "epoch": 24.516908212560388, "grad_norm": 2.046964406967163, "learning_rate": 0.001, "loss": 1.9412, "step": 284200 }, { "epoch": 24.52173913043478, "grad_norm": 1.0316088199615479, "learning_rate": 0.001, "loss": 1.9409, "step": 284256 }, { "epoch": 24.52657004830918, "grad_norm": 1.9627680778503418, "learning_rate": 0.001, "loss": 1.9466, "step": 284312 }, { "epoch": 24.531400966183575, "grad_norm": 3.5076000690460205, "learning_rate": 0.001, "loss": 1.9313, "step": 284368 }, { "epoch": 24.536231884057973, "grad_norm": 1.429106593132019, "learning_rate": 0.001, "loss": 1.9476, "step": 284424 }, { "epoch": 24.541062801932366, "grad_norm": 15.782718658447266, "learning_rate": 0.001, "loss": 1.9504, "step": 284480 }, { "epoch": 24.545893719806763, "grad_norm": 1.0051147937774658, "learning_rate": 0.001, "loss": 1.9585, "step": 284536 }, { "epoch": 24.55072463768116, "grad_norm": 1.2437424659729004, "learning_rate": 0.001, "loss": 1.9675, "step": 284592 }, { "epoch": 24.555555555555557, "grad_norm": 0.7899636626243591, "learning_rate": 0.001, "loss": 1.9781, "step": 284648 }, { "epoch": 24.56038647342995, "grad_norm": 1.4735686779022217, "learning_rate": 0.001, "loss": 1.9693, "step": 284704 }, { "epoch": 24.565217391304348, "grad_norm": 0.9253683090209961, "learning_rate": 0.001, "loss": 1.9705, "step": 284760 }, { "epoch": 24.570048309178745, "grad_norm": 1.17081618309021, "learning_rate": 0.001, "loss": 1.9686, "step": 284816 }, { "epoch": 24.57487922705314, "grad_norm": 0.8421620726585388, "learning_rate": 0.001, "loss": 1.9653, "step": 284872 }, { "epoch": 24.579710144927535, "grad_norm": 2.7979695796966553, "learning_rate": 0.001, "loss": 1.9644, "step": 284928 }, { "epoch": 24.584541062801932, "grad_norm": 0.5268198251724243, "learning_rate": 0.001, "loss": 1.9559, "step": 284984 }, { "epoch": 24.58937198067633, "grad_norm": 2.684612512588501, "learning_rate": 0.001, "loss": 1.9496, "step": 285040 }, { "epoch": 24.594202898550726, "grad_norm": 2.848207950592041, "learning_rate": 0.001, "loss": 1.9588, "step": 285096 }, { "epoch": 24.59903381642512, "grad_norm": 0.6829081773757935, "learning_rate": 0.001, "loss": 1.9425, "step": 285152 }, { "epoch": 24.603864734299517, "grad_norm": 0.4179551899433136, "learning_rate": 0.001, "loss": 1.9347, "step": 285208 }, { "epoch": 24.608695652173914, "grad_norm": 0.4337264597415924, "learning_rate": 0.001, "loss": 1.9362, "step": 285264 }, { "epoch": 24.613526570048307, "grad_norm": 0.8992215991020203, "learning_rate": 0.001, "loss": 1.9378, "step": 285320 }, { "epoch": 24.618357487922705, "grad_norm": 0.6492766737937927, "learning_rate": 0.001, "loss": 1.9284, "step": 285376 }, { "epoch": 24.6231884057971, "grad_norm": 1.1232877969741821, "learning_rate": 0.001, "loss": 1.9261, "step": 285432 }, { "epoch": 24.6280193236715, "grad_norm": 1.0397026538848877, "learning_rate": 0.001, "loss": 1.9288, "step": 285488 }, { "epoch": 24.632850241545892, "grad_norm": 0.74684077501297, "learning_rate": 0.001, "loss": 1.9283, "step": 285544 }, { "epoch": 24.63768115942029, "grad_norm": 0.8136125206947327, "learning_rate": 0.001, "loss": 1.9362, "step": 285600 }, { "epoch": 24.642512077294686, "grad_norm": 10.070075035095215, "learning_rate": 0.001, "loss": 1.9267, "step": 285656 }, { "epoch": 24.647342995169083, "grad_norm": 3.998183488845825, "learning_rate": 0.001, "loss": 1.9176, "step": 285712 }, { "epoch": 24.652173913043477, "grad_norm": 1.1154167652130127, "learning_rate": 0.001, "loss": 1.9308, "step": 285768 }, { "epoch": 24.657004830917874, "grad_norm": 3.9229440689086914, "learning_rate": 0.001, "loss": 1.9356, "step": 285824 }, { "epoch": 24.66183574879227, "grad_norm": 1.5403419733047485, "learning_rate": 0.001, "loss": 1.9296, "step": 285880 }, { "epoch": 24.666666666666668, "grad_norm": 0.8178623914718628, "learning_rate": 0.001, "loss": 1.9296, "step": 285936 }, { "epoch": 24.67149758454106, "grad_norm": 5.438370704650879, "learning_rate": 0.001, "loss": 1.9276, "step": 285992 }, { "epoch": 24.67632850241546, "grad_norm": 1.1394742727279663, "learning_rate": 0.001, "loss": 1.9357, "step": 286048 }, { "epoch": 24.681159420289855, "grad_norm": 1.0972161293029785, "learning_rate": 0.001, "loss": 1.9366, "step": 286104 }, { "epoch": 24.685990338164252, "grad_norm": 3.26716685295105, "learning_rate": 0.001, "loss": 1.9377, "step": 286160 }, { "epoch": 24.690821256038646, "grad_norm": 1.0800268650054932, "learning_rate": 0.001, "loss": 1.928, "step": 286216 }, { "epoch": 24.695652173913043, "grad_norm": 0.8661267757415771, "learning_rate": 0.001, "loss": 1.9326, "step": 286272 }, { "epoch": 24.70048309178744, "grad_norm": 0.5988258123397827, "learning_rate": 0.001, "loss": 1.9285, "step": 286328 }, { "epoch": 24.705314009661837, "grad_norm": 0.5859854221343994, "learning_rate": 0.001, "loss": 1.9302, "step": 286384 }, { "epoch": 24.71014492753623, "grad_norm": 1.8245928287506104, "learning_rate": 0.001, "loss": 1.9379, "step": 286440 }, { "epoch": 24.714975845410628, "grad_norm": 0.36968106031417847, "learning_rate": 0.001, "loss": 1.9435, "step": 286496 }, { "epoch": 24.719806763285025, "grad_norm": 0.6545636653900146, "learning_rate": 0.001, "loss": 1.9397, "step": 286552 }, { "epoch": 24.72463768115942, "grad_norm": 1.5007156133651733, "learning_rate": 0.001, "loss": 1.936, "step": 286608 }, { "epoch": 24.729468599033815, "grad_norm": 1.6536756753921509, "learning_rate": 0.001, "loss": 1.9341, "step": 286664 }, { "epoch": 24.734299516908212, "grad_norm": 0.7244811058044434, "learning_rate": 0.001, "loss": 1.9329, "step": 286720 }, { "epoch": 24.73913043478261, "grad_norm": 1.6437448263168335, "learning_rate": 0.001, "loss": 1.929, "step": 286776 }, { "epoch": 24.743961352657006, "grad_norm": 0.5581583380699158, "learning_rate": 0.001, "loss": 1.9384, "step": 286832 }, { "epoch": 24.7487922705314, "grad_norm": 0.5002501606941223, "learning_rate": 0.001, "loss": 1.9299, "step": 286888 }, { "epoch": 24.753623188405797, "grad_norm": 0.6342955231666565, "learning_rate": 0.001, "loss": 1.9325, "step": 286944 }, { "epoch": 24.758454106280194, "grad_norm": 0.7209481596946716, "learning_rate": 0.001, "loss": 1.9363, "step": 287000 }, { "epoch": 24.76328502415459, "grad_norm": 0.4335455000400543, "learning_rate": 0.001, "loss": 1.9333, "step": 287056 }, { "epoch": 24.768115942028984, "grad_norm": 1.286346435546875, "learning_rate": 0.001, "loss": 1.9411, "step": 287112 }, { "epoch": 24.77294685990338, "grad_norm": 3.266732692718506, "learning_rate": 0.001, "loss": 1.9312, "step": 287168 }, { "epoch": 24.77777777777778, "grad_norm": 0.4348663091659546, "learning_rate": 0.001, "loss": 1.9389, "step": 287224 }, { "epoch": 24.782608695652176, "grad_norm": 1.258651614189148, "learning_rate": 0.001, "loss": 1.9349, "step": 287280 }, { "epoch": 24.78743961352657, "grad_norm": 0.5606732368469238, "learning_rate": 0.001, "loss": 1.9324, "step": 287336 }, { "epoch": 24.792270531400966, "grad_norm": 0.7041652202606201, "learning_rate": 0.001, "loss": 1.9283, "step": 287392 }, { "epoch": 24.797101449275363, "grad_norm": 0.34374329447746277, "learning_rate": 0.001, "loss": 1.9427, "step": 287448 }, { "epoch": 24.80193236714976, "grad_norm": 0.5982248783111572, "learning_rate": 0.001, "loss": 1.938, "step": 287504 }, { "epoch": 24.806763285024154, "grad_norm": 1.4653364419937134, "learning_rate": 0.001, "loss": 1.936, "step": 287560 }, { "epoch": 24.81159420289855, "grad_norm": 1.584633231163025, "learning_rate": 0.001, "loss": 1.9471, "step": 287616 }, { "epoch": 24.816425120772948, "grad_norm": 1.4942760467529297, "learning_rate": 0.001, "loss": 1.9411, "step": 287672 }, { "epoch": 24.82125603864734, "grad_norm": 2.931358814239502, "learning_rate": 0.001, "loss": 1.9511, "step": 287728 }, { "epoch": 24.82608695652174, "grad_norm": 0.7946625351905823, "learning_rate": 0.001, "loss": 1.9392, "step": 287784 }, { "epoch": 24.830917874396135, "grad_norm": 0.9004411697387695, "learning_rate": 0.001, "loss": 1.9463, "step": 287840 }, { "epoch": 24.835748792270532, "grad_norm": 0.44295451045036316, "learning_rate": 0.001, "loss": 1.9381, "step": 287896 }, { "epoch": 24.840579710144926, "grad_norm": 0.8131876587867737, "learning_rate": 0.001, "loss": 1.936, "step": 287952 }, { "epoch": 24.845410628019323, "grad_norm": 0.8626725077629089, "learning_rate": 0.001, "loss": 1.9352, "step": 288008 }, { "epoch": 24.85024154589372, "grad_norm": 0.4049046039581299, "learning_rate": 0.001, "loss": 1.9373, "step": 288064 }, { "epoch": 24.855072463768117, "grad_norm": 0.6616743803024292, "learning_rate": 0.001, "loss": 1.9535, "step": 288120 }, { "epoch": 24.85990338164251, "grad_norm": 5.975124835968018, "learning_rate": 0.001, "loss": 1.9412, "step": 288176 }, { "epoch": 24.864734299516908, "grad_norm": 1.0922489166259766, "learning_rate": 0.001, "loss": 1.9281, "step": 288232 }, { "epoch": 24.869565217391305, "grad_norm": 1.129918098449707, "learning_rate": 0.001, "loss": 1.9297, "step": 288288 }, { "epoch": 24.8743961352657, "grad_norm": 0.7542330622673035, "learning_rate": 0.001, "loss": 1.9355, "step": 288344 }, { "epoch": 24.879227053140095, "grad_norm": 1.5211526155471802, "learning_rate": 0.001, "loss": 1.9324, "step": 288400 }, { "epoch": 24.884057971014492, "grad_norm": 1.161846399307251, "learning_rate": 0.001, "loss": 1.9313, "step": 288456 }, { "epoch": 24.88888888888889, "grad_norm": 0.9829748272895813, "learning_rate": 0.001, "loss": 1.9246, "step": 288512 }, { "epoch": 24.893719806763286, "grad_norm": 0.3879932761192322, "learning_rate": 0.001, "loss": 1.9312, "step": 288568 }, { "epoch": 24.89855072463768, "grad_norm": 2.8497977256774902, "learning_rate": 0.001, "loss": 1.9349, "step": 288624 }, { "epoch": 24.903381642512077, "grad_norm": 0.8585650324821472, "learning_rate": 0.001, "loss": 1.9372, "step": 288680 }, { "epoch": 24.908212560386474, "grad_norm": 0.4140551686286926, "learning_rate": 0.001, "loss": 1.9396, "step": 288736 }, { "epoch": 24.91304347826087, "grad_norm": 0.8719210028648376, "learning_rate": 0.001, "loss": 1.9389, "step": 288792 }, { "epoch": 24.917874396135264, "grad_norm": 1.5678306818008423, "learning_rate": 0.001, "loss": 1.9254, "step": 288848 }, { "epoch": 24.92270531400966, "grad_norm": 0.6155852675437927, "learning_rate": 0.001, "loss": 1.9267, "step": 288904 }, { "epoch": 24.92753623188406, "grad_norm": 1.3455864191055298, "learning_rate": 0.001, "loss": 1.9346, "step": 288960 }, { "epoch": 24.932367149758456, "grad_norm": 0.5619357824325562, "learning_rate": 0.001, "loss": 1.9422, "step": 289016 }, { "epoch": 24.93719806763285, "grad_norm": 1.0533734560012817, "learning_rate": 0.001, "loss": 1.9363, "step": 289072 }, { "epoch": 24.942028985507246, "grad_norm": 0.5143280029296875, "learning_rate": 0.001, "loss": 1.9376, "step": 289128 }, { "epoch": 24.946859903381643, "grad_norm": 3.120244026184082, "learning_rate": 0.001, "loss": 1.9342, "step": 289184 }, { "epoch": 24.95169082125604, "grad_norm": 0.6211585402488708, "learning_rate": 0.001, "loss": 1.937, "step": 289240 }, { "epoch": 24.956521739130434, "grad_norm": 0.7097315192222595, "learning_rate": 0.001, "loss": 1.934, "step": 289296 }, { "epoch": 24.96135265700483, "grad_norm": 0.3881317377090454, "learning_rate": 0.001, "loss": 1.9356, "step": 289352 }, { "epoch": 24.966183574879228, "grad_norm": 0.4759860634803772, "learning_rate": 0.001, "loss": 1.9244, "step": 289408 }, { "epoch": 24.971014492753625, "grad_norm": 0.5969966053962708, "learning_rate": 0.001, "loss": 1.919, "step": 289464 }, { "epoch": 24.97584541062802, "grad_norm": 1.1887179613113403, "learning_rate": 0.001, "loss": 1.9184, "step": 289520 }, { "epoch": 24.980676328502415, "grad_norm": 0.37710118293762207, "learning_rate": 0.001, "loss": 1.9266, "step": 289576 }, { "epoch": 24.985507246376812, "grad_norm": 0.3579617738723755, "learning_rate": 0.001, "loss": 1.9223, "step": 289632 }, { "epoch": 24.990338164251206, "grad_norm": 2.7368972301483154, "learning_rate": 0.001, "loss": 1.9329, "step": 289688 }, { "epoch": 24.995169082125603, "grad_norm": 0.5299686789512634, "learning_rate": 0.001, "loss": 1.9329, "step": 289744 }, { "epoch": 25.0, "grad_norm": 0.6297286152839661, "learning_rate": 0.001, "loss": 1.9429, "step": 289800 }, { "epoch": 25.004830917874397, "grad_norm": 0.4525673985481262, "learning_rate": 0.001, "loss": 1.8939, "step": 289856 }, { "epoch": 25.00966183574879, "grad_norm": 1.337429404258728, "learning_rate": 0.001, "loss": 1.8973, "step": 289912 }, { "epoch": 25.014492753623188, "grad_norm": 0.42868876457214355, "learning_rate": 0.001, "loss": 1.8883, "step": 289968 }, { "epoch": 25.019323671497585, "grad_norm": 0.3213861286640167, "learning_rate": 0.001, "loss": 1.8896, "step": 290024 }, { "epoch": 25.02415458937198, "grad_norm": 0.9243196845054626, "learning_rate": 0.001, "loss": 1.8925, "step": 290080 }, { "epoch": 25.028985507246375, "grad_norm": 0.7700756192207336, "learning_rate": 0.001, "loss": 1.894, "step": 290136 }, { "epoch": 25.033816425120772, "grad_norm": 0.37991103529930115, "learning_rate": 0.001, "loss": 1.8958, "step": 290192 }, { "epoch": 25.03864734299517, "grad_norm": 0.6052849888801575, "learning_rate": 0.001, "loss": 1.9125, "step": 290248 }, { "epoch": 25.043478260869566, "grad_norm": 0.5753774046897888, "learning_rate": 0.001, "loss": 1.9022, "step": 290304 }, { "epoch": 25.04830917874396, "grad_norm": 0.8124651312828064, "learning_rate": 0.001, "loss": 1.9048, "step": 290360 }, { "epoch": 25.053140096618357, "grad_norm": 0.7410440444946289, "learning_rate": 0.001, "loss": 1.8953, "step": 290416 }, { "epoch": 25.057971014492754, "grad_norm": 0.8376240134239197, "learning_rate": 0.001, "loss": 1.8952, "step": 290472 }, { "epoch": 25.06280193236715, "grad_norm": 0.5952937006950378, "learning_rate": 0.001, "loss": 1.9086, "step": 290528 }, { "epoch": 25.067632850241544, "grad_norm": 0.49595123529434204, "learning_rate": 0.001, "loss": 1.9099, "step": 290584 }, { "epoch": 25.07246376811594, "grad_norm": 0.9433848261833191, "learning_rate": 0.001, "loss": 1.8922, "step": 290640 }, { "epoch": 25.07729468599034, "grad_norm": 0.9255042672157288, "learning_rate": 0.001, "loss": 1.8915, "step": 290696 }, { "epoch": 25.082125603864736, "grad_norm": 0.42907917499542236, "learning_rate": 0.001, "loss": 1.8935, "step": 290752 }, { "epoch": 25.08695652173913, "grad_norm": 0.4167092740535736, "learning_rate": 0.001, "loss": 1.8914, "step": 290808 }, { "epoch": 25.091787439613526, "grad_norm": 0.45365434885025024, "learning_rate": 0.001, "loss": 1.8861, "step": 290864 }, { "epoch": 25.096618357487923, "grad_norm": 0.7556871175765991, "learning_rate": 0.001, "loss": 1.9013, "step": 290920 }, { "epoch": 25.10144927536232, "grad_norm": 0.319809228181839, "learning_rate": 0.001, "loss": 1.895, "step": 290976 }, { "epoch": 25.106280193236714, "grad_norm": 0.7540002465248108, "learning_rate": 0.001, "loss": 1.8959, "step": 291032 }, { "epoch": 25.11111111111111, "grad_norm": 0.5418335199356079, "learning_rate": 0.001, "loss": 1.8948, "step": 291088 }, { "epoch": 25.115942028985508, "grad_norm": 0.7253740429878235, "learning_rate": 0.001, "loss": 1.8994, "step": 291144 }, { "epoch": 25.120772946859905, "grad_norm": 0.6484837532043457, "learning_rate": 0.001, "loss": 1.9009, "step": 291200 }, { "epoch": 25.1256038647343, "grad_norm": 0.8925520777702332, "learning_rate": 0.001, "loss": 1.8945, "step": 291256 }, { "epoch": 25.130434782608695, "grad_norm": 0.9301453828811646, "learning_rate": 0.001, "loss": 1.8928, "step": 291312 }, { "epoch": 25.135265700483092, "grad_norm": 3.4035086631774902, "learning_rate": 0.001, "loss": 1.8902, "step": 291368 }, { "epoch": 25.14009661835749, "grad_norm": 0.4157479405403137, "learning_rate": 0.001, "loss": 1.8846, "step": 291424 }, { "epoch": 25.144927536231883, "grad_norm": 0.5445131063461304, "learning_rate": 0.001, "loss": 1.8868, "step": 291480 }, { "epoch": 25.14975845410628, "grad_norm": 0.27865248918533325, "learning_rate": 0.001, "loss": 1.9009, "step": 291536 }, { "epoch": 25.154589371980677, "grad_norm": 0.3499648869037628, "learning_rate": 0.001, "loss": 1.8931, "step": 291592 }, { "epoch": 25.159420289855074, "grad_norm": 0.4969694912433624, "learning_rate": 0.001, "loss": 1.9101, "step": 291648 }, { "epoch": 25.164251207729468, "grad_norm": 0.29891514778137207, "learning_rate": 0.001, "loss": 1.9218, "step": 291704 }, { "epoch": 25.169082125603865, "grad_norm": 8.23755168914795, "learning_rate": 0.001, "loss": 1.9258, "step": 291760 }, { "epoch": 25.17391304347826, "grad_norm": 2.853146553039551, "learning_rate": 0.001, "loss": 1.9217, "step": 291816 }, { "epoch": 25.17874396135266, "grad_norm": 1.3786201477050781, "learning_rate": 0.001, "loss": 1.9288, "step": 291872 }, { "epoch": 25.183574879227052, "grad_norm": 0.5281625390052795, "learning_rate": 0.001, "loss": 1.9213, "step": 291928 }, { "epoch": 25.18840579710145, "grad_norm": 1.1346683502197266, "learning_rate": 0.001, "loss": 1.9212, "step": 291984 }, { "epoch": 25.193236714975846, "grad_norm": 0.6556458473205566, "learning_rate": 0.001, "loss": 1.9116, "step": 292040 }, { "epoch": 25.19806763285024, "grad_norm": 0.6012808680534363, "learning_rate": 0.001, "loss": 1.9088, "step": 292096 }, { "epoch": 25.202898550724637, "grad_norm": 0.9514915347099304, "learning_rate": 0.001, "loss": 1.9028, "step": 292152 }, { "epoch": 25.207729468599034, "grad_norm": 0.9847424030303955, "learning_rate": 0.001, "loss": 1.903, "step": 292208 }, { "epoch": 25.21256038647343, "grad_norm": 0.44935745000839233, "learning_rate": 0.001, "loss": 1.9132, "step": 292264 }, { "epoch": 25.217391304347824, "grad_norm": 4.4888105392456055, "learning_rate": 0.001, "loss": 1.9125, "step": 292320 }, { "epoch": 25.22222222222222, "grad_norm": 0.7369659543037415, "learning_rate": 0.001, "loss": 1.9117, "step": 292376 }, { "epoch": 25.22705314009662, "grad_norm": 14.997528076171875, "learning_rate": 0.001, "loss": 1.9065, "step": 292432 }, { "epoch": 25.231884057971016, "grad_norm": 0.8875183463096619, "learning_rate": 0.001, "loss": 1.905, "step": 292488 }, { "epoch": 25.23671497584541, "grad_norm": 7.130239009857178, "learning_rate": 0.001, "loss": 1.8985, "step": 292544 }, { "epoch": 25.241545893719806, "grad_norm": 0.9772172570228577, "learning_rate": 0.001, "loss": 1.8985, "step": 292600 }, { "epoch": 25.246376811594203, "grad_norm": 2.7604634761810303, "learning_rate": 0.001, "loss": 1.9017, "step": 292656 }, { "epoch": 25.2512077294686, "grad_norm": 1.0480865240097046, "learning_rate": 0.001, "loss": 1.9028, "step": 292712 }, { "epoch": 25.256038647342994, "grad_norm": 0.4690069556236267, "learning_rate": 0.001, "loss": 1.9113, "step": 292768 }, { "epoch": 25.26086956521739, "grad_norm": 1.7872720956802368, "learning_rate": 0.001, "loss": 1.9223, "step": 292824 }, { "epoch": 25.265700483091788, "grad_norm": 1.577032446861267, "learning_rate": 0.001, "loss": 1.9093, "step": 292880 }, { "epoch": 25.270531400966185, "grad_norm": 0.27479788661003113, "learning_rate": 0.001, "loss": 1.9037, "step": 292936 }, { "epoch": 25.27536231884058, "grad_norm": 1.9263094663619995, "learning_rate": 0.001, "loss": 1.9083, "step": 292992 }, { "epoch": 25.280193236714975, "grad_norm": 0.5642146468162537, "learning_rate": 0.001, "loss": 1.9015, "step": 293048 }, { "epoch": 25.285024154589372, "grad_norm": 2.787116289138794, "learning_rate": 0.001, "loss": 1.9051, "step": 293104 }, { "epoch": 25.28985507246377, "grad_norm": 3.6648001670837402, "learning_rate": 0.001, "loss": 1.9044, "step": 293160 }, { "epoch": 25.294685990338163, "grad_norm": 1.8598066568374634, "learning_rate": 0.001, "loss": 1.9179, "step": 293216 }, { "epoch": 25.29951690821256, "grad_norm": 0.5635346174240112, "learning_rate": 0.001, "loss": 1.925, "step": 293272 }, { "epoch": 25.304347826086957, "grad_norm": 0.408155620098114, "learning_rate": 0.001, "loss": 1.895, "step": 293328 }, { "epoch": 25.309178743961354, "grad_norm": 2.83010196685791, "learning_rate": 0.001, "loss": 1.8942, "step": 293384 }, { "epoch": 25.314009661835748, "grad_norm": 0.6202123165130615, "learning_rate": 0.001, "loss": 1.8963, "step": 293440 }, { "epoch": 25.318840579710145, "grad_norm": 0.7521714568138123, "learning_rate": 0.001, "loss": 1.9047, "step": 293496 }, { "epoch": 25.32367149758454, "grad_norm": 1.0924556255340576, "learning_rate": 0.001, "loss": 1.8953, "step": 293552 }, { "epoch": 25.32850241545894, "grad_norm": 0.4207269549369812, "learning_rate": 0.001, "loss": 1.8974, "step": 293608 }, { "epoch": 25.333333333333332, "grad_norm": 1.9933527708053589, "learning_rate": 0.001, "loss": 1.8875, "step": 293664 }, { "epoch": 25.33816425120773, "grad_norm": 1.5796427726745605, "learning_rate": 0.001, "loss": 1.8965, "step": 293720 }, { "epoch": 25.342995169082126, "grad_norm": 3.365229368209839, "learning_rate": 0.001, "loss": 1.9063, "step": 293776 }, { "epoch": 25.347826086956523, "grad_norm": 0.43115368485450745, "learning_rate": 0.001, "loss": 1.9114, "step": 293832 }, { "epoch": 25.352657004830917, "grad_norm": 4.12042236328125, "learning_rate": 0.001, "loss": 1.8997, "step": 293888 }, { "epoch": 25.357487922705314, "grad_norm": 1.3762151002883911, "learning_rate": 0.001, "loss": 1.9046, "step": 293944 }, { "epoch": 25.36231884057971, "grad_norm": 0.4094729721546173, "learning_rate": 0.001, "loss": 1.9083, "step": 294000 }, { "epoch": 25.367149758454108, "grad_norm": 0.7817860245704651, "learning_rate": 0.001, "loss": 1.9004, "step": 294056 }, { "epoch": 25.3719806763285, "grad_norm": 0.6060207486152649, "learning_rate": 0.001, "loss": 1.904, "step": 294112 }, { "epoch": 25.3768115942029, "grad_norm": 1.3999605178833008, "learning_rate": 0.001, "loss": 1.913, "step": 294168 }, { "epoch": 25.381642512077295, "grad_norm": 1.6263113021850586, "learning_rate": 0.001, "loss": 1.9202, "step": 294224 }, { "epoch": 25.386473429951693, "grad_norm": 2.385714292526245, "learning_rate": 0.001, "loss": 1.9287, "step": 294280 }, { "epoch": 25.391304347826086, "grad_norm": 1.8584405183792114, "learning_rate": 0.001, "loss": 1.9301, "step": 294336 }, { "epoch": 25.396135265700483, "grad_norm": 0.6920762658119202, "learning_rate": 0.001, "loss": 1.9165, "step": 294392 }, { "epoch": 25.40096618357488, "grad_norm": 0.5580245852470398, "learning_rate": 0.001, "loss": 1.9099, "step": 294448 }, { "epoch": 25.405797101449274, "grad_norm": 0.687025249004364, "learning_rate": 0.001, "loss": 1.9164, "step": 294504 }, { "epoch": 25.41062801932367, "grad_norm": 1.1209429502487183, "learning_rate": 0.001, "loss": 1.9314, "step": 294560 }, { "epoch": 25.415458937198068, "grad_norm": 0.8178371787071228, "learning_rate": 0.001, "loss": 1.9285, "step": 294616 }, { "epoch": 25.420289855072465, "grad_norm": 0.632466733455658, "learning_rate": 0.001, "loss": 1.9308, "step": 294672 }, { "epoch": 25.42512077294686, "grad_norm": 0.37882667779922485, "learning_rate": 0.001, "loss": 1.9216, "step": 294728 }, { "epoch": 25.429951690821255, "grad_norm": 0.672427773475647, "learning_rate": 0.001, "loss": 1.9182, "step": 294784 }, { "epoch": 25.434782608695652, "grad_norm": 1.0975173711776733, "learning_rate": 0.001, "loss": 1.9061, "step": 294840 }, { "epoch": 25.43961352657005, "grad_norm": 0.32201433181762695, "learning_rate": 0.001, "loss": 1.9118, "step": 294896 }, { "epoch": 25.444444444444443, "grad_norm": 0.39145660400390625, "learning_rate": 0.001, "loss": 1.9092, "step": 294952 }, { "epoch": 25.44927536231884, "grad_norm": 1.644083023071289, "learning_rate": 0.001, "loss": 1.9003, "step": 295008 }, { "epoch": 25.454106280193237, "grad_norm": 1.29334557056427, "learning_rate": 0.001, "loss": 1.9071, "step": 295064 }, { "epoch": 25.458937198067634, "grad_norm": 0.38255631923675537, "learning_rate": 0.001, "loss": 1.9079, "step": 295120 }, { "epoch": 25.463768115942027, "grad_norm": 0.27681881189346313, "learning_rate": 0.001, "loss": 1.9023, "step": 295176 }, { "epoch": 25.468599033816425, "grad_norm": 0.4592108726501465, "learning_rate": 0.001, "loss": 1.9097, "step": 295232 }, { "epoch": 25.47342995169082, "grad_norm": 3.836824655532837, "learning_rate": 0.001, "loss": 1.902, "step": 295288 }, { "epoch": 25.47826086956522, "grad_norm": 0.4101463258266449, "learning_rate": 0.001, "loss": 1.9092, "step": 295344 }, { "epoch": 25.483091787439612, "grad_norm": 0.47329702973365784, "learning_rate": 0.001, "loss": 1.9131, "step": 295400 }, { "epoch": 25.48792270531401, "grad_norm": 0.8570764660835266, "learning_rate": 0.001, "loss": 1.9215, "step": 295456 }, { "epoch": 25.492753623188406, "grad_norm": 4.283381462097168, "learning_rate": 0.001, "loss": 1.9173, "step": 295512 }, { "epoch": 25.497584541062803, "grad_norm": 1.0258461236953735, "learning_rate": 0.001, "loss": 1.9145, "step": 295568 }, { "epoch": 25.502415458937197, "grad_norm": 0.6075412631034851, "learning_rate": 0.001, "loss": 1.9045, "step": 295624 }, { "epoch": 25.507246376811594, "grad_norm": 0.3578660190105438, "learning_rate": 0.001, "loss": 1.9027, "step": 295680 }, { "epoch": 25.51207729468599, "grad_norm": 1.0285652875900269, "learning_rate": 0.001, "loss": 1.9069, "step": 295736 }, { "epoch": 25.516908212560388, "grad_norm": 0.3425889313220978, "learning_rate": 0.001, "loss": 1.9039, "step": 295792 }, { "epoch": 25.52173913043478, "grad_norm": 0.7712044715881348, "learning_rate": 0.001, "loss": 1.9079, "step": 295848 }, { "epoch": 25.52657004830918, "grad_norm": 1.4006562232971191, "learning_rate": 0.001, "loss": 1.9029, "step": 295904 }, { "epoch": 25.531400966183575, "grad_norm": 1.7159932851791382, "learning_rate": 0.001, "loss": 1.9061, "step": 295960 }, { "epoch": 25.536231884057973, "grad_norm": 0.2915489673614502, "learning_rate": 0.001, "loss": 1.9097, "step": 296016 }, { "epoch": 25.541062801932366, "grad_norm": 0.7826337218284607, "learning_rate": 0.001, "loss": 1.9149, "step": 296072 }, { "epoch": 25.545893719806763, "grad_norm": 1.243208408355713, "learning_rate": 0.001, "loss": 1.9084, "step": 296128 }, { "epoch": 25.55072463768116, "grad_norm": 0.7755004167556763, "learning_rate": 0.001, "loss": 1.9004, "step": 296184 }, { "epoch": 25.555555555555557, "grad_norm": 3.1196846961975098, "learning_rate": 0.001, "loss": 1.9033, "step": 296240 }, { "epoch": 25.56038647342995, "grad_norm": 0.38815218210220337, "learning_rate": 0.001, "loss": 1.9113, "step": 296296 }, { "epoch": 25.565217391304348, "grad_norm": 0.6186689734458923, "learning_rate": 0.001, "loss": 1.9013, "step": 296352 }, { "epoch": 25.570048309178745, "grad_norm": 0.7853377461433411, "learning_rate": 0.001, "loss": 1.9104, "step": 296408 }, { "epoch": 25.57487922705314, "grad_norm": 2.117340087890625, "learning_rate": 0.001, "loss": 1.9078, "step": 296464 }, { "epoch": 25.579710144927535, "grad_norm": 0.3405117392539978, "learning_rate": 0.001, "loss": 1.906, "step": 296520 }, { "epoch": 25.584541062801932, "grad_norm": 1.4066188335418701, "learning_rate": 0.001, "loss": 1.9063, "step": 296576 }, { "epoch": 25.58937198067633, "grad_norm": 2.6741034984588623, "learning_rate": 0.001, "loss": 1.9059, "step": 296632 }, { "epoch": 25.594202898550726, "grad_norm": 0.9391655921936035, "learning_rate": 0.001, "loss": 1.9018, "step": 296688 }, { "epoch": 25.59903381642512, "grad_norm": 0.38697245717048645, "learning_rate": 0.001, "loss": 1.9045, "step": 296744 }, { "epoch": 25.603864734299517, "grad_norm": 1.3267372846603394, "learning_rate": 0.001, "loss": 1.9107, "step": 296800 }, { "epoch": 25.608695652173914, "grad_norm": 0.345544695854187, "learning_rate": 0.001, "loss": 1.9083, "step": 296856 }, { "epoch": 25.613526570048307, "grad_norm": 0.32526302337646484, "learning_rate": 0.001, "loss": 1.9014, "step": 296912 }, { "epoch": 25.618357487922705, "grad_norm": 0.3191330134868622, "learning_rate": 0.001, "loss": 1.9017, "step": 296968 }, { "epoch": 25.6231884057971, "grad_norm": 0.5620341897010803, "learning_rate": 0.001, "loss": 1.9068, "step": 297024 }, { "epoch": 25.6280193236715, "grad_norm": 0.31250351667404175, "learning_rate": 0.001, "loss": 1.9124, "step": 297080 }, { "epoch": 25.632850241545892, "grad_norm": 0.7494297027587891, "learning_rate": 0.001, "loss": 1.9064, "step": 297136 }, { "epoch": 25.63768115942029, "grad_norm": 0.6574238538742065, "learning_rate": 0.001, "loss": 1.908, "step": 297192 }, { "epoch": 25.642512077294686, "grad_norm": 2.87058687210083, "learning_rate": 0.001, "loss": 1.9047, "step": 297248 }, { "epoch": 25.647342995169083, "grad_norm": 0.4257795512676239, "learning_rate": 0.001, "loss": 1.9079, "step": 297304 }, { "epoch": 25.652173913043477, "grad_norm": 0.6079059839248657, "learning_rate": 0.001, "loss": 1.9051, "step": 297360 }, { "epoch": 25.657004830917874, "grad_norm": 0.8318992257118225, "learning_rate": 0.001, "loss": 1.8974, "step": 297416 }, { "epoch": 25.66183574879227, "grad_norm": 1.1041759252548218, "learning_rate": 0.001, "loss": 1.8898, "step": 297472 }, { "epoch": 25.666666666666668, "grad_norm": 0.3551042675971985, "learning_rate": 0.001, "loss": 1.8893, "step": 297528 }, { "epoch": 25.67149758454106, "grad_norm": 0.3162704408168793, "learning_rate": 0.001, "loss": 1.9046, "step": 297584 }, { "epoch": 25.67632850241546, "grad_norm": 0.29571276903152466, "learning_rate": 0.001, "loss": 1.912, "step": 297640 }, { "epoch": 25.681159420289855, "grad_norm": 0.36899498105049133, "learning_rate": 0.001, "loss": 1.8996, "step": 297696 }, { "epoch": 25.685990338164252, "grad_norm": 0.6283080577850342, "learning_rate": 0.001, "loss": 1.8975, "step": 297752 }, { "epoch": 25.690821256038646, "grad_norm": 1.0238908529281616, "learning_rate": 0.001, "loss": 1.8969, "step": 297808 }, { "epoch": 25.695652173913043, "grad_norm": 0.41631507873535156, "learning_rate": 0.001, "loss": 1.9119, "step": 297864 }, { "epoch": 25.70048309178744, "grad_norm": 13.57119083404541, "learning_rate": 0.001, "loss": 1.9105, "step": 297920 }, { "epoch": 25.705314009661837, "grad_norm": 1.7018839120864868, "learning_rate": 0.001, "loss": 1.8944, "step": 297976 }, { "epoch": 25.71014492753623, "grad_norm": 1.874880313873291, "learning_rate": 0.001, "loss": 1.9113, "step": 298032 }, { "epoch": 25.714975845410628, "grad_norm": 0.8662129640579224, "learning_rate": 0.001, "loss": 1.8973, "step": 298088 }, { "epoch": 25.719806763285025, "grad_norm": 0.40759047865867615, "learning_rate": 0.001, "loss": 1.9034, "step": 298144 }, { "epoch": 25.72463768115942, "grad_norm": 0.49374592304229736, "learning_rate": 0.001, "loss": 1.9078, "step": 298200 }, { "epoch": 25.729468599033815, "grad_norm": 0.6400483846664429, "learning_rate": 0.001, "loss": 1.9147, "step": 298256 }, { "epoch": 25.734299516908212, "grad_norm": 0.3507595658302307, "learning_rate": 0.001, "loss": 1.9139, "step": 298312 }, { "epoch": 25.73913043478261, "grad_norm": 0.5335170030593872, "learning_rate": 0.001, "loss": 1.9047, "step": 298368 }, { "epoch": 25.743961352657006, "grad_norm": 0.3188813626766205, "learning_rate": 0.001, "loss": 1.8995, "step": 298424 }, { "epoch": 25.7487922705314, "grad_norm": 0.2959311306476593, "learning_rate": 0.001, "loss": 1.9036, "step": 298480 }, { "epoch": 25.753623188405797, "grad_norm": 0.4437108337879181, "learning_rate": 0.001, "loss": 1.8933, "step": 298536 }, { "epoch": 25.758454106280194, "grad_norm": 0.3364206552505493, "learning_rate": 0.001, "loss": 1.8986, "step": 298592 }, { "epoch": 25.76328502415459, "grad_norm": 0.6662651300430298, "learning_rate": 0.001, "loss": 1.9046, "step": 298648 }, { "epoch": 25.768115942028984, "grad_norm": 3.5131893157958984, "learning_rate": 0.001, "loss": 1.9039, "step": 298704 }, { "epoch": 25.77294685990338, "grad_norm": 0.42375093698501587, "learning_rate": 0.001, "loss": 1.8917, "step": 298760 }, { "epoch": 25.77777777777778, "grad_norm": 0.5596851110458374, "learning_rate": 0.001, "loss": 1.8976, "step": 298816 }, { "epoch": 25.782608695652176, "grad_norm": 1.331814169883728, "learning_rate": 0.001, "loss": 1.9112, "step": 298872 }, { "epoch": 25.78743961352657, "grad_norm": 0.44547462463378906, "learning_rate": 0.001, "loss": 1.905, "step": 298928 }, { "epoch": 25.792270531400966, "grad_norm": 5.6350507736206055, "learning_rate": 0.001, "loss": 1.913, "step": 298984 }, { "epoch": 25.797101449275363, "grad_norm": 0.7228078842163086, "learning_rate": 0.001, "loss": 1.9269, "step": 299040 }, { "epoch": 25.80193236714976, "grad_norm": 1.2135993242263794, "learning_rate": 0.001, "loss": 1.9211, "step": 299096 }, { "epoch": 25.806763285024154, "grad_norm": 1.1408370733261108, "learning_rate": 0.001, "loss": 1.9235, "step": 299152 }, { "epoch": 25.81159420289855, "grad_norm": 0.7281011939048767, "learning_rate": 0.001, "loss": 1.914, "step": 299208 }, { "epoch": 25.816425120772948, "grad_norm": 0.4898208677768707, "learning_rate": 0.001, "loss": 1.8962, "step": 299264 }, { "epoch": 25.82125603864734, "grad_norm": 1.246077537536621, "learning_rate": 0.001, "loss": 1.9248, "step": 299320 }, { "epoch": 25.82608695652174, "grad_norm": 2.373201847076416, "learning_rate": 0.001, "loss": 1.9255, "step": 299376 }, { "epoch": 25.830917874396135, "grad_norm": 1.2950632572174072, "learning_rate": 0.001, "loss": 1.9141, "step": 299432 }, { "epoch": 25.835748792270532, "grad_norm": 0.3583345413208008, "learning_rate": 0.001, "loss": 1.9208, "step": 299488 }, { "epoch": 25.840579710144926, "grad_norm": 0.7596715688705444, "learning_rate": 0.001, "loss": 1.9065, "step": 299544 }, { "epoch": 25.845410628019323, "grad_norm": 0.3765631914138794, "learning_rate": 0.001, "loss": 1.9013, "step": 299600 }, { "epoch": 25.85024154589372, "grad_norm": 0.3842770755290985, "learning_rate": 0.001, "loss": 1.914, "step": 299656 }, { "epoch": 25.855072463768117, "grad_norm": 1.05859375, "learning_rate": 0.001, "loss": 1.9111, "step": 299712 }, { "epoch": 25.85990338164251, "grad_norm": 0.9447957873344421, "learning_rate": 0.001, "loss": 1.9161, "step": 299768 }, { "epoch": 25.864734299516908, "grad_norm": 0.43140825629234314, "learning_rate": 0.001, "loss": 1.914, "step": 299824 }, { "epoch": 25.869565217391305, "grad_norm": 1.1701570749282837, "learning_rate": 0.001, "loss": 1.918, "step": 299880 }, { "epoch": 25.8743961352657, "grad_norm": 1.0709367990493774, "learning_rate": 0.001, "loss": 1.9142, "step": 299936 }, { "epoch": 25.879227053140095, "grad_norm": 0.38440442085266113, "learning_rate": 0.001, "loss": 1.9301, "step": 299992 }, { "epoch": 25.884057971014492, "grad_norm": 2.866610288619995, "learning_rate": 0.001, "loss": 1.9214, "step": 300048 }, { "epoch": 25.88888888888889, "grad_norm": 3.443896532058716, "learning_rate": 0.001, "loss": 1.9175, "step": 300104 }, { "epoch": 25.893719806763286, "grad_norm": 0.5511997938156128, "learning_rate": 0.001, "loss": 1.9204, "step": 300160 }, { "epoch": 25.89855072463768, "grad_norm": 0.29944583773612976, "learning_rate": 0.001, "loss": 1.9189, "step": 300216 }, { "epoch": 25.903381642512077, "grad_norm": 1.086601972579956, "learning_rate": 0.001, "loss": 1.9112, "step": 300272 }, { "epoch": 25.908212560386474, "grad_norm": 0.8408346176147461, "learning_rate": 0.001, "loss": 1.9176, "step": 300328 }, { "epoch": 25.91304347826087, "grad_norm": 0.36783310770988464, "learning_rate": 0.001, "loss": 1.9205, "step": 300384 }, { "epoch": 25.917874396135264, "grad_norm": 0.6311558485031128, "learning_rate": 0.001, "loss": 1.9139, "step": 300440 }, { "epoch": 25.92270531400966, "grad_norm": 0.47691747546195984, "learning_rate": 0.001, "loss": 1.9067, "step": 300496 }, { "epoch": 25.92753623188406, "grad_norm": 0.6378122568130493, "learning_rate": 0.001, "loss": 1.9002, "step": 300552 }, { "epoch": 25.932367149758456, "grad_norm": 0.8052208423614502, "learning_rate": 0.001, "loss": 1.9079, "step": 300608 }, { "epoch": 25.93719806763285, "grad_norm": 0.8299881815910339, "learning_rate": 0.001, "loss": 1.9101, "step": 300664 }, { "epoch": 25.942028985507246, "grad_norm": 0.3130775988101959, "learning_rate": 0.001, "loss": 1.9023, "step": 300720 }, { "epoch": 25.946859903381643, "grad_norm": 0.5967893004417419, "learning_rate": 0.001, "loss": 1.9047, "step": 300776 }, { "epoch": 25.95169082125604, "grad_norm": 0.5926712155342102, "learning_rate": 0.001, "loss": 1.894, "step": 300832 }, { "epoch": 25.956521739130434, "grad_norm": 0.5201270580291748, "learning_rate": 0.001, "loss": 1.9009, "step": 300888 }, { "epoch": 25.96135265700483, "grad_norm": 1.2219924926757812, "learning_rate": 0.001, "loss": 1.9183, "step": 300944 }, { "epoch": 25.966183574879228, "grad_norm": 1.3014028072357178, "learning_rate": 0.001, "loss": 1.9227, "step": 301000 }, { "epoch": 25.971014492753625, "grad_norm": 0.6416466236114502, "learning_rate": 0.001, "loss": 1.9303, "step": 301056 }, { "epoch": 25.97584541062802, "grad_norm": 0.7122479677200317, "learning_rate": 0.001, "loss": 1.9299, "step": 301112 }, { "epoch": 25.980676328502415, "grad_norm": 1.0494407415390015, "learning_rate": 0.001, "loss": 1.922, "step": 301168 }, { "epoch": 25.985507246376812, "grad_norm": 0.9618339538574219, "learning_rate": 0.001, "loss": 1.9197, "step": 301224 }, { "epoch": 25.990338164251206, "grad_norm": 0.6108476519584656, "learning_rate": 0.001, "loss": 1.9209, "step": 301280 }, { "epoch": 25.995169082125603, "grad_norm": 1.281829833984375, "learning_rate": 0.001, "loss": 1.9196, "step": 301336 }, { "epoch": 26.0, "grad_norm": 1.3143168687820435, "learning_rate": 0.001, "loss": 1.9205, "step": 301392 }, { "epoch": 26.004830917874397, "grad_norm": 1.1090672016143799, "learning_rate": 0.001, "loss": 1.8746, "step": 301448 }, { "epoch": 26.00966183574879, "grad_norm": 0.5035786032676697, "learning_rate": 0.001, "loss": 1.878, "step": 301504 }, { "epoch": 26.014492753623188, "grad_norm": 1.1404213905334473, "learning_rate": 0.001, "loss": 1.8671, "step": 301560 }, { "epoch": 26.019323671497585, "grad_norm": 0.5109812617301941, "learning_rate": 0.001, "loss": 1.8704, "step": 301616 }, { "epoch": 26.02415458937198, "grad_norm": 0.5612033009529114, "learning_rate": 0.001, "loss": 1.865, "step": 301672 }, { "epoch": 26.028985507246375, "grad_norm": 0.5191264748573303, "learning_rate": 0.001, "loss": 1.8738, "step": 301728 }, { "epoch": 26.033816425120772, "grad_norm": 0.42789584398269653, "learning_rate": 0.001, "loss": 1.871, "step": 301784 }, { "epoch": 26.03864734299517, "grad_norm": 0.3447798788547516, "learning_rate": 0.001, "loss": 1.8651, "step": 301840 }, { "epoch": 26.043478260869566, "grad_norm": 0.49298641085624695, "learning_rate": 0.001, "loss": 1.8816, "step": 301896 }, { "epoch": 26.04830917874396, "grad_norm": 0.4427963197231293, "learning_rate": 0.001, "loss": 1.8724, "step": 301952 }, { "epoch": 26.053140096618357, "grad_norm": 0.5729649662971497, "learning_rate": 0.001, "loss": 1.8702, "step": 302008 }, { "epoch": 26.057971014492754, "grad_norm": 1.5502758026123047, "learning_rate": 0.001, "loss": 1.8744, "step": 302064 }, { "epoch": 26.06280193236715, "grad_norm": 0.9390813112258911, "learning_rate": 0.001, "loss": 1.8776, "step": 302120 }, { "epoch": 26.067632850241544, "grad_norm": 1.3725903034210205, "learning_rate": 0.001, "loss": 1.8813, "step": 302176 }, { "epoch": 26.07246376811594, "grad_norm": 0.46793410181999207, "learning_rate": 0.001, "loss": 1.8942, "step": 302232 }, { "epoch": 26.07729468599034, "grad_norm": 1.2029708623886108, "learning_rate": 0.001, "loss": 1.8851, "step": 302288 }, { "epoch": 26.082125603864736, "grad_norm": 0.49923932552337646, "learning_rate": 0.001, "loss": 1.8989, "step": 302344 }, { "epoch": 26.08695652173913, "grad_norm": 0.6857315897941589, "learning_rate": 0.001, "loss": 1.8894, "step": 302400 }, { "epoch": 26.091787439613526, "grad_norm": 2.213942289352417, "learning_rate": 0.001, "loss": 1.8791, "step": 302456 }, { "epoch": 26.096618357487923, "grad_norm": 0.4258173704147339, "learning_rate": 0.001, "loss": 1.8871, "step": 302512 }, { "epoch": 26.10144927536232, "grad_norm": 1.16960871219635, "learning_rate": 0.001, "loss": 1.8899, "step": 302568 }, { "epoch": 26.106280193236714, "grad_norm": 0.9616075754165649, "learning_rate": 0.001, "loss": 1.8783, "step": 302624 }, { "epoch": 26.11111111111111, "grad_norm": 0.7716442942619324, "learning_rate": 0.001, "loss": 1.8726, "step": 302680 }, { "epoch": 26.115942028985508, "grad_norm": 5.355496406555176, "learning_rate": 0.001, "loss": 1.8799, "step": 302736 }, { "epoch": 26.120772946859905, "grad_norm": 1.204249382019043, "learning_rate": 0.001, "loss": 1.8727, "step": 302792 }, { "epoch": 26.1256038647343, "grad_norm": 1.827998399734497, "learning_rate": 0.001, "loss": 1.8788, "step": 302848 }, { "epoch": 26.130434782608695, "grad_norm": 2.4009156227111816, "learning_rate": 0.001, "loss": 1.87, "step": 302904 }, { "epoch": 26.135265700483092, "grad_norm": 0.2766123116016388, "learning_rate": 0.001, "loss": 1.8851, "step": 302960 }, { "epoch": 26.14009661835749, "grad_norm": 0.9976978898048401, "learning_rate": 0.001, "loss": 1.8847, "step": 303016 }, { "epoch": 26.144927536231883, "grad_norm": 0.30852484703063965, "learning_rate": 0.001, "loss": 1.8775, "step": 303072 }, { "epoch": 26.14975845410628, "grad_norm": 2.1905627250671387, "learning_rate": 0.001, "loss": 1.8689, "step": 303128 }, { "epoch": 26.154589371980677, "grad_norm": 0.8048823475837708, "learning_rate": 0.001, "loss": 1.869, "step": 303184 }, { "epoch": 26.159420289855074, "grad_norm": 0.613875150680542, "learning_rate": 0.001, "loss": 1.876, "step": 303240 }, { "epoch": 26.164251207729468, "grad_norm": 0.9056879878044128, "learning_rate": 0.001, "loss": 1.8837, "step": 303296 }, { "epoch": 26.169082125603865, "grad_norm": 1.5494990348815918, "learning_rate": 0.001, "loss": 1.8723, "step": 303352 }, { "epoch": 26.17391304347826, "grad_norm": 4.792817115783691, "learning_rate": 0.001, "loss": 1.8841, "step": 303408 }, { "epoch": 26.17874396135266, "grad_norm": 0.3415108323097229, "learning_rate": 0.001, "loss": 1.8802, "step": 303464 }, { "epoch": 26.183574879227052, "grad_norm": 1.3616974353790283, "learning_rate": 0.001, "loss": 1.8812, "step": 303520 }, { "epoch": 26.18840579710145, "grad_norm": 0.40016573667526245, "learning_rate": 0.001, "loss": 1.887, "step": 303576 }, { "epoch": 26.193236714975846, "grad_norm": 1.1108522415161133, "learning_rate": 0.001, "loss": 1.8926, "step": 303632 }, { "epoch": 26.19806763285024, "grad_norm": 1.0270116329193115, "learning_rate": 0.001, "loss": 1.8783, "step": 303688 }, { "epoch": 26.202898550724637, "grad_norm": 0.4639199376106262, "learning_rate": 0.001, "loss": 1.8783, "step": 303744 }, { "epoch": 26.207729468599034, "grad_norm": 0.2807302176952362, "learning_rate": 0.001, "loss": 1.87, "step": 303800 }, { "epoch": 26.21256038647343, "grad_norm": 5.228604316711426, "learning_rate": 0.001, "loss": 1.8717, "step": 303856 }, { "epoch": 26.217391304347824, "grad_norm": 0.8545970916748047, "learning_rate": 0.001, "loss": 1.8832, "step": 303912 }, { "epoch": 26.22222222222222, "grad_norm": 1.9023536443710327, "learning_rate": 0.001, "loss": 1.8811, "step": 303968 }, { "epoch": 26.22705314009662, "grad_norm": 0.4753367006778717, "learning_rate": 0.001, "loss": 1.8838, "step": 304024 }, { "epoch": 26.231884057971016, "grad_norm": 0.37560272216796875, "learning_rate": 0.001, "loss": 1.8773, "step": 304080 }, { "epoch": 26.23671497584541, "grad_norm": 0.3396928310394287, "learning_rate": 0.001, "loss": 1.8767, "step": 304136 }, { "epoch": 26.241545893719806, "grad_norm": 1.4739775657653809, "learning_rate": 0.001, "loss": 1.8865, "step": 304192 }, { "epoch": 26.246376811594203, "grad_norm": 0.41447797417640686, "learning_rate": 0.001, "loss": 1.8867, "step": 304248 }, { "epoch": 26.2512077294686, "grad_norm": 3.348151683807373, "learning_rate": 0.001, "loss": 1.8896, "step": 304304 }, { "epoch": 26.256038647342994, "grad_norm": 0.5316768884658813, "learning_rate": 0.001, "loss": 1.8813, "step": 304360 }, { "epoch": 26.26086956521739, "grad_norm": 3.1807913780212402, "learning_rate": 0.001, "loss": 1.8851, "step": 304416 }, { "epoch": 26.265700483091788, "grad_norm": 0.9746037125587463, "learning_rate": 0.001, "loss": 1.8784, "step": 304472 }, { "epoch": 26.270531400966185, "grad_norm": 0.6775360107421875, "learning_rate": 0.001, "loss": 1.8754, "step": 304528 }, { "epoch": 26.27536231884058, "grad_norm": 0.6768845915794373, "learning_rate": 0.001, "loss": 1.8793, "step": 304584 }, { "epoch": 26.280193236714975, "grad_norm": 0.5649595260620117, "learning_rate": 0.001, "loss": 1.8747, "step": 304640 }, { "epoch": 26.285024154589372, "grad_norm": 0.33543792366981506, "learning_rate": 0.001, "loss": 1.8768, "step": 304696 }, { "epoch": 26.28985507246377, "grad_norm": 0.2526896893978119, "learning_rate": 0.001, "loss": 1.8703, "step": 304752 }, { "epoch": 26.294685990338163, "grad_norm": 0.3276289701461792, "learning_rate": 0.001, "loss": 1.8704, "step": 304808 }, { "epoch": 26.29951690821256, "grad_norm": 9.126632690429688, "learning_rate": 0.001, "loss": 1.8708, "step": 304864 }, { "epoch": 26.304347826086957, "grad_norm": 0.6791106462478638, "learning_rate": 0.001, "loss": 1.8884, "step": 304920 }, { "epoch": 26.309178743961354, "grad_norm": 0.885800838470459, "learning_rate": 0.001, "loss": 1.8883, "step": 304976 }, { "epoch": 26.314009661835748, "grad_norm": 0.8597341775894165, "learning_rate": 0.001, "loss": 1.8957, "step": 305032 }, { "epoch": 26.318840579710145, "grad_norm": 0.8280271887779236, "learning_rate": 0.001, "loss": 1.8858, "step": 305088 }, { "epoch": 26.32367149758454, "grad_norm": 0.317353755235672, "learning_rate": 0.001, "loss": 1.8882, "step": 305144 }, { "epoch": 26.32850241545894, "grad_norm": 6.9839630126953125, "learning_rate": 0.001, "loss": 1.9069, "step": 305200 }, { "epoch": 26.333333333333332, "grad_norm": 0.3132542669773102, "learning_rate": 0.001, "loss": 1.8812, "step": 305256 }, { "epoch": 26.33816425120773, "grad_norm": 6.474642276763916, "learning_rate": 0.001, "loss": 1.8936, "step": 305312 }, { "epoch": 26.342995169082126, "grad_norm": 2.4401490688323975, "learning_rate": 0.001, "loss": 1.8914, "step": 305368 }, { "epoch": 26.347826086956523, "grad_norm": 7.178806304931641, "learning_rate": 0.001, "loss": 1.89, "step": 305424 }, { "epoch": 26.352657004830917, "grad_norm": 3.663526773452759, "learning_rate": 0.001, "loss": 1.8872, "step": 305480 }, { "epoch": 26.357487922705314, "grad_norm": 0.6609997749328613, "learning_rate": 0.001, "loss": 1.8908, "step": 305536 }, { "epoch": 26.36231884057971, "grad_norm": 0.3757002651691437, "learning_rate": 0.001, "loss": 1.8922, "step": 305592 }, { "epoch": 26.367149758454108, "grad_norm": 1.704759955406189, "learning_rate": 0.001, "loss": 1.8866, "step": 305648 }, { "epoch": 26.3719806763285, "grad_norm": 0.5327828526496887, "learning_rate": 0.001, "loss": 1.8755, "step": 305704 }, { "epoch": 26.3768115942029, "grad_norm": 0.5192166566848755, "learning_rate": 0.001, "loss": 1.8821, "step": 305760 }, { "epoch": 26.381642512077295, "grad_norm": 0.6626099348068237, "learning_rate": 0.001, "loss": 1.8777, "step": 305816 }, { "epoch": 26.386473429951693, "grad_norm": 7.298976421356201, "learning_rate": 0.001, "loss": 1.8825, "step": 305872 }, { "epoch": 26.391304347826086, "grad_norm": 0.8154512643814087, "learning_rate": 0.001, "loss": 1.8897, "step": 305928 }, { "epoch": 26.396135265700483, "grad_norm": 0.33252355456352234, "learning_rate": 0.001, "loss": 1.8804, "step": 305984 }, { "epoch": 26.40096618357488, "grad_norm": 0.7255956530570984, "learning_rate": 0.001, "loss": 1.8855, "step": 306040 }, { "epoch": 26.405797101449274, "grad_norm": 0.94120854139328, "learning_rate": 0.001, "loss": 1.8826, "step": 306096 }, { "epoch": 26.41062801932367, "grad_norm": 0.2682323753833771, "learning_rate": 0.001, "loss": 1.8851, "step": 306152 }, { "epoch": 26.415458937198068, "grad_norm": 1.1721643209457397, "learning_rate": 0.001, "loss": 1.8834, "step": 306208 }, { "epoch": 26.420289855072465, "grad_norm": 0.2778533101081848, "learning_rate": 0.001, "loss": 1.8729, "step": 306264 }, { "epoch": 26.42512077294686, "grad_norm": 0.3333447277545929, "learning_rate": 0.001, "loss": 1.8845, "step": 306320 }, { "epoch": 26.429951690821255, "grad_norm": 0.2832978367805481, "learning_rate": 0.001, "loss": 1.8755, "step": 306376 }, { "epoch": 26.434782608695652, "grad_norm": 0.883465051651001, "learning_rate": 0.001, "loss": 1.8828, "step": 306432 }, { "epoch": 26.43961352657005, "grad_norm": 0.4079696536064148, "learning_rate": 0.001, "loss": 1.875, "step": 306488 }, { "epoch": 26.444444444444443, "grad_norm": 0.3065531253814697, "learning_rate": 0.001, "loss": 1.8765, "step": 306544 }, { "epoch": 26.44927536231884, "grad_norm": 0.569281280040741, "learning_rate": 0.001, "loss": 1.8903, "step": 306600 }, { "epoch": 26.454106280193237, "grad_norm": 0.3361499607563019, "learning_rate": 0.001, "loss": 1.8999, "step": 306656 }, { "epoch": 26.458937198067634, "grad_norm": 0.39027315378189087, "learning_rate": 0.001, "loss": 1.9017, "step": 306712 }, { "epoch": 26.463768115942027, "grad_norm": 0.3455759286880493, "learning_rate": 0.001, "loss": 1.8974, "step": 306768 }, { "epoch": 26.468599033816425, "grad_norm": 0.3283372223377228, "learning_rate": 0.001, "loss": 1.8923, "step": 306824 }, { "epoch": 26.47342995169082, "grad_norm": 0.30351710319519043, "learning_rate": 0.001, "loss": 1.8857, "step": 306880 }, { "epoch": 26.47826086956522, "grad_norm": 0.3195769488811493, "learning_rate": 0.001, "loss": 1.8859, "step": 306936 }, { "epoch": 26.483091787439612, "grad_norm": 0.7026917338371277, "learning_rate": 0.001, "loss": 1.8837, "step": 306992 }, { "epoch": 26.48792270531401, "grad_norm": 0.3198241591453552, "learning_rate": 0.001, "loss": 1.8837, "step": 307048 }, { "epoch": 26.492753623188406, "grad_norm": 0.5403355360031128, "learning_rate": 0.001, "loss": 1.8952, "step": 307104 }, { "epoch": 26.497584541062803, "grad_norm": 1.1172131299972534, "learning_rate": 0.001, "loss": 1.8867, "step": 307160 }, { "epoch": 26.502415458937197, "grad_norm": 1.1870065927505493, "learning_rate": 0.001, "loss": 1.9141, "step": 307216 }, { "epoch": 26.507246376811594, "grad_norm": 2.358741521835327, "learning_rate": 0.001, "loss": 1.929, "step": 307272 }, { "epoch": 26.51207729468599, "grad_norm": 0.39125892519950867, "learning_rate": 0.001, "loss": 1.9167, "step": 307328 }, { "epoch": 26.516908212560388, "grad_norm": 0.7804172039031982, "learning_rate": 0.001, "loss": 1.8994, "step": 307384 }, { "epoch": 26.52173913043478, "grad_norm": 2.6584928035736084, "learning_rate": 0.001, "loss": 1.8934, "step": 307440 }, { "epoch": 26.52657004830918, "grad_norm": 1.4944427013397217, "learning_rate": 0.001, "loss": 1.901, "step": 307496 }, { "epoch": 26.531400966183575, "grad_norm": 0.5590356588363647, "learning_rate": 0.001, "loss": 1.8994, "step": 307552 }, { "epoch": 26.536231884057973, "grad_norm": 1.2691519260406494, "learning_rate": 0.001, "loss": 1.8963, "step": 307608 }, { "epoch": 26.541062801932366, "grad_norm": 2.2038352489471436, "learning_rate": 0.001, "loss": 1.8961, "step": 307664 }, { "epoch": 26.545893719806763, "grad_norm": 18.64430809020996, "learning_rate": 0.001, "loss": 1.8852, "step": 307720 }, { "epoch": 26.55072463768116, "grad_norm": 0.9700139164924622, "learning_rate": 0.001, "loss": 1.899, "step": 307776 }, { "epoch": 26.555555555555557, "grad_norm": 0.9777677655220032, "learning_rate": 0.001, "loss": 1.9145, "step": 307832 }, { "epoch": 26.56038647342995, "grad_norm": 0.8899800181388855, "learning_rate": 0.001, "loss": 1.9049, "step": 307888 }, { "epoch": 26.565217391304348, "grad_norm": 0.48456913232803345, "learning_rate": 0.001, "loss": 1.8916, "step": 307944 }, { "epoch": 26.570048309178745, "grad_norm": 0.41189396381378174, "learning_rate": 0.001, "loss": 1.9031, "step": 308000 }, { "epoch": 26.57487922705314, "grad_norm": 1.8014603853225708, "learning_rate": 0.001, "loss": 1.8989, "step": 308056 }, { "epoch": 26.579710144927535, "grad_norm": 1.531384825706482, "learning_rate": 0.001, "loss": 1.8974, "step": 308112 }, { "epoch": 26.584541062801932, "grad_norm": 1.1755337715148926, "learning_rate": 0.001, "loss": 1.8886, "step": 308168 }, { "epoch": 26.58937198067633, "grad_norm": 10.700976371765137, "learning_rate": 0.001, "loss": 1.8956, "step": 308224 }, { "epoch": 26.594202898550726, "grad_norm": 3.254497528076172, "learning_rate": 0.001, "loss": 1.9164, "step": 308280 }, { "epoch": 26.59903381642512, "grad_norm": 1.1473145484924316, "learning_rate": 0.001, "loss": 1.919, "step": 308336 }, { "epoch": 26.603864734299517, "grad_norm": 1.8856450319290161, "learning_rate": 0.001, "loss": 1.9368, "step": 308392 }, { "epoch": 26.608695652173914, "grad_norm": 2.5101125240325928, "learning_rate": 0.001, "loss": 1.9449, "step": 308448 }, { "epoch": 26.613526570048307, "grad_norm": 1.7259069681167603, "learning_rate": 0.001, "loss": 1.9423, "step": 308504 }, { "epoch": 26.618357487922705, "grad_norm": 1.6580649614334106, "learning_rate": 0.001, "loss": 1.9319, "step": 308560 }, { "epoch": 26.6231884057971, "grad_norm": 1.1380752325057983, "learning_rate": 0.001, "loss": 1.9181, "step": 308616 }, { "epoch": 26.6280193236715, "grad_norm": 0.766771137714386, "learning_rate": 0.001, "loss": 1.9123, "step": 308672 }, { "epoch": 26.632850241545892, "grad_norm": 5.427463054656982, "learning_rate": 0.001, "loss": 1.9195, "step": 308728 }, { "epoch": 26.63768115942029, "grad_norm": 0.8008475303649902, "learning_rate": 0.001, "loss": 1.9184, "step": 308784 }, { "epoch": 26.642512077294686, "grad_norm": 1.1295855045318604, "learning_rate": 0.001, "loss": 1.9153, "step": 308840 }, { "epoch": 26.647342995169083, "grad_norm": 0.4221721887588501, "learning_rate": 0.001, "loss": 1.9032, "step": 308896 }, { "epoch": 26.652173913043477, "grad_norm": 2.990657329559326, "learning_rate": 0.001, "loss": 1.8925, "step": 308952 }, { "epoch": 26.657004830917874, "grad_norm": 0.8321185111999512, "learning_rate": 0.001, "loss": 1.896, "step": 309008 }, { "epoch": 26.66183574879227, "grad_norm": 0.3358325958251953, "learning_rate": 0.001, "loss": 1.9013, "step": 309064 }, { "epoch": 26.666666666666668, "grad_norm": 0.641778826713562, "learning_rate": 0.001, "loss": 1.8994, "step": 309120 }, { "epoch": 26.67149758454106, "grad_norm": 0.38921916484832764, "learning_rate": 0.001, "loss": 1.8912, "step": 309176 }, { "epoch": 26.67632850241546, "grad_norm": 0.35960710048675537, "learning_rate": 0.001, "loss": 1.886, "step": 309232 }, { "epoch": 26.681159420289855, "grad_norm": 0.4628863036632538, "learning_rate": 0.001, "loss": 1.8826, "step": 309288 }, { "epoch": 26.685990338164252, "grad_norm": 2.0270535945892334, "learning_rate": 0.001, "loss": 1.8941, "step": 309344 }, { "epoch": 26.690821256038646, "grad_norm": 2.1707756519317627, "learning_rate": 0.001, "loss": 1.8989, "step": 309400 }, { "epoch": 26.695652173913043, "grad_norm": 0.6616970300674438, "learning_rate": 0.001, "loss": 1.8959, "step": 309456 }, { "epoch": 26.70048309178744, "grad_norm": 0.3734076917171478, "learning_rate": 0.001, "loss": 1.9049, "step": 309512 }, { "epoch": 26.705314009661837, "grad_norm": 1.3182293176651, "learning_rate": 0.001, "loss": 1.889, "step": 309568 }, { "epoch": 26.71014492753623, "grad_norm": 0.6974278688430786, "learning_rate": 0.001, "loss": 1.8905, "step": 309624 }, { "epoch": 26.714975845410628, "grad_norm": 1.2047744989395142, "learning_rate": 0.001, "loss": 1.9039, "step": 309680 }, { "epoch": 26.719806763285025, "grad_norm": 2.1396772861480713, "learning_rate": 0.001, "loss": 1.9095, "step": 309736 }, { "epoch": 26.72463768115942, "grad_norm": 0.3909177780151367, "learning_rate": 0.001, "loss": 1.9091, "step": 309792 }, { "epoch": 26.729468599033815, "grad_norm": 7.051060676574707, "learning_rate": 0.001, "loss": 1.9042, "step": 309848 }, { "epoch": 26.734299516908212, "grad_norm": 0.8563029170036316, "learning_rate": 0.001, "loss": 1.9002, "step": 309904 }, { "epoch": 26.73913043478261, "grad_norm": 1.8648775815963745, "learning_rate": 0.001, "loss": 1.9019, "step": 309960 }, { "epoch": 26.743961352657006, "grad_norm": 1.2807790040969849, "learning_rate": 0.001, "loss": 1.8963, "step": 310016 }, { "epoch": 26.7487922705314, "grad_norm": 0.6904706358909607, "learning_rate": 0.001, "loss": 1.9017, "step": 310072 }, { "epoch": 26.753623188405797, "grad_norm": 0.381023108959198, "learning_rate": 0.001, "loss": 1.9143, "step": 310128 }, { "epoch": 26.758454106280194, "grad_norm": 1.7539176940917969, "learning_rate": 0.001, "loss": 1.9192, "step": 310184 }, { "epoch": 26.76328502415459, "grad_norm": 0.4614386558532715, "learning_rate": 0.001, "loss": 1.9185, "step": 310240 }, { "epoch": 26.768115942028984, "grad_norm": 2.982783555984497, "learning_rate": 0.001, "loss": 1.9108, "step": 310296 }, { "epoch": 26.77294685990338, "grad_norm": 0.5641258358955383, "learning_rate": 0.001, "loss": 1.9035, "step": 310352 }, { "epoch": 26.77777777777778, "grad_norm": 0.40009805560112, "learning_rate": 0.001, "loss": 1.9015, "step": 310408 }, { "epoch": 26.782608695652176, "grad_norm": 0.7269598841667175, "learning_rate": 0.001, "loss": 1.9003, "step": 310464 }, { "epoch": 26.78743961352657, "grad_norm": 0.5545816421508789, "learning_rate": 0.001, "loss": 1.9094, "step": 310520 }, { "epoch": 26.792270531400966, "grad_norm": 1.7288382053375244, "learning_rate": 0.001, "loss": 1.897, "step": 310576 }, { "epoch": 26.797101449275363, "grad_norm": 0.689685583114624, "learning_rate": 0.001, "loss": 1.8999, "step": 310632 }, { "epoch": 26.80193236714976, "grad_norm": 0.6434476375579834, "learning_rate": 0.001, "loss": 1.9001, "step": 310688 }, { "epoch": 26.806763285024154, "grad_norm": 0.7111773490905762, "learning_rate": 0.001, "loss": 1.9066, "step": 310744 }, { "epoch": 26.81159420289855, "grad_norm": 1.208738088607788, "learning_rate": 0.001, "loss": 1.9017, "step": 310800 }, { "epoch": 26.816425120772948, "grad_norm": 0.28572335839271545, "learning_rate": 0.001, "loss": 1.9076, "step": 310856 }, { "epoch": 26.82125603864734, "grad_norm": 1.7035478353500366, "learning_rate": 0.001, "loss": 1.9058, "step": 310912 }, { "epoch": 26.82608695652174, "grad_norm": 0.909943163394928, "learning_rate": 0.001, "loss": 1.9099, "step": 310968 }, { "epoch": 26.830917874396135, "grad_norm": 1.4705640077590942, "learning_rate": 0.001, "loss": 1.9045, "step": 311024 }, { "epoch": 26.835748792270532, "grad_norm": 1.6536723375320435, "learning_rate": 0.001, "loss": 1.9108, "step": 311080 }, { "epoch": 26.840579710144926, "grad_norm": 2.310727596282959, "learning_rate": 0.001, "loss": 1.9037, "step": 311136 }, { "epoch": 26.845410628019323, "grad_norm": 0.8978720307350159, "learning_rate": 0.001, "loss": 1.8984, "step": 311192 }, { "epoch": 26.85024154589372, "grad_norm": 1.3442364931106567, "learning_rate": 0.001, "loss": 1.8903, "step": 311248 }, { "epoch": 26.855072463768117, "grad_norm": 1.4992104768753052, "learning_rate": 0.001, "loss": 1.8943, "step": 311304 }, { "epoch": 26.85990338164251, "grad_norm": 1.4222944974899292, "learning_rate": 0.001, "loss": 1.896, "step": 311360 }, { "epoch": 26.864734299516908, "grad_norm": 0.3838197588920593, "learning_rate": 0.001, "loss": 1.9007, "step": 311416 }, { "epoch": 26.869565217391305, "grad_norm": 2.465280771255493, "learning_rate": 0.001, "loss": 1.8999, "step": 311472 }, { "epoch": 26.8743961352657, "grad_norm": 1.7289129495620728, "learning_rate": 0.001, "loss": 1.9006, "step": 311528 }, { "epoch": 26.879227053140095, "grad_norm": 0.9332231879234314, "learning_rate": 0.001, "loss": 1.9111, "step": 311584 }, { "epoch": 26.884057971014492, "grad_norm": 0.3344719707965851, "learning_rate": 0.001, "loss": 1.9266, "step": 311640 }, { "epoch": 26.88888888888889, "grad_norm": 0.40268272161483765, "learning_rate": 0.001, "loss": 1.9252, "step": 311696 }, { "epoch": 26.893719806763286, "grad_norm": 1.498618483543396, "learning_rate": 0.001, "loss": 1.9116, "step": 311752 }, { "epoch": 26.89855072463768, "grad_norm": 17.675731658935547, "learning_rate": 0.001, "loss": 1.9036, "step": 311808 }, { "epoch": 26.903381642512077, "grad_norm": 0.592947781085968, "learning_rate": 0.001, "loss": 1.9053, "step": 311864 }, { "epoch": 26.908212560386474, "grad_norm": 1.4547990560531616, "learning_rate": 0.001, "loss": 1.9095, "step": 311920 }, { "epoch": 26.91304347826087, "grad_norm": 0.6640229821205139, "learning_rate": 0.001, "loss": 1.9126, "step": 311976 }, { "epoch": 26.917874396135264, "grad_norm": 1.9883171319961548, "learning_rate": 0.001, "loss": 1.9135, "step": 312032 }, { "epoch": 26.92270531400966, "grad_norm": 0.6683921813964844, "learning_rate": 0.001, "loss": 1.9048, "step": 312088 }, { "epoch": 26.92753623188406, "grad_norm": 1.5512480735778809, "learning_rate": 0.001, "loss": 1.9066, "step": 312144 }, { "epoch": 26.932367149758456, "grad_norm": 0.49740952253341675, "learning_rate": 0.001, "loss": 1.9035, "step": 312200 }, { "epoch": 26.93719806763285, "grad_norm": 2.413365602493286, "learning_rate": 0.001, "loss": 1.9114, "step": 312256 }, { "epoch": 26.942028985507246, "grad_norm": 1.005081295967102, "learning_rate": 0.001, "loss": 1.9289, "step": 312312 }, { "epoch": 26.946859903381643, "grad_norm": 0.6323103308677673, "learning_rate": 0.001, "loss": 1.9196, "step": 312368 }, { "epoch": 26.95169082125604, "grad_norm": 1.4417591094970703, "learning_rate": 0.001, "loss": 1.9032, "step": 312424 }, { "epoch": 26.956521739130434, "grad_norm": 0.3168693482875824, "learning_rate": 0.001, "loss": 1.9094, "step": 312480 }, { "epoch": 26.96135265700483, "grad_norm": 1.1715271472930908, "learning_rate": 0.001, "loss": 1.9067, "step": 312536 }, { "epoch": 26.966183574879228, "grad_norm": 0.533421516418457, "learning_rate": 0.001, "loss": 1.9128, "step": 312592 }, { "epoch": 26.971014492753625, "grad_norm": 0.40822833776474, "learning_rate": 0.001, "loss": 1.9193, "step": 312648 }, { "epoch": 26.97584541062802, "grad_norm": 0.5746835470199585, "learning_rate": 0.001, "loss": 1.909, "step": 312704 }, { "epoch": 26.980676328502415, "grad_norm": 0.8537691831588745, "learning_rate": 0.001, "loss": 1.9145, "step": 312760 }, { "epoch": 26.985507246376812, "grad_norm": 1.048047661781311, "learning_rate": 0.001, "loss": 1.9266, "step": 312816 }, { "epoch": 26.990338164251206, "grad_norm": 0.6718924641609192, "learning_rate": 0.001, "loss": 1.9143, "step": 312872 }, { "epoch": 26.995169082125603, "grad_norm": 1.1036442518234253, "learning_rate": 0.001, "loss": 1.9087, "step": 312928 }, { "epoch": 27.0, "grad_norm": 0.8787195086479187, "learning_rate": 0.001, "loss": 1.9044, "step": 312984 }, { "epoch": 27.004830917874397, "grad_norm": 1.326701045036316, "learning_rate": 0.001, "loss": 1.8706, "step": 313040 }, { "epoch": 27.00966183574879, "grad_norm": 16.42797088623047, "learning_rate": 0.001, "loss": 1.8744, "step": 313096 }, { "epoch": 27.014492753623188, "grad_norm": 0.8020104765892029, "learning_rate": 0.001, "loss": 1.8741, "step": 313152 }, { "epoch": 27.019323671497585, "grad_norm": 1.0050700902938843, "learning_rate": 0.001, "loss": 1.8727, "step": 313208 }, { "epoch": 27.02415458937198, "grad_norm": 3.5074846744537354, "learning_rate": 0.001, "loss": 1.8741, "step": 313264 }, { "epoch": 27.028985507246375, "grad_norm": 0.5766336917877197, "learning_rate": 0.001, "loss": 1.8874, "step": 313320 }, { "epoch": 27.033816425120772, "grad_norm": 2.3538620471954346, "learning_rate": 0.001, "loss": 1.883, "step": 313376 }, { "epoch": 27.03864734299517, "grad_norm": 0.753011167049408, "learning_rate": 0.001, "loss": 1.8808, "step": 313432 }, { "epoch": 27.043478260869566, "grad_norm": 0.5945261120796204, "learning_rate": 0.001, "loss": 1.879, "step": 313488 }, { "epoch": 27.04830917874396, "grad_norm": 0.523683488368988, "learning_rate": 0.001, "loss": 1.8698, "step": 313544 }, { "epoch": 27.053140096618357, "grad_norm": 0.3526722192764282, "learning_rate": 0.001, "loss": 1.8775, "step": 313600 }, { "epoch": 27.057971014492754, "grad_norm": 0.5040813088417053, "learning_rate": 0.001, "loss": 1.8663, "step": 313656 }, { "epoch": 27.06280193236715, "grad_norm": 1.0941710472106934, "learning_rate": 0.001, "loss": 1.8658, "step": 313712 }, { "epoch": 27.067632850241544, "grad_norm": 1.4529997110366821, "learning_rate": 0.001, "loss": 1.875, "step": 313768 }, { "epoch": 27.07246376811594, "grad_norm": 0.7842084765434265, "learning_rate": 0.001, "loss": 1.8651, "step": 313824 }, { "epoch": 27.07729468599034, "grad_norm": 0.4730626940727234, "learning_rate": 0.001, "loss": 1.8601, "step": 313880 }, { "epoch": 27.082125603864736, "grad_norm": 1.0959725379943848, "learning_rate": 0.001, "loss": 1.8585, "step": 313936 }, { "epoch": 27.08695652173913, "grad_norm": 0.8946459889411926, "learning_rate": 0.001, "loss": 1.8601, "step": 313992 }, { "epoch": 27.091787439613526, "grad_norm": 1.3689531087875366, "learning_rate": 0.001, "loss": 1.8505, "step": 314048 }, { "epoch": 27.096618357487923, "grad_norm": 1.4602844715118408, "learning_rate": 0.001, "loss": 1.8606, "step": 314104 }, { "epoch": 27.10144927536232, "grad_norm": 0.5630793571472168, "learning_rate": 0.001, "loss": 1.8763, "step": 314160 }, { "epoch": 27.106280193236714, "grad_norm": 1.3670883178710938, "learning_rate": 0.001, "loss": 1.8786, "step": 314216 }, { "epoch": 27.11111111111111, "grad_norm": 0.5482655167579651, "learning_rate": 0.001, "loss": 1.8774, "step": 314272 }, { "epoch": 27.115942028985508, "grad_norm": 0.6280721426010132, "learning_rate": 0.001, "loss": 1.8712, "step": 314328 }, { "epoch": 27.120772946859905, "grad_norm": 0.6933141946792603, "learning_rate": 0.001, "loss": 1.8789, "step": 314384 }, { "epoch": 27.1256038647343, "grad_norm": 0.5642929673194885, "learning_rate": 0.001, "loss": 1.8858, "step": 314440 }, { "epoch": 27.130434782608695, "grad_norm": 0.47165897488594055, "learning_rate": 0.001, "loss": 1.8811, "step": 314496 }, { "epoch": 27.135265700483092, "grad_norm": 2.2702157497406006, "learning_rate": 0.001, "loss": 1.8782, "step": 314552 }, { "epoch": 27.14009661835749, "grad_norm": 1.172485589981079, "learning_rate": 0.001, "loss": 1.8695, "step": 314608 }, { "epoch": 27.144927536231883, "grad_norm": 0.5493935346603394, "learning_rate": 0.001, "loss": 1.8614, "step": 314664 }, { "epoch": 27.14975845410628, "grad_norm": 0.28513163328170776, "learning_rate": 0.001, "loss": 1.8718, "step": 314720 }, { "epoch": 27.154589371980677, "grad_norm": 0.5382900238037109, "learning_rate": 0.001, "loss": 1.8717, "step": 314776 }, { "epoch": 27.159420289855074, "grad_norm": 0.5325965881347656, "learning_rate": 0.001, "loss": 1.8639, "step": 314832 }, { "epoch": 27.164251207729468, "grad_norm": 0.5712432265281677, "learning_rate": 0.001, "loss": 1.8717, "step": 314888 }, { "epoch": 27.169082125603865, "grad_norm": 0.41147926449775696, "learning_rate": 0.001, "loss": 1.8805, "step": 314944 }, { "epoch": 27.17391304347826, "grad_norm": 0.7136927247047424, "learning_rate": 0.001, "loss": 1.8674, "step": 315000 }, { "epoch": 27.17874396135266, "grad_norm": 1.326589584350586, "learning_rate": 0.001, "loss": 1.8616, "step": 315056 }, { "epoch": 27.183574879227052, "grad_norm": 2.189208745956421, "learning_rate": 0.001, "loss": 1.8757, "step": 315112 }, { "epoch": 27.18840579710145, "grad_norm": 1.004647135734558, "learning_rate": 0.001, "loss": 1.8881, "step": 315168 }, { "epoch": 27.193236714975846, "grad_norm": 0.8220815062522888, "learning_rate": 0.001, "loss": 1.8783, "step": 315224 }, { "epoch": 27.19806763285024, "grad_norm": 0.5307570099830627, "learning_rate": 0.001, "loss": 1.8717, "step": 315280 }, { "epoch": 27.202898550724637, "grad_norm": 0.46694618463516235, "learning_rate": 0.001, "loss": 1.8646, "step": 315336 }, { "epoch": 27.207729468599034, "grad_norm": 0.5688669085502625, "learning_rate": 0.001, "loss": 1.8697, "step": 315392 }, { "epoch": 27.21256038647343, "grad_norm": 1.0991936922073364, "learning_rate": 0.001, "loss": 1.8741, "step": 315448 }, { "epoch": 27.217391304347824, "grad_norm": 1.3987079858779907, "learning_rate": 0.001, "loss": 1.8741, "step": 315504 }, { "epoch": 27.22222222222222, "grad_norm": 0.35779833793640137, "learning_rate": 0.001, "loss": 1.8726, "step": 315560 }, { "epoch": 27.22705314009662, "grad_norm": 2.180145263671875, "learning_rate": 0.001, "loss": 1.8711, "step": 315616 }, { "epoch": 27.231884057971016, "grad_norm": 0.4075707495212555, "learning_rate": 0.001, "loss": 1.8738, "step": 315672 }, { "epoch": 27.23671497584541, "grad_norm": 0.6211092472076416, "learning_rate": 0.001, "loss": 1.872, "step": 315728 }, { "epoch": 27.241545893719806, "grad_norm": 0.4445870816707611, "learning_rate": 0.001, "loss": 1.8676, "step": 315784 }, { "epoch": 27.246376811594203, "grad_norm": 0.2655699551105499, "learning_rate": 0.001, "loss": 1.8792, "step": 315840 }, { "epoch": 27.2512077294686, "grad_norm": 0.6398695111274719, "learning_rate": 0.001, "loss": 1.8798, "step": 315896 }, { "epoch": 27.256038647342994, "grad_norm": 0.5590243935585022, "learning_rate": 0.001, "loss": 1.8841, "step": 315952 }, { "epoch": 27.26086956521739, "grad_norm": 0.5827743411064148, "learning_rate": 0.001, "loss": 1.877, "step": 316008 }, { "epoch": 27.265700483091788, "grad_norm": 0.543076753616333, "learning_rate": 0.001, "loss": 1.879, "step": 316064 }, { "epoch": 27.270531400966185, "grad_norm": 0.4645482003688812, "learning_rate": 0.001, "loss": 1.9005, "step": 316120 }, { "epoch": 27.27536231884058, "grad_norm": 1.1291449069976807, "learning_rate": 0.001, "loss": 1.8973, "step": 316176 }, { "epoch": 27.280193236714975, "grad_norm": 0.7766647934913635, "learning_rate": 0.001, "loss": 1.891, "step": 316232 }, { "epoch": 27.285024154589372, "grad_norm": 0.43069392442703247, "learning_rate": 0.001, "loss": 1.9002, "step": 316288 }, { "epoch": 27.28985507246377, "grad_norm": 1.9576739072799683, "learning_rate": 0.001, "loss": 1.8846, "step": 316344 }, { "epoch": 27.294685990338163, "grad_norm": 0.36968687176704407, "learning_rate": 0.001, "loss": 1.8842, "step": 316400 }, { "epoch": 27.29951690821256, "grad_norm": 1.5662766695022583, "learning_rate": 0.001, "loss": 1.8977, "step": 316456 }, { "epoch": 27.304347826086957, "grad_norm": 0.6104956269264221, "learning_rate": 0.001, "loss": 1.9044, "step": 316512 }, { "epoch": 27.309178743961354, "grad_norm": 0.5514392256736755, "learning_rate": 0.001, "loss": 1.9121, "step": 316568 }, { "epoch": 27.314009661835748, "grad_norm": 0.3508784770965576, "learning_rate": 0.001, "loss": 1.9166, "step": 316624 }, { "epoch": 27.318840579710145, "grad_norm": 0.5754507184028625, "learning_rate": 0.001, "loss": 1.8906, "step": 316680 }, { "epoch": 27.32367149758454, "grad_norm": 0.883436381816864, "learning_rate": 0.001, "loss": 1.8968, "step": 316736 }, { "epoch": 27.32850241545894, "grad_norm": 2.083041191101074, "learning_rate": 0.001, "loss": 1.9031, "step": 316792 }, { "epoch": 27.333333333333332, "grad_norm": 0.956459641456604, "learning_rate": 0.001, "loss": 1.8948, "step": 316848 }, { "epoch": 27.33816425120773, "grad_norm": 0.9388265013694763, "learning_rate": 0.001, "loss": 1.8938, "step": 316904 }, { "epoch": 27.342995169082126, "grad_norm": 0.7901542782783508, "learning_rate": 0.001, "loss": 1.8905, "step": 316960 }, { "epoch": 27.347826086956523, "grad_norm": 0.4316692352294922, "learning_rate": 0.001, "loss": 1.8947, "step": 317016 }, { "epoch": 27.352657004830917, "grad_norm": 0.5385074019432068, "learning_rate": 0.001, "loss": 1.8903, "step": 317072 }, { "epoch": 27.357487922705314, "grad_norm": 0.2966645359992981, "learning_rate": 0.001, "loss": 1.8818, "step": 317128 }, { "epoch": 27.36231884057971, "grad_norm": 0.6879773139953613, "learning_rate": 0.001, "loss": 1.8895, "step": 317184 }, { "epoch": 27.367149758454108, "grad_norm": 0.879963219165802, "learning_rate": 0.001, "loss": 1.8757, "step": 317240 }, { "epoch": 27.3719806763285, "grad_norm": 1.953042984008789, "learning_rate": 0.001, "loss": 1.8737, "step": 317296 }, { "epoch": 27.3768115942029, "grad_norm": 1.9178799390792847, "learning_rate": 0.001, "loss": 1.8835, "step": 317352 }, { "epoch": 27.381642512077295, "grad_norm": 0.7519900798797607, "learning_rate": 0.001, "loss": 1.8875, "step": 317408 }, { "epoch": 27.386473429951693, "grad_norm": 2.2220876216888428, "learning_rate": 0.001, "loss": 1.8833, "step": 317464 }, { "epoch": 27.391304347826086, "grad_norm": 0.6742727756500244, "learning_rate": 0.001, "loss": 1.8864, "step": 317520 }, { "epoch": 27.396135265700483, "grad_norm": 1.0539315938949585, "learning_rate": 0.001, "loss": 1.8883, "step": 317576 }, { "epoch": 27.40096618357488, "grad_norm": 0.2949395477771759, "learning_rate": 0.001, "loss": 1.898, "step": 317632 }, { "epoch": 27.405797101449274, "grad_norm": 0.3182970881462097, "learning_rate": 0.001, "loss": 1.8974, "step": 317688 }, { "epoch": 27.41062801932367, "grad_norm": 0.6619102358818054, "learning_rate": 0.001, "loss": 1.8793, "step": 317744 }, { "epoch": 27.415458937198068, "grad_norm": 0.4429330825805664, "learning_rate": 0.001, "loss": 1.8813, "step": 317800 }, { "epoch": 27.420289855072465, "grad_norm": 0.4018218517303467, "learning_rate": 0.001, "loss": 1.8822, "step": 317856 }, { "epoch": 27.42512077294686, "grad_norm": 0.4882408380508423, "learning_rate": 0.001, "loss": 1.8808, "step": 317912 }, { "epoch": 27.429951690821255, "grad_norm": 1.5550724267959595, "learning_rate": 0.001, "loss": 1.8742, "step": 317968 }, { "epoch": 27.434782608695652, "grad_norm": 0.7251642942428589, "learning_rate": 0.001, "loss": 1.8752, "step": 318024 }, { "epoch": 27.43961352657005, "grad_norm": 1.447607159614563, "learning_rate": 0.001, "loss": 1.877, "step": 318080 }, { "epoch": 27.444444444444443, "grad_norm": 0.7382452487945557, "learning_rate": 0.001, "loss": 1.8778, "step": 318136 }, { "epoch": 27.44927536231884, "grad_norm": 0.6004638075828552, "learning_rate": 0.001, "loss": 1.8802, "step": 318192 }, { "epoch": 27.454106280193237, "grad_norm": 0.5994868874549866, "learning_rate": 0.001, "loss": 1.8845, "step": 318248 }, { "epoch": 27.458937198067634, "grad_norm": 0.499619722366333, "learning_rate": 0.001, "loss": 1.8937, "step": 318304 }, { "epoch": 27.463768115942027, "grad_norm": 0.7170670628547668, "learning_rate": 0.001, "loss": 1.899, "step": 318360 }, { "epoch": 27.468599033816425, "grad_norm": 0.3206084668636322, "learning_rate": 0.001, "loss": 1.8891, "step": 318416 }, { "epoch": 27.47342995169082, "grad_norm": 0.4154198467731476, "learning_rate": 0.001, "loss": 1.8802, "step": 318472 }, { "epoch": 27.47826086956522, "grad_norm": 0.48203417658805847, "learning_rate": 0.001, "loss": 1.8802, "step": 318528 }, { "epoch": 27.483091787439612, "grad_norm": 1.2199218273162842, "learning_rate": 0.001, "loss": 1.8815, "step": 318584 }, { "epoch": 27.48792270531401, "grad_norm": 3.0930137634277344, "learning_rate": 0.001, "loss": 1.8845, "step": 318640 }, { "epoch": 27.492753623188406, "grad_norm": 0.4436224699020386, "learning_rate": 0.001, "loss": 1.8831, "step": 318696 }, { "epoch": 27.497584541062803, "grad_norm": 0.7236968278884888, "learning_rate": 0.001, "loss": 1.8844, "step": 318752 }, { "epoch": 27.502415458937197, "grad_norm": 0.3699961304664612, "learning_rate": 0.001, "loss": 1.8863, "step": 318808 }, { "epoch": 27.507246376811594, "grad_norm": 0.6518350839614868, "learning_rate": 0.001, "loss": 1.8851, "step": 318864 }, { "epoch": 27.51207729468599, "grad_norm": 1.2222529649734497, "learning_rate": 0.001, "loss": 1.8839, "step": 318920 }, { "epoch": 27.516908212560388, "grad_norm": 1.0932095050811768, "learning_rate": 0.001, "loss": 1.8964, "step": 318976 }, { "epoch": 27.52173913043478, "grad_norm": 0.6933474540710449, "learning_rate": 0.001, "loss": 1.882, "step": 319032 }, { "epoch": 27.52657004830918, "grad_norm": 2.3337810039520264, "learning_rate": 0.001, "loss": 1.8958, "step": 319088 }, { "epoch": 27.531400966183575, "grad_norm": 1.5713279247283936, "learning_rate": 0.001, "loss": 1.8986, "step": 319144 }, { "epoch": 27.536231884057973, "grad_norm": 2.0587315559387207, "learning_rate": 0.001, "loss": 1.9114, "step": 319200 }, { "epoch": 27.541062801932366, "grad_norm": 0.8205888271331787, "learning_rate": 0.001, "loss": 1.907, "step": 319256 }, { "epoch": 27.545893719806763, "grad_norm": 0.47125864028930664, "learning_rate": 0.001, "loss": 1.8988, "step": 319312 }, { "epoch": 27.55072463768116, "grad_norm": 0.9135292768478394, "learning_rate": 0.001, "loss": 1.9016, "step": 319368 }, { "epoch": 27.555555555555557, "grad_norm": 1.260481357574463, "learning_rate": 0.001, "loss": 1.8844, "step": 319424 }, { "epoch": 27.56038647342995, "grad_norm": 0.728749692440033, "learning_rate": 0.001, "loss": 1.9079, "step": 319480 }, { "epoch": 27.565217391304348, "grad_norm": 0.372332364320755, "learning_rate": 0.001, "loss": 1.9039, "step": 319536 }, { "epoch": 27.570048309178745, "grad_norm": 0.6417818665504456, "learning_rate": 0.001, "loss": 1.9063, "step": 319592 }, { "epoch": 27.57487922705314, "grad_norm": 0.24539490044116974, "learning_rate": 0.001, "loss": 1.9033, "step": 319648 }, { "epoch": 27.579710144927535, "grad_norm": 0.644845724105835, "learning_rate": 0.001, "loss": 1.8935, "step": 319704 }, { "epoch": 27.584541062801932, "grad_norm": 0.3748593330383301, "learning_rate": 0.001, "loss": 1.8935, "step": 319760 }, { "epoch": 27.58937198067633, "grad_norm": 0.7903972864151001, "learning_rate": 0.001, "loss": 1.902, "step": 319816 }, { "epoch": 27.594202898550726, "grad_norm": 0.39561012387275696, "learning_rate": 0.001, "loss": 1.8867, "step": 319872 }, { "epoch": 27.59903381642512, "grad_norm": 10.904019355773926, "learning_rate": 0.001, "loss": 1.8924, "step": 319928 }, { "epoch": 27.603864734299517, "grad_norm": 0.33296820521354675, "learning_rate": 0.001, "loss": 1.8913, "step": 319984 }, { "epoch": 27.608695652173914, "grad_norm": 3.3433420658111572, "learning_rate": 0.001, "loss": 1.8959, "step": 320040 }, { "epoch": 27.613526570048307, "grad_norm": 1.7524195909500122, "learning_rate": 0.001, "loss": 1.893, "step": 320096 }, { "epoch": 27.618357487922705, "grad_norm": 5.968977928161621, "learning_rate": 0.001, "loss": 1.8968, "step": 320152 }, { "epoch": 27.6231884057971, "grad_norm": 4.030668258666992, "learning_rate": 0.001, "loss": 1.8801, "step": 320208 }, { "epoch": 27.6280193236715, "grad_norm": 0.5673682689666748, "learning_rate": 0.001, "loss": 1.8862, "step": 320264 }, { "epoch": 27.632850241545892, "grad_norm": 0.480268657207489, "learning_rate": 0.001, "loss": 1.8832, "step": 320320 }, { "epoch": 27.63768115942029, "grad_norm": 1.0985263586044312, "learning_rate": 0.001, "loss": 1.8795, "step": 320376 }, { "epoch": 27.642512077294686, "grad_norm": 0.46542710065841675, "learning_rate": 0.001, "loss": 1.8826, "step": 320432 }, { "epoch": 27.647342995169083, "grad_norm": 0.911636471748352, "learning_rate": 0.001, "loss": 1.887, "step": 320488 }, { "epoch": 27.652173913043477, "grad_norm": 0.38735121488571167, "learning_rate": 0.001, "loss": 1.8906, "step": 320544 }, { "epoch": 27.657004830917874, "grad_norm": 0.2978653013706207, "learning_rate": 0.001, "loss": 1.9011, "step": 320600 }, { "epoch": 27.66183574879227, "grad_norm": 0.3818797171115875, "learning_rate": 0.001, "loss": 1.8859, "step": 320656 }, { "epoch": 27.666666666666668, "grad_norm": 6.442737102508545, "learning_rate": 0.001, "loss": 1.8869, "step": 320712 }, { "epoch": 27.67149758454106, "grad_norm": 0.4381128251552582, "learning_rate": 0.001, "loss": 1.88, "step": 320768 }, { "epoch": 27.67632850241546, "grad_norm": 0.3563942313194275, "learning_rate": 0.001, "loss": 1.8886, "step": 320824 }, { "epoch": 27.681159420289855, "grad_norm": 0.33426153659820557, "learning_rate": 0.001, "loss": 1.8952, "step": 320880 }, { "epoch": 27.685990338164252, "grad_norm": 0.591313898563385, "learning_rate": 0.001, "loss": 1.8838, "step": 320936 }, { "epoch": 27.690821256038646, "grad_norm": 0.538343608379364, "learning_rate": 0.001, "loss": 1.8899, "step": 320992 }, { "epoch": 27.695652173913043, "grad_norm": 0.33096420764923096, "learning_rate": 0.001, "loss": 1.8842, "step": 321048 }, { "epoch": 27.70048309178744, "grad_norm": 1.6692655086517334, "learning_rate": 0.001, "loss": 1.8814, "step": 321104 }, { "epoch": 27.705314009661837, "grad_norm": 0.8772959113121033, "learning_rate": 0.001, "loss": 1.8851, "step": 321160 }, { "epoch": 27.71014492753623, "grad_norm": 0.7430129647254944, "learning_rate": 0.001, "loss": 1.8802, "step": 321216 }, { "epoch": 27.714975845410628, "grad_norm": 6.376121997833252, "learning_rate": 0.001, "loss": 1.8946, "step": 321272 }, { "epoch": 27.719806763285025, "grad_norm": 0.45732006430625916, "learning_rate": 0.001, "loss": 1.8939, "step": 321328 }, { "epoch": 27.72463768115942, "grad_norm": 2.5487396717071533, "learning_rate": 0.001, "loss": 1.8937, "step": 321384 }, { "epoch": 27.729468599033815, "grad_norm": 0.322135329246521, "learning_rate": 0.001, "loss": 1.9008, "step": 321440 }, { "epoch": 27.734299516908212, "grad_norm": 16.541006088256836, "learning_rate": 0.001, "loss": 1.9014, "step": 321496 }, { "epoch": 27.73913043478261, "grad_norm": 0.31626319885253906, "learning_rate": 0.001, "loss": 1.8968, "step": 321552 }, { "epoch": 27.743961352657006, "grad_norm": 0.3314145803451538, "learning_rate": 0.001, "loss": 1.8915, "step": 321608 }, { "epoch": 27.7487922705314, "grad_norm": 1.5720685720443726, "learning_rate": 0.001, "loss": 1.8989, "step": 321664 }, { "epoch": 27.753623188405797, "grad_norm": 0.9092897176742554, "learning_rate": 0.001, "loss": 1.8886, "step": 321720 }, { "epoch": 27.758454106280194, "grad_norm": 0.7346388697624207, "learning_rate": 0.001, "loss": 1.883, "step": 321776 }, { "epoch": 27.76328502415459, "grad_norm": 2.519763946533203, "learning_rate": 0.001, "loss": 1.8852, "step": 321832 }, { "epoch": 27.768115942028984, "grad_norm": 0.6742886900901794, "learning_rate": 0.001, "loss": 1.895, "step": 321888 }, { "epoch": 27.77294685990338, "grad_norm": 0.9749161005020142, "learning_rate": 0.001, "loss": 1.8932, "step": 321944 }, { "epoch": 27.77777777777778, "grad_norm": 0.3334799110889435, "learning_rate": 0.001, "loss": 1.8958, "step": 322000 }, { "epoch": 27.782608695652176, "grad_norm": 2.679762601852417, "learning_rate": 0.001, "loss": 1.8973, "step": 322056 }, { "epoch": 27.78743961352657, "grad_norm": 1.8996022939682007, "learning_rate": 0.001, "loss": 1.8989, "step": 322112 }, { "epoch": 27.792270531400966, "grad_norm": 3.7399845123291016, "learning_rate": 0.001, "loss": 1.904, "step": 322168 }, { "epoch": 27.797101449275363, "grad_norm": 0.9479333162307739, "learning_rate": 0.001, "loss": 1.9064, "step": 322224 }, { "epoch": 27.80193236714976, "grad_norm": 0.28084588050842285, "learning_rate": 0.001, "loss": 1.9243, "step": 322280 }, { "epoch": 27.806763285024154, "grad_norm": 0.603751003742218, "learning_rate": 0.001, "loss": 1.929, "step": 322336 }, { "epoch": 27.81159420289855, "grad_norm": 0.5026025176048279, "learning_rate": 0.001, "loss": 1.9212, "step": 322392 }, { "epoch": 27.816425120772948, "grad_norm": 0.8195517659187317, "learning_rate": 0.001, "loss": 1.9125, "step": 322448 }, { "epoch": 27.82125603864734, "grad_norm": 2.9567036628723145, "learning_rate": 0.001, "loss": 1.9094, "step": 322504 }, { "epoch": 27.82608695652174, "grad_norm": 5.535406112670898, "learning_rate": 0.001, "loss": 1.9189, "step": 322560 }, { "epoch": 27.830917874396135, "grad_norm": 0.9300132989883423, "learning_rate": 0.001, "loss": 1.9077, "step": 322616 }, { "epoch": 27.835748792270532, "grad_norm": 0.36531877517700195, "learning_rate": 0.001, "loss": 1.9054, "step": 322672 }, { "epoch": 27.840579710144926, "grad_norm": 1.9135181903839111, "learning_rate": 0.001, "loss": 1.9071, "step": 322728 }, { "epoch": 27.845410628019323, "grad_norm": 0.8453105092048645, "learning_rate": 0.001, "loss": 1.9184, "step": 322784 }, { "epoch": 27.85024154589372, "grad_norm": 0.35372525453567505, "learning_rate": 0.001, "loss": 1.9105, "step": 322840 }, { "epoch": 27.855072463768117, "grad_norm": 0.5117209553718567, "learning_rate": 0.001, "loss": 1.9258, "step": 322896 }, { "epoch": 27.85990338164251, "grad_norm": 0.629283607006073, "learning_rate": 0.001, "loss": 1.9318, "step": 322952 }, { "epoch": 27.864734299516908, "grad_norm": 0.5136920809745789, "learning_rate": 0.001, "loss": 1.9101, "step": 323008 }, { "epoch": 27.869565217391305, "grad_norm": 1.8003183603286743, "learning_rate": 0.001, "loss": 1.9257, "step": 323064 }, { "epoch": 27.8743961352657, "grad_norm": 0.9285981059074402, "learning_rate": 0.001, "loss": 1.9012, "step": 323120 }, { "epoch": 27.879227053140095, "grad_norm": 0.8813960552215576, "learning_rate": 0.001, "loss": 1.9041, "step": 323176 }, { "epoch": 27.884057971014492, "grad_norm": 0.6235100626945496, "learning_rate": 0.001, "loss": 1.9007, "step": 323232 }, { "epoch": 27.88888888888889, "grad_norm": 0.3910762071609497, "learning_rate": 0.001, "loss": 1.9127, "step": 323288 }, { "epoch": 27.893719806763286, "grad_norm": 8.673907279968262, "learning_rate": 0.001, "loss": 1.9082, "step": 323344 }, { "epoch": 27.89855072463768, "grad_norm": 3.1357994079589844, "learning_rate": 0.001, "loss": 1.9214, "step": 323400 }, { "epoch": 27.903381642512077, "grad_norm": 0.3850042223930359, "learning_rate": 0.001, "loss": 1.9151, "step": 323456 }, { "epoch": 27.908212560386474, "grad_norm": 0.30180904269218445, "learning_rate": 0.001, "loss": 1.9371, "step": 323512 }, { "epoch": 27.91304347826087, "grad_norm": 0.6604330539703369, "learning_rate": 0.001, "loss": 1.9429, "step": 323568 }, { "epoch": 27.917874396135264, "grad_norm": 0.5030882358551025, "learning_rate": 0.001, "loss": 1.9253, "step": 323624 }, { "epoch": 27.92270531400966, "grad_norm": 0.5363969206809998, "learning_rate": 0.001, "loss": 1.9049, "step": 323680 }, { "epoch": 27.92753623188406, "grad_norm": 1.0833579301834106, "learning_rate": 0.001, "loss": 1.9076, "step": 323736 }, { "epoch": 27.932367149758456, "grad_norm": 2.429192066192627, "learning_rate": 0.001, "loss": 1.906, "step": 323792 }, { "epoch": 27.93719806763285, "grad_norm": 2.371067762374878, "learning_rate": 0.001, "loss": 1.8969, "step": 323848 }, { "epoch": 27.942028985507246, "grad_norm": 0.9529378414154053, "learning_rate": 0.001, "loss": 1.9039, "step": 323904 }, { "epoch": 27.946859903381643, "grad_norm": 1.3829976320266724, "learning_rate": 0.001, "loss": 1.9014, "step": 323960 }, { "epoch": 27.95169082125604, "grad_norm": 14.774099349975586, "learning_rate": 0.001, "loss": 1.9239, "step": 324016 }, { "epoch": 27.956521739130434, "grad_norm": 18.525075912475586, "learning_rate": 0.001, "loss": 1.9404, "step": 324072 }, { "epoch": 27.96135265700483, "grad_norm": 0.5167252421379089, "learning_rate": 0.001, "loss": 1.9476, "step": 324128 }, { "epoch": 27.966183574879228, "grad_norm": 0.6662850975990295, "learning_rate": 0.001, "loss": 1.9395, "step": 324184 }, { "epoch": 27.971014492753625, "grad_norm": 0.41508254408836365, "learning_rate": 0.001, "loss": 1.9303, "step": 324240 }, { "epoch": 27.97584541062802, "grad_norm": 0.5331782102584839, "learning_rate": 0.001, "loss": 1.9173, "step": 324296 }, { "epoch": 27.980676328502415, "grad_norm": 3.6407034397125244, "learning_rate": 0.001, "loss": 1.9082, "step": 324352 }, { "epoch": 27.985507246376812, "grad_norm": 122.23438262939453, "learning_rate": 0.001, "loss": 1.9093, "step": 324408 }, { "epoch": 27.990338164251206, "grad_norm": 1.300862431526184, "learning_rate": 0.001, "loss": 1.9166, "step": 324464 }, { "epoch": 27.995169082125603, "grad_norm": 0.6456590294837952, "learning_rate": 0.001, "loss": 1.911, "step": 324520 }, { "epoch": 28.0, "grad_norm": 0.552609920501709, "learning_rate": 0.001, "loss": 1.9095, "step": 324576 }, { "epoch": 28.004830917874397, "grad_norm": 0.48888495564460754, "learning_rate": 0.001, "loss": 1.876, "step": 324632 }, { "epoch": 28.00966183574879, "grad_norm": 0.342519074678421, "learning_rate": 0.001, "loss": 1.8614, "step": 324688 }, { "epoch": 28.014492753623188, "grad_norm": 0.4688197672367096, "learning_rate": 0.001, "loss": 1.8703, "step": 324744 }, { "epoch": 28.019323671497585, "grad_norm": 0.9068715572357178, "learning_rate": 0.001, "loss": 1.8726, "step": 324800 }, { "epoch": 28.02415458937198, "grad_norm": 1.2657779455184937, "learning_rate": 0.001, "loss": 1.854, "step": 324856 }, { "epoch": 28.028985507246375, "grad_norm": 2.6490018367767334, "learning_rate": 0.001, "loss": 1.8685, "step": 324912 }, { "epoch": 28.033816425120772, "grad_norm": 0.5517184734344482, "learning_rate": 0.001, "loss": 1.8668, "step": 324968 }, { "epoch": 28.03864734299517, "grad_norm": 0.4099219739437103, "learning_rate": 0.001, "loss": 1.8663, "step": 325024 }, { "epoch": 28.043478260869566, "grad_norm": 0.39857882261276245, "learning_rate": 0.001, "loss": 1.8627, "step": 325080 }, { "epoch": 28.04830917874396, "grad_norm": 1.229873538017273, "learning_rate": 0.001, "loss": 1.865, "step": 325136 }, { "epoch": 28.053140096618357, "grad_norm": 1.3178666830062866, "learning_rate": 0.001, "loss": 1.8775, "step": 325192 }, { "epoch": 28.057971014492754, "grad_norm": 0.30644679069519043, "learning_rate": 0.001, "loss": 1.8748, "step": 325248 }, { "epoch": 28.06280193236715, "grad_norm": 1.005143404006958, "learning_rate": 0.001, "loss": 1.8736, "step": 325304 }, { "epoch": 28.067632850241544, "grad_norm": 0.3653129041194916, "learning_rate": 0.001, "loss": 1.8693, "step": 325360 }, { "epoch": 28.07246376811594, "grad_norm": 0.3228726089000702, "learning_rate": 0.001, "loss": 1.88, "step": 325416 }, { "epoch": 28.07729468599034, "grad_norm": 0.5306240320205688, "learning_rate": 0.001, "loss": 1.878, "step": 325472 }, { "epoch": 28.082125603864736, "grad_norm": 0.4342239499092102, "learning_rate": 0.001, "loss": 1.8895, "step": 325528 }, { "epoch": 28.08695652173913, "grad_norm": 2.961655378341675, "learning_rate": 0.001, "loss": 1.8611, "step": 325584 }, { "epoch": 28.091787439613526, "grad_norm": 0.7887042760848999, "learning_rate": 0.001, "loss": 1.8565, "step": 325640 }, { "epoch": 28.096618357487923, "grad_norm": 0.8795722723007202, "learning_rate": 0.001, "loss": 1.8673, "step": 325696 }, { "epoch": 28.10144927536232, "grad_norm": 2.6997663974761963, "learning_rate": 0.001, "loss": 1.8657, "step": 325752 }, { "epoch": 28.106280193236714, "grad_norm": 1.6187169551849365, "learning_rate": 0.001, "loss": 1.8733, "step": 325808 }, { "epoch": 28.11111111111111, "grad_norm": 1.2055699825286865, "learning_rate": 0.001, "loss": 1.8615, "step": 325864 }, { "epoch": 28.115942028985508, "grad_norm": 1.928062915802002, "learning_rate": 0.001, "loss": 1.8659, "step": 325920 }, { "epoch": 28.120772946859905, "grad_norm": 0.7335609793663025, "learning_rate": 0.001, "loss": 1.8809, "step": 325976 }, { "epoch": 28.1256038647343, "grad_norm": 1.5227272510528564, "learning_rate": 0.001, "loss": 1.879, "step": 326032 }, { "epoch": 28.130434782608695, "grad_norm": 1.1515116691589355, "learning_rate": 0.001, "loss": 1.8723, "step": 326088 }, { "epoch": 28.135265700483092, "grad_norm": 1.439655065536499, "learning_rate": 0.001, "loss": 1.8743, "step": 326144 }, { "epoch": 28.14009661835749, "grad_norm": 1.2852096557617188, "learning_rate": 0.001, "loss": 1.8921, "step": 326200 }, { "epoch": 28.144927536231883, "grad_norm": 12.06866455078125, "learning_rate": 0.001, "loss": 1.9198, "step": 326256 }, { "epoch": 28.14975845410628, "grad_norm": 1.060186505317688, "learning_rate": 0.001, "loss": 1.9161, "step": 326312 }, { "epoch": 28.154589371980677, "grad_norm": 1.3918250799179077, "learning_rate": 0.001, "loss": 1.9106, "step": 326368 }, { "epoch": 28.159420289855074, "grad_norm": 1.0849374532699585, "learning_rate": 0.001, "loss": 1.9148, "step": 326424 }, { "epoch": 28.164251207729468, "grad_norm": 0.3700352907180786, "learning_rate": 0.001, "loss": 1.9047, "step": 326480 }, { "epoch": 28.169082125603865, "grad_norm": 0.35202285647392273, "learning_rate": 0.001, "loss": 1.9088, "step": 326536 }, { "epoch": 28.17391304347826, "grad_norm": 1.53982412815094, "learning_rate": 0.001, "loss": 1.8945, "step": 326592 }, { "epoch": 28.17874396135266, "grad_norm": 0.9972717761993408, "learning_rate": 0.001, "loss": 1.8972, "step": 326648 }, { "epoch": 28.183574879227052, "grad_norm": 0.42282775044441223, "learning_rate": 0.001, "loss": 1.8961, "step": 326704 }, { "epoch": 28.18840579710145, "grad_norm": 0.5036537647247314, "learning_rate": 0.001, "loss": 1.8912, "step": 326760 }, { "epoch": 28.193236714975846, "grad_norm": 2.0767226219177246, "learning_rate": 0.001, "loss": 1.872, "step": 326816 }, { "epoch": 28.19806763285024, "grad_norm": 0.701141893863678, "learning_rate": 0.001, "loss": 1.8734, "step": 326872 }, { "epoch": 28.202898550724637, "grad_norm": 7.2736430168151855, "learning_rate": 0.001, "loss": 1.8843, "step": 326928 }, { "epoch": 28.207729468599034, "grad_norm": 1.168703556060791, "learning_rate": 0.001, "loss": 1.8875, "step": 326984 }, { "epoch": 28.21256038647343, "grad_norm": 1.0168073177337646, "learning_rate": 0.001, "loss": 1.8939, "step": 327040 }, { "epoch": 28.217391304347824, "grad_norm": 1.061295747756958, "learning_rate": 0.001, "loss": 1.8919, "step": 327096 }, { "epoch": 28.22222222222222, "grad_norm": 0.37067949771881104, "learning_rate": 0.001, "loss": 1.9043, "step": 327152 }, { "epoch": 28.22705314009662, "grad_norm": 0.5680735111236572, "learning_rate": 0.001, "loss": 1.8882, "step": 327208 }, { "epoch": 28.231884057971016, "grad_norm": 0.5914512872695923, "learning_rate": 0.001, "loss": 1.899, "step": 327264 }, { "epoch": 28.23671497584541, "grad_norm": 0.32705751061439514, "learning_rate": 0.001, "loss": 1.8847, "step": 327320 }, { "epoch": 28.241545893719806, "grad_norm": 0.5657069683074951, "learning_rate": 0.001, "loss": 1.8913, "step": 327376 }, { "epoch": 28.246376811594203, "grad_norm": 0.7612531185150146, "learning_rate": 0.001, "loss": 1.8824, "step": 327432 }, { "epoch": 28.2512077294686, "grad_norm": 0.31469398736953735, "learning_rate": 0.001, "loss": 1.8834, "step": 327488 }, { "epoch": 28.256038647342994, "grad_norm": 0.5604250431060791, "learning_rate": 0.001, "loss": 1.8885, "step": 327544 }, { "epoch": 28.26086956521739, "grad_norm": 1.1248418092727661, "learning_rate": 0.001, "loss": 1.8849, "step": 327600 }, { "epoch": 28.265700483091788, "grad_norm": 0.9591240286827087, "learning_rate": 0.001, "loss": 1.8779, "step": 327656 }, { "epoch": 28.270531400966185, "grad_norm": 0.7204088568687439, "learning_rate": 0.001, "loss": 1.879, "step": 327712 }, { "epoch": 28.27536231884058, "grad_norm": 1.5862189531326294, "learning_rate": 0.001, "loss": 1.8714, "step": 327768 }, { "epoch": 28.280193236714975, "grad_norm": 0.3382035791873932, "learning_rate": 0.001, "loss": 1.8775, "step": 327824 }, { "epoch": 28.285024154589372, "grad_norm": 1.227964997291565, "learning_rate": 0.001, "loss": 1.8782, "step": 327880 }, { "epoch": 28.28985507246377, "grad_norm": 1.745741605758667, "learning_rate": 0.001, "loss": 1.8655, "step": 327936 }, { "epoch": 28.294685990338163, "grad_norm": 0.9000911116600037, "learning_rate": 0.001, "loss": 1.8908, "step": 327992 }, { "epoch": 28.29951690821256, "grad_norm": 1.3809490203857422, "learning_rate": 0.001, "loss": 1.8957, "step": 328048 }, { "epoch": 28.304347826086957, "grad_norm": 1.2855427265167236, "learning_rate": 0.001, "loss": 1.8882, "step": 328104 }, { "epoch": 28.309178743961354, "grad_norm": 0.4027520716190338, "learning_rate": 0.001, "loss": 1.8841, "step": 328160 }, { "epoch": 28.314009661835748, "grad_norm": 0.8023689389228821, "learning_rate": 0.001, "loss": 1.8759, "step": 328216 }, { "epoch": 28.318840579710145, "grad_norm": 1.133941411972046, "learning_rate": 0.001, "loss": 1.8827, "step": 328272 }, { "epoch": 28.32367149758454, "grad_norm": 1.4654099941253662, "learning_rate": 0.001, "loss": 1.8808, "step": 328328 }, { "epoch": 28.32850241545894, "grad_norm": 4.052917003631592, "learning_rate": 0.001, "loss": 1.8846, "step": 328384 }, { "epoch": 28.333333333333332, "grad_norm": 0.5777309536933899, "learning_rate": 0.001, "loss": 1.8817, "step": 328440 }, { "epoch": 28.33816425120773, "grad_norm": 1.7741546630859375, "learning_rate": 0.001, "loss": 1.886, "step": 328496 }, { "epoch": 28.342995169082126, "grad_norm": 0.8260049223899841, "learning_rate": 0.001, "loss": 1.8837, "step": 328552 }, { "epoch": 28.347826086956523, "grad_norm": 1.6373041868209839, "learning_rate": 0.001, "loss": 1.8979, "step": 328608 }, { "epoch": 28.352657004830917, "grad_norm": 1.4206730127334595, "learning_rate": 0.001, "loss": 1.8963, "step": 328664 }, { "epoch": 28.357487922705314, "grad_norm": 0.3405592143535614, "learning_rate": 0.001, "loss": 1.9033, "step": 328720 }, { "epoch": 28.36231884057971, "grad_norm": 0.41407832503318787, "learning_rate": 0.001, "loss": 1.9058, "step": 328776 }, { "epoch": 28.367149758454108, "grad_norm": 3.836137056350708, "learning_rate": 0.001, "loss": 1.8938, "step": 328832 }, { "epoch": 28.3719806763285, "grad_norm": 3.6969447135925293, "learning_rate": 0.001, "loss": 1.9057, "step": 328888 }, { "epoch": 28.3768115942029, "grad_norm": 0.4419362545013428, "learning_rate": 0.001, "loss": 1.9009, "step": 328944 }, { "epoch": 28.381642512077295, "grad_norm": 0.4576115608215332, "learning_rate": 0.001, "loss": 1.8921, "step": 329000 }, { "epoch": 28.386473429951693, "grad_norm": 0.9162690043449402, "learning_rate": 0.001, "loss": 1.8945, "step": 329056 }, { "epoch": 28.391304347826086, "grad_norm": 4.146164894104004, "learning_rate": 0.001, "loss": 1.8938, "step": 329112 }, { "epoch": 28.396135265700483, "grad_norm": 0.37345951795578003, "learning_rate": 0.001, "loss": 1.9001, "step": 329168 }, { "epoch": 28.40096618357488, "grad_norm": 0.4641714096069336, "learning_rate": 0.001, "loss": 1.8966, "step": 329224 }, { "epoch": 28.405797101449274, "grad_norm": 0.4535137116909027, "learning_rate": 0.001, "loss": 1.8874, "step": 329280 }, { "epoch": 28.41062801932367, "grad_norm": 13.033797264099121, "learning_rate": 0.001, "loss": 1.8892, "step": 329336 }, { "epoch": 28.415458937198068, "grad_norm": 2.3273427486419678, "learning_rate": 0.001, "loss": 1.8974, "step": 329392 }, { "epoch": 28.420289855072465, "grad_norm": 1.1021487712860107, "learning_rate": 0.001, "loss": 1.9011, "step": 329448 }, { "epoch": 28.42512077294686, "grad_norm": 0.4815601408481598, "learning_rate": 0.001, "loss": 1.8981, "step": 329504 }, { "epoch": 28.429951690821255, "grad_norm": 3.4632744789123535, "learning_rate": 0.001, "loss": 1.8985, "step": 329560 }, { "epoch": 28.434782608695652, "grad_norm": 1.6353886127471924, "learning_rate": 0.001, "loss": 1.8929, "step": 329616 }, { "epoch": 28.43961352657005, "grad_norm": 2.0833888053894043, "learning_rate": 0.001, "loss": 1.9026, "step": 329672 }, { "epoch": 28.444444444444443, "grad_norm": 0.8961399793624878, "learning_rate": 0.001, "loss": 1.8878, "step": 329728 }, { "epoch": 28.44927536231884, "grad_norm": 0.8229840397834778, "learning_rate": 0.001, "loss": 1.8843, "step": 329784 }, { "epoch": 28.454106280193237, "grad_norm": 0.7641089558601379, "learning_rate": 0.001, "loss": 1.8842, "step": 329840 }, { "epoch": 28.458937198067634, "grad_norm": 0.3715387284755707, "learning_rate": 0.001, "loss": 1.8747, "step": 329896 }, { "epoch": 28.463768115942027, "grad_norm": 0.4506649076938629, "learning_rate": 0.001, "loss": 1.8757, "step": 329952 }, { "epoch": 28.468599033816425, "grad_norm": 0.6041941046714783, "learning_rate": 0.001, "loss": 1.8805, "step": 330008 }, { "epoch": 28.47342995169082, "grad_norm": 0.7833155989646912, "learning_rate": 0.001, "loss": 1.8817, "step": 330064 }, { "epoch": 28.47826086956522, "grad_norm": 1.0198636054992676, "learning_rate": 0.001, "loss": 1.8776, "step": 330120 }, { "epoch": 28.483091787439612, "grad_norm": 3.5298590660095215, "learning_rate": 0.001, "loss": 1.8723, "step": 330176 }, { "epoch": 28.48792270531401, "grad_norm": 0.4761419892311096, "learning_rate": 0.001, "loss": 1.8945, "step": 330232 }, { "epoch": 28.492753623188406, "grad_norm": 0.4587728977203369, "learning_rate": 0.001, "loss": 1.8894, "step": 330288 }, { "epoch": 28.497584541062803, "grad_norm": 0.6684659123420715, "learning_rate": 0.001, "loss": 1.8915, "step": 330344 }, { "epoch": 28.502415458937197, "grad_norm": 2.46571946144104, "learning_rate": 0.001, "loss": 1.9051, "step": 330400 }, { "epoch": 28.507246376811594, "grad_norm": 0.9475992321968079, "learning_rate": 0.001, "loss": 1.9139, "step": 330456 }, { "epoch": 28.51207729468599, "grad_norm": 4.113898754119873, "learning_rate": 0.001, "loss": 1.9044, "step": 330512 }, { "epoch": 28.516908212560388, "grad_norm": 0.4505608081817627, "learning_rate": 0.001, "loss": 1.8958, "step": 330568 }, { "epoch": 28.52173913043478, "grad_norm": 3.4996960163116455, "learning_rate": 0.001, "loss": 1.8952, "step": 330624 }, { "epoch": 28.52657004830918, "grad_norm": 1.0719223022460938, "learning_rate": 0.001, "loss": 1.8986, "step": 330680 }, { "epoch": 28.531400966183575, "grad_norm": 2.0519521236419678, "learning_rate": 0.001, "loss": 1.8808, "step": 330736 }, { "epoch": 28.536231884057973, "grad_norm": 1.0359359979629517, "learning_rate": 0.001, "loss": 1.8871, "step": 330792 }, { "epoch": 28.541062801932366, "grad_norm": 0.663783848285675, "learning_rate": 0.001, "loss": 1.911, "step": 330848 }, { "epoch": 28.545893719806763, "grad_norm": 4.949044227600098, "learning_rate": 0.001, "loss": 1.9068, "step": 330904 }, { "epoch": 28.55072463768116, "grad_norm": 0.973168134689331, "learning_rate": 0.001, "loss": 1.9024, "step": 330960 }, { "epoch": 28.555555555555557, "grad_norm": 0.6336768865585327, "learning_rate": 0.001, "loss": 1.9015, "step": 331016 }, { "epoch": 28.56038647342995, "grad_norm": 0.3745986521244049, "learning_rate": 0.001, "loss": 1.9122, "step": 331072 }, { "epoch": 28.565217391304348, "grad_norm": 0.9017367362976074, "learning_rate": 0.001, "loss": 1.9213, "step": 331128 }, { "epoch": 28.570048309178745, "grad_norm": 2.0765271186828613, "learning_rate": 0.001, "loss": 1.9297, "step": 331184 }, { "epoch": 28.57487922705314, "grad_norm": 2.877708911895752, "learning_rate": 0.001, "loss": 1.9411, "step": 331240 }, { "epoch": 28.579710144927535, "grad_norm": 0.7322184443473816, "learning_rate": 0.001, "loss": 1.9461, "step": 331296 }, { "epoch": 28.584541062801932, "grad_norm": 1.447961449623108, "learning_rate": 0.001, "loss": 1.9644, "step": 331352 }, { "epoch": 28.58937198067633, "grad_norm": 3.7392313480377197, "learning_rate": 0.001, "loss": 1.9579, "step": 331408 }, { "epoch": 28.594202898550726, "grad_norm": 1.5417708158493042, "learning_rate": 0.001, "loss": 1.9651, "step": 331464 }, { "epoch": 28.59903381642512, "grad_norm": 3.817207098007202, "learning_rate": 0.001, "loss": 1.9485, "step": 331520 }, { "epoch": 28.603864734299517, "grad_norm": 1.1892389059066772, "learning_rate": 0.001, "loss": 1.9514, "step": 331576 }, { "epoch": 28.608695652173914, "grad_norm": 0.9001258611679077, "learning_rate": 0.001, "loss": 1.9462, "step": 331632 }, { "epoch": 28.613526570048307, "grad_norm": 0.9239190816879272, "learning_rate": 0.001, "loss": 1.9475, "step": 331688 }, { "epoch": 28.618357487922705, "grad_norm": 0.8705443143844604, "learning_rate": 0.001, "loss": 1.955, "step": 331744 }, { "epoch": 28.6231884057971, "grad_norm": 0.8598714470863342, "learning_rate": 0.001, "loss": 1.9529, "step": 331800 }, { "epoch": 28.6280193236715, "grad_norm": 1.9536629915237427, "learning_rate": 0.001, "loss": 1.9459, "step": 331856 }, { "epoch": 28.632850241545892, "grad_norm": 3.766209363937378, "learning_rate": 0.001, "loss": 1.9397, "step": 331912 }, { "epoch": 28.63768115942029, "grad_norm": 8.624201774597168, "learning_rate": 0.001, "loss": 1.9453, "step": 331968 }, { "epoch": 28.642512077294686, "grad_norm": 1.2410045862197876, "learning_rate": 0.001, "loss": 1.9286, "step": 332024 }, { "epoch": 28.647342995169083, "grad_norm": 0.8718456029891968, "learning_rate": 0.001, "loss": 1.9121, "step": 332080 }, { "epoch": 28.652173913043477, "grad_norm": 0.47467195987701416, "learning_rate": 0.001, "loss": 1.9191, "step": 332136 }, { "epoch": 28.657004830917874, "grad_norm": 1.4802100658416748, "learning_rate": 0.001, "loss": 1.9219, "step": 332192 }, { "epoch": 28.66183574879227, "grad_norm": 0.8133614659309387, "learning_rate": 0.001, "loss": 1.9296, "step": 332248 }, { "epoch": 28.666666666666668, "grad_norm": 0.6556243896484375, "learning_rate": 0.001, "loss": 1.9181, "step": 332304 }, { "epoch": 28.67149758454106, "grad_norm": 1.3882675170898438, "learning_rate": 0.001, "loss": 1.9167, "step": 332360 }, { "epoch": 28.67632850241546, "grad_norm": 1.3900033235549927, "learning_rate": 0.001, "loss": 1.9121, "step": 332416 }, { "epoch": 28.681159420289855, "grad_norm": 0.7995746731758118, "learning_rate": 0.001, "loss": 1.9226, "step": 332472 }, { "epoch": 28.685990338164252, "grad_norm": 1.2805215120315552, "learning_rate": 0.001, "loss": 1.9147, "step": 332528 }, { "epoch": 28.690821256038646, "grad_norm": 1.582929015159607, "learning_rate": 0.001, "loss": 1.9099, "step": 332584 }, { "epoch": 28.695652173913043, "grad_norm": 3.7897348403930664, "learning_rate": 0.001, "loss": 1.9108, "step": 332640 }, { "epoch": 28.70048309178744, "grad_norm": 1.4902665615081787, "learning_rate": 0.001, "loss": 1.9059, "step": 332696 }, { "epoch": 28.705314009661837, "grad_norm": 0.6879105567932129, "learning_rate": 0.001, "loss": 1.9088, "step": 332752 }, { "epoch": 28.71014492753623, "grad_norm": 1.7254632711410522, "learning_rate": 0.001, "loss": 1.9093, "step": 332808 }, { "epoch": 28.714975845410628, "grad_norm": 0.7516950964927673, "learning_rate": 0.001, "loss": 1.9107, "step": 332864 }, { "epoch": 28.719806763285025, "grad_norm": 0.908341646194458, "learning_rate": 0.001, "loss": 1.9118, "step": 332920 }, { "epoch": 28.72463768115942, "grad_norm": 1.1142560243606567, "learning_rate": 0.001, "loss": 1.9286, "step": 332976 }, { "epoch": 28.729468599033815, "grad_norm": 0.6670729517936707, "learning_rate": 0.001, "loss": 1.9168, "step": 333032 }, { "epoch": 28.734299516908212, "grad_norm": 2.884117364883423, "learning_rate": 0.001, "loss": 1.9323, "step": 333088 }, { "epoch": 28.73913043478261, "grad_norm": 1.8979073762893677, "learning_rate": 0.001, "loss": 1.933, "step": 333144 }, { "epoch": 28.743961352657006, "grad_norm": 1.1814138889312744, "learning_rate": 0.001, "loss": 1.9209, "step": 333200 }, { "epoch": 28.7487922705314, "grad_norm": 0.8391841650009155, "learning_rate": 0.001, "loss": 1.9272, "step": 333256 }, { "epoch": 28.753623188405797, "grad_norm": 1.2846533060073853, "learning_rate": 0.001, "loss": 1.9252, "step": 333312 }, { "epoch": 28.758454106280194, "grad_norm": 1.039368748664856, "learning_rate": 0.001, "loss": 1.9282, "step": 333368 }, { "epoch": 28.76328502415459, "grad_norm": 2.0995516777038574, "learning_rate": 0.001, "loss": 1.9112, "step": 333424 }, { "epoch": 28.768115942028984, "grad_norm": 2.147758960723877, "learning_rate": 0.001, "loss": 1.915, "step": 333480 }, { "epoch": 28.77294685990338, "grad_norm": 0.7746847867965698, "learning_rate": 0.001, "loss": 1.9089, "step": 333536 }, { "epoch": 28.77777777777778, "grad_norm": 1.3602650165557861, "learning_rate": 0.001, "loss": 1.9196, "step": 333592 }, { "epoch": 28.782608695652176, "grad_norm": 2.182260274887085, "learning_rate": 0.001, "loss": 1.9207, "step": 333648 }, { "epoch": 28.78743961352657, "grad_norm": 1.1203982830047607, "learning_rate": 0.001, "loss": 1.926, "step": 333704 }, { "epoch": 28.792270531400966, "grad_norm": 1.8124265670776367, "learning_rate": 0.001, "loss": 1.921, "step": 333760 }, { "epoch": 28.797101449275363, "grad_norm": 4.528639316558838, "learning_rate": 0.001, "loss": 1.9203, "step": 333816 }, { "epoch": 28.80193236714976, "grad_norm": 0.5545051693916321, "learning_rate": 0.001, "loss": 1.9088, "step": 333872 }, { "epoch": 28.806763285024154, "grad_norm": 1.0197714567184448, "learning_rate": 0.001, "loss": 1.9251, "step": 333928 }, { "epoch": 28.81159420289855, "grad_norm": 1.0554872751235962, "learning_rate": 0.001, "loss": 1.9231, "step": 333984 }, { "epoch": 28.816425120772948, "grad_norm": 1.9428313970565796, "learning_rate": 0.001, "loss": 1.9153, "step": 334040 }, { "epoch": 28.82125603864734, "grad_norm": 2.292418956756592, "learning_rate": 0.001, "loss": 1.9128, "step": 334096 }, { "epoch": 28.82608695652174, "grad_norm": 0.5901442766189575, "learning_rate": 0.001, "loss": 1.9239, "step": 334152 }, { "epoch": 28.830917874396135, "grad_norm": 5.516192436218262, "learning_rate": 0.001, "loss": 1.9144, "step": 334208 }, { "epoch": 28.835748792270532, "grad_norm": 0.5279197096824646, "learning_rate": 0.001, "loss": 1.9101, "step": 334264 }, { "epoch": 28.840579710144926, "grad_norm": 1.5054384469985962, "learning_rate": 0.001, "loss": 1.9062, "step": 334320 }, { "epoch": 28.845410628019323, "grad_norm": 1.7059355974197388, "learning_rate": 0.001, "loss": 1.9127, "step": 334376 }, { "epoch": 28.85024154589372, "grad_norm": 2.5135741233825684, "learning_rate": 0.001, "loss": 1.9142, "step": 334432 }, { "epoch": 28.855072463768117, "grad_norm": 2.1412508487701416, "learning_rate": 0.001, "loss": 1.9121, "step": 334488 }, { "epoch": 28.85990338164251, "grad_norm": 2.8041398525238037, "learning_rate": 0.001, "loss": 1.9137, "step": 334544 }, { "epoch": 28.864734299516908, "grad_norm": 2.011864423751831, "learning_rate": 0.001, "loss": 1.9112, "step": 334600 }, { "epoch": 28.869565217391305, "grad_norm": 2.4927730560302734, "learning_rate": 0.001, "loss": 1.9089, "step": 334656 }, { "epoch": 28.8743961352657, "grad_norm": 0.9902358651161194, "learning_rate": 0.001, "loss": 1.929, "step": 334712 }, { "epoch": 28.879227053140095, "grad_norm": 1.463372826576233, "learning_rate": 0.001, "loss": 1.9169, "step": 334768 }, { "epoch": 28.884057971014492, "grad_norm": 7.084128379821777, "learning_rate": 0.001, "loss": 1.9146, "step": 334824 }, { "epoch": 28.88888888888889, "grad_norm": 1.7624740600585938, "learning_rate": 0.001, "loss": 1.9153, "step": 334880 }, { "epoch": 28.893719806763286, "grad_norm": 0.9364974498748779, "learning_rate": 0.001, "loss": 1.9169, "step": 334936 }, { "epoch": 28.89855072463768, "grad_norm": 2.909557580947876, "learning_rate": 0.001, "loss": 1.9223, "step": 334992 }, { "epoch": 28.903381642512077, "grad_norm": 0.7098058462142944, "learning_rate": 0.001, "loss": 1.9218, "step": 335048 }, { "epoch": 28.908212560386474, "grad_norm": 1.919205904006958, "learning_rate": 0.001, "loss": 1.9243, "step": 335104 }, { "epoch": 28.91304347826087, "grad_norm": 1.7865612506866455, "learning_rate": 0.001, "loss": 1.9129, "step": 335160 }, { "epoch": 28.917874396135264, "grad_norm": 0.8979544639587402, "learning_rate": 0.001, "loss": 1.9037, "step": 335216 }, { "epoch": 28.92270531400966, "grad_norm": 1.3111666440963745, "learning_rate": 0.001, "loss": 1.9268, "step": 335272 }, { "epoch": 28.92753623188406, "grad_norm": 1.183286190032959, "learning_rate": 0.001, "loss": 1.9211, "step": 335328 }, { "epoch": 28.932367149758456, "grad_norm": 1.013069748878479, "learning_rate": 0.001, "loss": 1.9316, "step": 335384 }, { "epoch": 28.93719806763285, "grad_norm": 3.185436487197876, "learning_rate": 0.001, "loss": 1.9366, "step": 335440 }, { "epoch": 28.942028985507246, "grad_norm": 33.5592155456543, "learning_rate": 0.001, "loss": 1.937, "step": 335496 }, { "epoch": 28.946859903381643, "grad_norm": 0.46022623777389526, "learning_rate": 0.001, "loss": 1.9302, "step": 335552 }, { "epoch": 28.95169082125604, "grad_norm": 4.151989459991455, "learning_rate": 0.001, "loss": 1.9428, "step": 335608 }, { "epoch": 28.956521739130434, "grad_norm": 4.188902854919434, "learning_rate": 0.001, "loss": 1.9312, "step": 335664 }, { "epoch": 28.96135265700483, "grad_norm": 0.5021653175354004, "learning_rate": 0.001, "loss": 1.9347, "step": 335720 }, { "epoch": 28.966183574879228, "grad_norm": 2.4328694343566895, "learning_rate": 0.001, "loss": 1.9359, "step": 335776 }, { "epoch": 28.971014492753625, "grad_norm": 1.620774507522583, "learning_rate": 0.001, "loss": 1.9386, "step": 335832 }, { "epoch": 28.97584541062802, "grad_norm": 1.0298885107040405, "learning_rate": 0.001, "loss": 1.9337, "step": 335888 }, { "epoch": 28.980676328502415, "grad_norm": 11.728135108947754, "learning_rate": 0.001, "loss": 1.9359, "step": 335944 }, { "epoch": 28.985507246376812, "grad_norm": 1.0595448017120361, "learning_rate": 0.001, "loss": 1.9352, "step": 336000 }, { "epoch": 28.990338164251206, "grad_norm": 0.7428472638130188, "learning_rate": 0.001, "loss": 1.9311, "step": 336056 }, { "epoch": 28.995169082125603, "grad_norm": 1.257922649383545, "learning_rate": 0.001, "loss": 1.9244, "step": 336112 }, { "epoch": 29.0, "grad_norm": 0.6742544174194336, "learning_rate": 0.001, "loss": 1.9182, "step": 336168 }, { "epoch": 29.004830917874397, "grad_norm": 1.009490728378296, "learning_rate": 0.001, "loss": 1.89, "step": 336224 }, { "epoch": 29.00966183574879, "grad_norm": 4.219653129577637, "learning_rate": 0.001, "loss": 1.8905, "step": 336280 }, { "epoch": 29.014492753623188, "grad_norm": 1.5191633701324463, "learning_rate": 0.001, "loss": 1.8821, "step": 336336 }, { "epoch": 29.019323671497585, "grad_norm": 3.083433151245117, "learning_rate": 0.001, "loss": 1.8838, "step": 336392 }, { "epoch": 29.02415458937198, "grad_norm": 1.6324265003204346, "learning_rate": 0.001, "loss": 1.8678, "step": 336448 }, { "epoch": 29.028985507246375, "grad_norm": 0.5845301151275635, "learning_rate": 0.001, "loss": 1.8991, "step": 336504 }, { "epoch": 29.033816425120772, "grad_norm": 1.7075315713882446, "learning_rate": 0.001, "loss": 1.891, "step": 336560 }, { "epoch": 29.03864734299517, "grad_norm": 1.9950648546218872, "learning_rate": 0.001, "loss": 1.8889, "step": 336616 }, { "epoch": 29.043478260869566, "grad_norm": 0.8414875864982605, "learning_rate": 0.001, "loss": 1.8891, "step": 336672 }, { "epoch": 29.04830917874396, "grad_norm": 3.040708065032959, "learning_rate": 0.001, "loss": 1.8794, "step": 336728 }, { "epoch": 29.053140096618357, "grad_norm": 3.2586257457733154, "learning_rate": 0.001, "loss": 1.8813, "step": 336784 }, { "epoch": 29.057971014492754, "grad_norm": 1.0472962856292725, "learning_rate": 0.001, "loss": 1.8951, "step": 336840 }, { "epoch": 29.06280193236715, "grad_norm": 0.5597095489501953, "learning_rate": 0.001, "loss": 1.8785, "step": 336896 }, { "epoch": 29.067632850241544, "grad_norm": 1.75657057762146, "learning_rate": 0.001, "loss": 1.8737, "step": 336952 }, { "epoch": 29.07246376811594, "grad_norm": 0.984176516532898, "learning_rate": 0.001, "loss": 1.8753, "step": 337008 }, { "epoch": 29.07729468599034, "grad_norm": 1.0896435976028442, "learning_rate": 0.001, "loss": 1.8789, "step": 337064 }, { "epoch": 29.082125603864736, "grad_norm": 2.123342514038086, "learning_rate": 0.001, "loss": 1.8962, "step": 337120 }, { "epoch": 29.08695652173913, "grad_norm": 2.675179958343506, "learning_rate": 0.001, "loss": 1.9052, "step": 337176 }, { "epoch": 29.091787439613526, "grad_norm": 2.3416190147399902, "learning_rate": 0.001, "loss": 1.9163, "step": 337232 }, { "epoch": 29.096618357487923, "grad_norm": 1.5199639797210693, "learning_rate": 0.001, "loss": 1.9049, "step": 337288 }, { "epoch": 29.10144927536232, "grad_norm": 1.455733060836792, "learning_rate": 0.001, "loss": 1.9034, "step": 337344 }, { "epoch": 29.106280193236714, "grad_norm": 2.525827407836914, "learning_rate": 0.001, "loss": 1.8952, "step": 337400 }, { "epoch": 29.11111111111111, "grad_norm": 1.190516710281372, "learning_rate": 0.001, "loss": 1.8971, "step": 337456 }, { "epoch": 29.115942028985508, "grad_norm": 2.2384257316589355, "learning_rate": 0.001, "loss": 1.9018, "step": 337512 }, { "epoch": 29.120772946859905, "grad_norm": 6.540494918823242, "learning_rate": 0.001, "loss": 1.9068, "step": 337568 }, { "epoch": 29.1256038647343, "grad_norm": 1.9433166980743408, "learning_rate": 0.001, "loss": 1.8958, "step": 337624 }, { "epoch": 29.130434782608695, "grad_norm": 11.21065616607666, "learning_rate": 0.001, "loss": 1.8962, "step": 337680 }, { "epoch": 29.135265700483092, "grad_norm": 1.9401861429214478, "learning_rate": 0.001, "loss": 1.906, "step": 337736 }, { "epoch": 29.14009661835749, "grad_norm": 3.240396022796631, "learning_rate": 0.001, "loss": 1.911, "step": 337792 }, { "epoch": 29.144927536231883, "grad_norm": 2.1199026107788086, "learning_rate": 0.001, "loss": 1.9171, "step": 337848 }, { "epoch": 29.14975845410628, "grad_norm": 3.307455539703369, "learning_rate": 0.001, "loss": 1.9178, "step": 337904 }, { "epoch": 29.154589371980677, "grad_norm": 1.1222668886184692, "learning_rate": 0.001, "loss": 1.9154, "step": 337960 }, { "epoch": 29.159420289855074, "grad_norm": 2.8536243438720703, "learning_rate": 0.001, "loss": 1.9155, "step": 338016 }, { "epoch": 29.164251207729468, "grad_norm": 1.1358243227005005, "learning_rate": 0.001, "loss": 1.9013, "step": 338072 }, { "epoch": 29.169082125603865, "grad_norm": 7.387932300567627, "learning_rate": 0.001, "loss": 1.896, "step": 338128 }, { "epoch": 29.17391304347826, "grad_norm": 2.918789863586426, "learning_rate": 0.001, "loss": 1.8959, "step": 338184 }, { "epoch": 29.17874396135266, "grad_norm": 2.7696633338928223, "learning_rate": 0.001, "loss": 1.897, "step": 338240 }, { "epoch": 29.183574879227052, "grad_norm": 1.3820921182632446, "learning_rate": 0.001, "loss": 1.9035, "step": 338296 }, { "epoch": 29.18840579710145, "grad_norm": 1.6911768913269043, "learning_rate": 0.001, "loss": 1.9121, "step": 338352 }, { "epoch": 29.193236714975846, "grad_norm": 2.3849222660064697, "learning_rate": 0.001, "loss": 1.9035, "step": 338408 }, { "epoch": 29.19806763285024, "grad_norm": 1.1593035459518433, "learning_rate": 0.001, "loss": 1.9072, "step": 338464 }, { "epoch": 29.202898550724637, "grad_norm": 0.4673869013786316, "learning_rate": 0.001, "loss": 1.894, "step": 338520 }, { "epoch": 29.207729468599034, "grad_norm": 2.011094808578491, "learning_rate": 0.001, "loss": 1.8922, "step": 338576 }, { "epoch": 29.21256038647343, "grad_norm": 2.9301528930664062, "learning_rate": 0.001, "loss": 1.8851, "step": 338632 }, { "epoch": 29.217391304347824, "grad_norm": 1.7181782722473145, "learning_rate": 0.001, "loss": 1.8776, "step": 338688 }, { "epoch": 29.22222222222222, "grad_norm": 2.089735507965088, "learning_rate": 0.001, "loss": 1.8859, "step": 338744 }, { "epoch": 29.22705314009662, "grad_norm": 2.2208869457244873, "learning_rate": 0.001, "loss": 1.8775, "step": 338800 }, { "epoch": 29.231884057971016, "grad_norm": 1.1865978240966797, "learning_rate": 0.001, "loss": 1.8889, "step": 338856 }, { "epoch": 29.23671497584541, "grad_norm": 1.8734606504440308, "learning_rate": 0.001, "loss": 1.8968, "step": 338912 }, { "epoch": 29.241545893719806, "grad_norm": 1.4250744581222534, "learning_rate": 0.001, "loss": 1.8793, "step": 338968 }, { "epoch": 29.246376811594203, "grad_norm": 1.6634395122528076, "learning_rate": 0.001, "loss": 1.8738, "step": 339024 }, { "epoch": 29.2512077294686, "grad_norm": 1.0095605850219727, "learning_rate": 0.001, "loss": 1.8843, "step": 339080 }, { "epoch": 29.256038647342994, "grad_norm": 1.2453047037124634, "learning_rate": 0.001, "loss": 1.8859, "step": 339136 }, { "epoch": 29.26086956521739, "grad_norm": 0.4766087830066681, "learning_rate": 0.001, "loss": 1.8914, "step": 339192 }, { "epoch": 29.265700483091788, "grad_norm": 1.6427762508392334, "learning_rate": 0.001, "loss": 1.8923, "step": 339248 }, { "epoch": 29.270531400966185, "grad_norm": 0.8533511161804199, "learning_rate": 0.001, "loss": 1.8973, "step": 339304 }, { "epoch": 29.27536231884058, "grad_norm": 1.0850225687026978, "learning_rate": 0.001, "loss": 1.8962, "step": 339360 }, { "epoch": 29.280193236714975, "grad_norm": 4.645824432373047, "learning_rate": 0.001, "loss": 1.8973, "step": 339416 }, { "epoch": 29.285024154589372, "grad_norm": 0.6815570592880249, "learning_rate": 0.001, "loss": 1.8943, "step": 339472 }, { "epoch": 29.28985507246377, "grad_norm": 0.5917956829071045, "learning_rate": 0.001, "loss": 1.8901, "step": 339528 }, { "epoch": 29.294685990338163, "grad_norm": 1.2189338207244873, "learning_rate": 0.001, "loss": 1.8901, "step": 339584 }, { "epoch": 29.29951690821256, "grad_norm": 0.3623257875442505, "learning_rate": 0.001, "loss": 1.8851, "step": 339640 }, { "epoch": 29.304347826086957, "grad_norm": 0.5104504823684692, "learning_rate": 0.001, "loss": 1.8856, "step": 339696 }, { "epoch": 29.309178743961354, "grad_norm": 1.8637456893920898, "learning_rate": 0.001, "loss": 1.8844, "step": 339752 }, { "epoch": 29.314009661835748, "grad_norm": 0.6654226779937744, "learning_rate": 0.001, "loss": 1.8839, "step": 339808 }, { "epoch": 29.318840579710145, "grad_norm": 0.8700190782546997, "learning_rate": 0.001, "loss": 1.8826, "step": 339864 }, { "epoch": 29.32367149758454, "grad_norm": 5.933652400970459, "learning_rate": 0.001, "loss": 1.8873, "step": 339920 }, { "epoch": 29.32850241545894, "grad_norm": 0.34447595477104187, "learning_rate": 0.001, "loss": 1.88, "step": 339976 }, { "epoch": 29.333333333333332, "grad_norm": 0.4928751587867737, "learning_rate": 0.001, "loss": 1.8805, "step": 340032 }, { "epoch": 29.33816425120773, "grad_norm": 0.8375958800315857, "learning_rate": 0.001, "loss": 1.8931, "step": 340088 }, { "epoch": 29.342995169082126, "grad_norm": 0.7392526268959045, "learning_rate": 0.001, "loss": 1.8935, "step": 340144 }, { "epoch": 29.347826086956523, "grad_norm": 1.9347178936004639, "learning_rate": 0.001, "loss": 1.8905, "step": 340200 }, { "epoch": 29.352657004830917, "grad_norm": 1.0659995079040527, "learning_rate": 0.001, "loss": 1.8916, "step": 340256 }, { "epoch": 29.357487922705314, "grad_norm": 2.6487209796905518, "learning_rate": 0.001, "loss": 1.8782, "step": 340312 }, { "epoch": 29.36231884057971, "grad_norm": 4.568159103393555, "learning_rate": 0.001, "loss": 1.8849, "step": 340368 }, { "epoch": 29.367149758454108, "grad_norm": 1.2360894680023193, "learning_rate": 0.001, "loss": 1.8862, "step": 340424 }, { "epoch": 29.3719806763285, "grad_norm": 0.7626190781593323, "learning_rate": 0.001, "loss": 1.8963, "step": 340480 }, { "epoch": 29.3768115942029, "grad_norm": 0.8645307421684265, "learning_rate": 0.001, "loss": 1.8933, "step": 340536 }, { "epoch": 29.381642512077295, "grad_norm": 3.376204013824463, "learning_rate": 0.001, "loss": 1.9055, "step": 340592 }, { "epoch": 29.386473429951693, "grad_norm": 1.0030955076217651, "learning_rate": 0.001, "loss": 1.8906, "step": 340648 }, { "epoch": 29.391304347826086, "grad_norm": 1.2015458345413208, "learning_rate": 0.001, "loss": 1.8926, "step": 340704 }, { "epoch": 29.396135265700483, "grad_norm": 0.8266012668609619, "learning_rate": 0.001, "loss": 1.9014, "step": 340760 }, { "epoch": 29.40096618357488, "grad_norm": 1.9911928176879883, "learning_rate": 0.001, "loss": 1.9034, "step": 340816 }, { "epoch": 29.405797101449274, "grad_norm": 1.5426700115203857, "learning_rate": 0.001, "loss": 1.9141, "step": 340872 }, { "epoch": 29.41062801932367, "grad_norm": 2.6662533283233643, "learning_rate": 0.001, "loss": 1.9073, "step": 340928 }, { "epoch": 29.415458937198068, "grad_norm": 0.5447232723236084, "learning_rate": 0.001, "loss": 1.9055, "step": 340984 }, { "epoch": 29.420289855072465, "grad_norm": 1.1376993656158447, "learning_rate": 0.001, "loss": 1.903, "step": 341040 }, { "epoch": 29.42512077294686, "grad_norm": 0.9387388825416565, "learning_rate": 0.001, "loss": 1.9141, "step": 341096 }, { "epoch": 29.429951690821255, "grad_norm": 1.7440056800842285, "learning_rate": 0.001, "loss": 1.9295, "step": 341152 }, { "epoch": 29.434782608695652, "grad_norm": 16.031557083129883, "learning_rate": 0.001, "loss": 1.923, "step": 341208 }, { "epoch": 29.43961352657005, "grad_norm": 1.6006251573562622, "learning_rate": 0.001, "loss": 1.9351, "step": 341264 }, { "epoch": 29.444444444444443, "grad_norm": 1.1466755867004395, "learning_rate": 0.001, "loss": 1.9498, "step": 341320 }, { "epoch": 29.44927536231884, "grad_norm": 1.0718761682510376, "learning_rate": 0.001, "loss": 1.9389, "step": 341376 }, { "epoch": 29.454106280193237, "grad_norm": 2.1162822246551514, "learning_rate": 0.001, "loss": 1.9265, "step": 341432 }, { "epoch": 29.458937198067634, "grad_norm": 1.7780194282531738, "learning_rate": 0.001, "loss": 1.9323, "step": 341488 }, { "epoch": 29.463768115942027, "grad_norm": 1.4814939498901367, "learning_rate": 0.001, "loss": 1.926, "step": 341544 }, { "epoch": 29.468599033816425, "grad_norm": 0.4262562394142151, "learning_rate": 0.001, "loss": 1.9275, "step": 341600 }, { "epoch": 29.47342995169082, "grad_norm": 1.196614146232605, "learning_rate": 0.001, "loss": 1.912, "step": 341656 }, { "epoch": 29.47826086956522, "grad_norm": 1.90091872215271, "learning_rate": 0.001, "loss": 1.9183, "step": 341712 }, { "epoch": 29.483091787439612, "grad_norm": 8.82557487487793, "learning_rate": 0.001, "loss": 1.9232, "step": 341768 }, { "epoch": 29.48792270531401, "grad_norm": 1.1036425828933716, "learning_rate": 0.001, "loss": 1.9266, "step": 341824 }, { "epoch": 29.492753623188406, "grad_norm": 2.078120470046997, "learning_rate": 0.001, "loss": 1.9324, "step": 341880 }, { "epoch": 29.497584541062803, "grad_norm": 2.0313286781311035, "learning_rate": 0.001, "loss": 1.9103, "step": 341936 }, { "epoch": 29.502415458937197, "grad_norm": 1.5176509618759155, "learning_rate": 0.001, "loss": 1.9092, "step": 341992 }, { "epoch": 29.507246376811594, "grad_norm": 1.3919011354446411, "learning_rate": 0.001, "loss": 1.903, "step": 342048 }, { "epoch": 29.51207729468599, "grad_norm": 3.533022165298462, "learning_rate": 0.001, "loss": 1.9039, "step": 342104 }, { "epoch": 29.516908212560388, "grad_norm": 3.4404382705688477, "learning_rate": 0.001, "loss": 1.8989, "step": 342160 }, { "epoch": 29.52173913043478, "grad_norm": 2.2579283714294434, "learning_rate": 0.001, "loss": 1.9151, "step": 342216 }, { "epoch": 29.52657004830918, "grad_norm": 0.9830791354179382, "learning_rate": 0.001, "loss": 1.9163, "step": 342272 }, { "epoch": 29.531400966183575, "grad_norm": 0.9636391401290894, "learning_rate": 0.001, "loss": 1.9351, "step": 342328 }, { "epoch": 29.536231884057973, "grad_norm": 0.5918301343917847, "learning_rate": 0.001, "loss": 1.9315, "step": 342384 }, { "epoch": 29.541062801932366, "grad_norm": 0.9615982174873352, "learning_rate": 0.001, "loss": 1.9238, "step": 342440 }, { "epoch": 29.545893719806763, "grad_norm": 1.8380165100097656, "learning_rate": 0.001, "loss": 1.9444, "step": 342496 }, { "epoch": 29.55072463768116, "grad_norm": 1.1342823505401611, "learning_rate": 0.001, "loss": 1.9493, "step": 342552 }, { "epoch": 29.555555555555557, "grad_norm": 1.566544771194458, "learning_rate": 0.001, "loss": 1.946, "step": 342608 }, { "epoch": 29.56038647342995, "grad_norm": 1.7683236598968506, "learning_rate": 0.001, "loss": 1.9402, "step": 342664 }, { "epoch": 29.565217391304348, "grad_norm": 1.1402480602264404, "learning_rate": 0.001, "loss": 1.9474, "step": 342720 }, { "epoch": 29.570048309178745, "grad_norm": 2.59822678565979, "learning_rate": 0.001, "loss": 1.9611, "step": 342776 }, { "epoch": 29.57487922705314, "grad_norm": 3.0990357398986816, "learning_rate": 0.001, "loss": 1.9721, "step": 342832 }, { "epoch": 29.579710144927535, "grad_norm": 1.9563204050064087, "learning_rate": 0.001, "loss": 1.95, "step": 342888 }, { "epoch": 29.584541062801932, "grad_norm": 0.6378768086433411, "learning_rate": 0.001, "loss": 1.9502, "step": 342944 }, { "epoch": 29.58937198067633, "grad_norm": 1.0656732320785522, "learning_rate": 0.001, "loss": 1.9313, "step": 343000 }, { "epoch": 29.594202898550726, "grad_norm": 1.0148881673812866, "learning_rate": 0.001, "loss": 1.9236, "step": 343056 }, { "epoch": 29.59903381642512, "grad_norm": 1.0317550897598267, "learning_rate": 0.001, "loss": 1.9341, "step": 343112 }, { "epoch": 29.603864734299517, "grad_norm": 0.7687577605247498, "learning_rate": 0.001, "loss": 1.9461, "step": 343168 }, { "epoch": 29.608695652173914, "grad_norm": 4.537657737731934, "learning_rate": 0.001, "loss": 1.9362, "step": 343224 }, { "epoch": 29.613526570048307, "grad_norm": 0.9439715147018433, "learning_rate": 0.001, "loss": 1.9295, "step": 343280 }, { "epoch": 29.618357487922705, "grad_norm": 0.8581864833831787, "learning_rate": 0.001, "loss": 1.9238, "step": 343336 }, { "epoch": 29.6231884057971, "grad_norm": 2.6081976890563965, "learning_rate": 0.001, "loss": 1.9165, "step": 343392 }, { "epoch": 29.6280193236715, "grad_norm": 1.3460050821304321, "learning_rate": 0.001, "loss": 1.9331, "step": 343448 }, { "epoch": 29.632850241545892, "grad_norm": 1.081058382987976, "learning_rate": 0.001, "loss": 1.9358, "step": 343504 }, { "epoch": 29.63768115942029, "grad_norm": 0.7841014862060547, "learning_rate": 0.001, "loss": 1.9408, "step": 343560 }, { "epoch": 29.642512077294686, "grad_norm": 1.4917643070220947, "learning_rate": 0.001, "loss": 1.9363, "step": 343616 }, { "epoch": 29.647342995169083, "grad_norm": 1.572274923324585, "learning_rate": 0.001, "loss": 1.9367, "step": 343672 }, { "epoch": 29.652173913043477, "grad_norm": 0.934330403804779, "learning_rate": 0.001, "loss": 1.9428, "step": 343728 }, { "epoch": 29.657004830917874, "grad_norm": 0.8813834190368652, "learning_rate": 0.001, "loss": 1.9216, "step": 343784 }, { "epoch": 29.66183574879227, "grad_norm": 2.107917308807373, "learning_rate": 0.001, "loss": 1.9186, "step": 343840 }, { "epoch": 29.666666666666668, "grad_norm": 1.0549782514572144, "learning_rate": 0.001, "loss": 1.9178, "step": 343896 }, { "epoch": 29.67149758454106, "grad_norm": 1.2666670083999634, "learning_rate": 0.001, "loss": 1.9194, "step": 343952 }, { "epoch": 29.67632850241546, "grad_norm": 1.4690303802490234, "learning_rate": 0.001, "loss": 1.9263, "step": 344008 }, { "epoch": 29.681159420289855, "grad_norm": 1.3744804859161377, "learning_rate": 0.001, "loss": 1.9344, "step": 344064 }, { "epoch": 29.685990338164252, "grad_norm": 1.8718572854995728, "learning_rate": 0.001, "loss": 1.9297, "step": 344120 }, { "epoch": 29.690821256038646, "grad_norm": 12.43152904510498, "learning_rate": 0.001, "loss": 1.9405, "step": 344176 }, { "epoch": 29.695652173913043, "grad_norm": 0.6862295866012573, "learning_rate": 0.001, "loss": 1.9238, "step": 344232 }, { "epoch": 29.70048309178744, "grad_norm": 0.7376891374588013, "learning_rate": 0.001, "loss": 1.9292, "step": 344288 }, { "epoch": 29.705314009661837, "grad_norm": 0.7916767001152039, "learning_rate": 0.001, "loss": 1.9189, "step": 344344 }, { "epoch": 29.71014492753623, "grad_norm": 0.36574843525886536, "learning_rate": 0.001, "loss": 1.9151, "step": 344400 }, { "epoch": 29.714975845410628, "grad_norm": 11.602384567260742, "learning_rate": 0.001, "loss": 1.9124, "step": 344456 }, { "epoch": 29.719806763285025, "grad_norm": 1.1952251195907593, "learning_rate": 0.001, "loss": 1.9138, "step": 344512 }, { "epoch": 29.72463768115942, "grad_norm": 1.2968500852584839, "learning_rate": 0.001, "loss": 1.9076, "step": 344568 }, { "epoch": 29.729468599033815, "grad_norm": 1.144452452659607, "learning_rate": 0.001, "loss": 1.9113, "step": 344624 }, { "epoch": 29.734299516908212, "grad_norm": 0.7884884476661682, "learning_rate": 0.001, "loss": 1.9126, "step": 344680 }, { "epoch": 29.73913043478261, "grad_norm": 0.8450772762298584, "learning_rate": 0.001, "loss": 1.9022, "step": 344736 }, { "epoch": 29.743961352657006, "grad_norm": 1.585148572921753, "learning_rate": 0.001, "loss": 1.9075, "step": 344792 }, { "epoch": 29.7487922705314, "grad_norm": 0.9838790893554688, "learning_rate": 0.001, "loss": 1.8963, "step": 344848 }, { "epoch": 29.753623188405797, "grad_norm": 1.0778287649154663, "learning_rate": 0.001, "loss": 1.9072, "step": 344904 }, { "epoch": 29.758454106280194, "grad_norm": 1.2693387269973755, "learning_rate": 0.001, "loss": 1.9076, "step": 344960 }, { "epoch": 29.76328502415459, "grad_norm": 0.7274543642997742, "learning_rate": 0.001, "loss": 1.9024, "step": 345016 }, { "epoch": 29.768115942028984, "grad_norm": 0.8931019306182861, "learning_rate": 0.001, "loss": 1.8958, "step": 345072 }, { "epoch": 29.77294685990338, "grad_norm": 0.5365729928016663, "learning_rate": 0.001, "loss": 1.9005, "step": 345128 }, { "epoch": 29.77777777777778, "grad_norm": 1.9573228359222412, "learning_rate": 0.001, "loss": 1.8966, "step": 345184 }, { "epoch": 29.782608695652176, "grad_norm": 3.15415620803833, "learning_rate": 0.001, "loss": 1.8948, "step": 345240 }, { "epoch": 29.78743961352657, "grad_norm": 0.9425359964370728, "learning_rate": 0.001, "loss": 1.89, "step": 345296 }, { "epoch": 29.792270531400966, "grad_norm": 2.3333144187927246, "learning_rate": 0.001, "loss": 1.9043, "step": 345352 }, { "epoch": 29.797101449275363, "grad_norm": 0.5949890613555908, "learning_rate": 0.001, "loss": 1.8923, "step": 345408 }, { "epoch": 29.80193236714976, "grad_norm": 2.0462820529937744, "learning_rate": 0.001, "loss": 1.8947, "step": 345464 }, { "epoch": 29.806763285024154, "grad_norm": 1.4434350728988647, "learning_rate": 0.001, "loss": 1.8851, "step": 345520 }, { "epoch": 29.81159420289855, "grad_norm": 0.3378339111804962, "learning_rate": 0.001, "loss": 1.9045, "step": 345576 }, { "epoch": 29.816425120772948, "grad_norm": 0.5433062314987183, "learning_rate": 0.001, "loss": 1.8994, "step": 345632 }, { "epoch": 29.82125603864734, "grad_norm": 0.6677520871162415, "learning_rate": 0.001, "loss": 1.8991, "step": 345688 }, { "epoch": 29.82608695652174, "grad_norm": 1.1760334968566895, "learning_rate": 0.001, "loss": 1.8932, "step": 345744 }, { "epoch": 29.830917874396135, "grad_norm": 1.9096678495407104, "learning_rate": 0.001, "loss": 1.8949, "step": 345800 }, { "epoch": 29.835748792270532, "grad_norm": 0.9076570272445679, "learning_rate": 0.001, "loss": 1.8872, "step": 345856 }, { "epoch": 29.840579710144926, "grad_norm": 1.4037699699401855, "learning_rate": 0.001, "loss": 1.9043, "step": 345912 }, { "epoch": 29.845410628019323, "grad_norm": 1.0574721097946167, "learning_rate": 0.001, "loss": 1.9002, "step": 345968 }, { "epoch": 29.85024154589372, "grad_norm": 0.27883023023605347, "learning_rate": 0.001, "loss": 1.9067, "step": 346024 }, { "epoch": 29.855072463768117, "grad_norm": 2.7373135089874268, "learning_rate": 0.001, "loss": 1.9032, "step": 346080 }, { "epoch": 29.85990338164251, "grad_norm": 14.10302448272705, "learning_rate": 0.001, "loss": 1.9102, "step": 346136 }, { "epoch": 29.864734299516908, "grad_norm": 1.1213552951812744, "learning_rate": 0.001, "loss": 1.9097, "step": 346192 }, { "epoch": 29.869565217391305, "grad_norm": 0.5022969841957092, "learning_rate": 0.001, "loss": 1.9086, "step": 346248 }, { "epoch": 29.8743961352657, "grad_norm": 0.975473165512085, "learning_rate": 0.001, "loss": 1.9082, "step": 346304 }, { "epoch": 29.879227053140095, "grad_norm": 1.0287539958953857, "learning_rate": 0.001, "loss": 1.9083, "step": 346360 }, { "epoch": 29.884057971014492, "grad_norm": 2.4628407955169678, "learning_rate": 0.001, "loss": 1.9065, "step": 346416 }, { "epoch": 29.88888888888889, "grad_norm": 0.8880907297134399, "learning_rate": 0.001, "loss": 1.9124, "step": 346472 }, { "epoch": 29.893719806763286, "grad_norm": 0.37082454562187195, "learning_rate": 0.001, "loss": 1.9023, "step": 346528 }, { "epoch": 29.89855072463768, "grad_norm": 0.33981502056121826, "learning_rate": 0.001, "loss": 1.9131, "step": 346584 }, { "epoch": 29.903381642512077, "grad_norm": 0.43468859791755676, "learning_rate": 0.001, "loss": 1.9056, "step": 346640 }, { "epoch": 29.908212560386474, "grad_norm": 0.5177057981491089, "learning_rate": 0.001, "loss": 1.9109, "step": 346696 }, { "epoch": 29.91304347826087, "grad_norm": 1.4313410520553589, "learning_rate": 0.001, "loss": 1.9183, "step": 346752 }, { "epoch": 29.917874396135264, "grad_norm": 0.34012916684150696, "learning_rate": 0.001, "loss": 1.9172, "step": 346808 }, { "epoch": 29.92270531400966, "grad_norm": 0.27256232500076294, "learning_rate": 0.001, "loss": 1.9007, "step": 346864 }, { "epoch": 29.92753623188406, "grad_norm": 0.3913356363773346, "learning_rate": 0.001, "loss": 1.8969, "step": 346920 }, { "epoch": 29.932367149758456, "grad_norm": 1.646673560142517, "learning_rate": 0.001, "loss": 1.9061, "step": 346976 }, { "epoch": 29.93719806763285, "grad_norm": 0.2729713022708893, "learning_rate": 0.001, "loss": 1.9055, "step": 347032 }, { "epoch": 29.942028985507246, "grad_norm": 1.602040410041809, "learning_rate": 0.001, "loss": 1.8903, "step": 347088 }, { "epoch": 29.946859903381643, "grad_norm": 0.4484712481498718, "learning_rate": 0.001, "loss": 1.8906, "step": 347144 }, { "epoch": 29.95169082125604, "grad_norm": 0.33455437421798706, "learning_rate": 0.001, "loss": 1.9013, "step": 347200 }, { "epoch": 29.956521739130434, "grad_norm": 0.701033890247345, "learning_rate": 0.001, "loss": 1.895, "step": 347256 }, { "epoch": 29.96135265700483, "grad_norm": 0.772212564945221, "learning_rate": 0.001, "loss": 1.8993, "step": 347312 }, { "epoch": 29.966183574879228, "grad_norm": 1.3854421377182007, "learning_rate": 0.001, "loss": 1.9001, "step": 347368 }, { "epoch": 29.971014492753625, "grad_norm": 2.3350648880004883, "learning_rate": 0.001, "loss": 1.8997, "step": 347424 }, { "epoch": 29.97584541062802, "grad_norm": 2.164705276489258, "learning_rate": 0.001, "loss": 1.8952, "step": 347480 }, { "epoch": 29.980676328502415, "grad_norm": 1.2127679586410522, "learning_rate": 0.001, "loss": 1.8991, "step": 347536 }, { "epoch": 29.985507246376812, "grad_norm": 0.6176809072494507, "learning_rate": 0.001, "loss": 1.8921, "step": 347592 }, { "epoch": 29.990338164251206, "grad_norm": 1.0814037322998047, "learning_rate": 0.001, "loss": 1.8922, "step": 347648 }, { "epoch": 29.995169082125603, "grad_norm": 0.7046548128128052, "learning_rate": 0.001, "loss": 1.8862, "step": 347704 }, { "epoch": 30.0, "grad_norm": 1.102046012878418, "learning_rate": 0.001, "loss": 1.9038, "step": 347760 }, { "epoch": 30.004830917874397, "grad_norm": 1.6245622634887695, "learning_rate": 0.001, "loss": 1.8512, "step": 347816 }, { "epoch": 30.00966183574879, "grad_norm": 0.7782652378082275, "learning_rate": 0.001, "loss": 1.8603, "step": 347872 }, { "epoch": 30.014492753623188, "grad_norm": 0.34809496998786926, "learning_rate": 0.001, "loss": 1.8611, "step": 347928 }, { "epoch": 30.019323671497585, "grad_norm": 1.3345814943313599, "learning_rate": 0.001, "loss": 1.8578, "step": 347984 }, { "epoch": 30.02415458937198, "grad_norm": 1.8830335140228271, "learning_rate": 0.001, "loss": 1.862, "step": 348040 }, { "epoch": 30.028985507246375, "grad_norm": 1.0652685165405273, "learning_rate": 0.001, "loss": 1.8553, "step": 348096 }, { "epoch": 30.033816425120772, "grad_norm": 0.6066794395446777, "learning_rate": 0.001, "loss": 1.8621, "step": 348152 }, { "epoch": 30.03864734299517, "grad_norm": 2.4736759662628174, "learning_rate": 0.001, "loss": 1.8608, "step": 348208 }, { "epoch": 30.043478260869566, "grad_norm": 6.71629524230957, "learning_rate": 0.001, "loss": 1.8629, "step": 348264 }, { "epoch": 30.04830917874396, "grad_norm": 2.431619882583618, "learning_rate": 0.001, "loss": 1.8613, "step": 348320 }, { "epoch": 30.053140096618357, "grad_norm": 2.8606183528900146, "learning_rate": 0.001, "loss": 1.861, "step": 348376 }, { "epoch": 30.057971014492754, "grad_norm": 0.47667524218559265, "learning_rate": 0.001, "loss": 1.8499, "step": 348432 }, { "epoch": 30.06280193236715, "grad_norm": 0.8094444274902344, "learning_rate": 0.001, "loss": 1.8586, "step": 348488 }, { "epoch": 30.067632850241544, "grad_norm": 0.7716014385223389, "learning_rate": 0.001, "loss": 1.8532, "step": 348544 }, { "epoch": 30.07246376811594, "grad_norm": 4.095464706420898, "learning_rate": 0.001, "loss": 1.872, "step": 348600 }, { "epoch": 30.07729468599034, "grad_norm": 8.32016658782959, "learning_rate": 0.001, "loss": 1.8802, "step": 348656 }, { "epoch": 30.082125603864736, "grad_norm": 1.7004462480545044, "learning_rate": 0.001, "loss": 1.8829, "step": 348712 }, { "epoch": 30.08695652173913, "grad_norm": 0.6757870316505432, "learning_rate": 0.001, "loss": 1.8762, "step": 348768 }, { "epoch": 30.091787439613526, "grad_norm": 0.509045422077179, "learning_rate": 0.001, "loss": 1.8807, "step": 348824 }, { "epoch": 30.096618357487923, "grad_norm": 0.9797523617744446, "learning_rate": 0.001, "loss": 1.8801, "step": 348880 }, { "epoch": 30.10144927536232, "grad_norm": 0.5945743322372437, "learning_rate": 0.001, "loss": 1.8641, "step": 348936 }, { "epoch": 30.106280193236714, "grad_norm": 5.183482646942139, "learning_rate": 0.001, "loss": 1.8767, "step": 348992 }, { "epoch": 30.11111111111111, "grad_norm": 1.6931238174438477, "learning_rate": 0.001, "loss": 1.8744, "step": 349048 }, { "epoch": 30.115942028985508, "grad_norm": 0.5609611868858337, "learning_rate": 0.001, "loss": 1.868, "step": 349104 }, { "epoch": 30.120772946859905, "grad_norm": 1.3362735509872437, "learning_rate": 0.001, "loss": 1.8645, "step": 349160 }, { "epoch": 30.1256038647343, "grad_norm": 0.9575352072715759, "learning_rate": 0.001, "loss": 1.87, "step": 349216 }, { "epoch": 30.130434782608695, "grad_norm": 0.5428534746170044, "learning_rate": 0.001, "loss": 1.8706, "step": 349272 }, { "epoch": 30.135265700483092, "grad_norm": 1.0157462358474731, "learning_rate": 0.001, "loss": 1.8687, "step": 349328 }, { "epoch": 30.14009661835749, "grad_norm": 0.9258418679237366, "learning_rate": 0.001, "loss": 1.8704, "step": 349384 }, { "epoch": 30.144927536231883, "grad_norm": 0.5394553542137146, "learning_rate": 0.001, "loss": 1.872, "step": 349440 }, { "epoch": 30.14975845410628, "grad_norm": 0.42806872725486755, "learning_rate": 0.001, "loss": 1.8705, "step": 349496 }, { "epoch": 30.154589371980677, "grad_norm": 0.7224445939064026, "learning_rate": 0.001, "loss": 1.8718, "step": 349552 }, { "epoch": 30.159420289855074, "grad_norm": 3.6242918968200684, "learning_rate": 0.001, "loss": 1.8635, "step": 349608 }, { "epoch": 30.164251207729468, "grad_norm": 1.0771609544754028, "learning_rate": 0.001, "loss": 1.8763, "step": 349664 }, { "epoch": 30.169082125603865, "grad_norm": 3.3896090984344482, "learning_rate": 0.001, "loss": 1.868, "step": 349720 }, { "epoch": 30.17391304347826, "grad_norm": 0.9373986124992371, "learning_rate": 0.001, "loss": 1.8735, "step": 349776 }, { "epoch": 30.17874396135266, "grad_norm": 0.6278557777404785, "learning_rate": 0.001, "loss": 1.8742, "step": 349832 }, { "epoch": 30.183574879227052, "grad_norm": 3.8788821697235107, "learning_rate": 0.001, "loss": 1.8658, "step": 349888 }, { "epoch": 30.18840579710145, "grad_norm": 2.0766570568084717, "learning_rate": 0.001, "loss": 1.8635, "step": 349944 }, { "epoch": 30.193236714975846, "grad_norm": 0.5334764719009399, "learning_rate": 0.001, "loss": 1.8727, "step": 350000 }, { "epoch": 30.19806763285024, "grad_norm": 1.8483541011810303, "learning_rate": 0.001, "loss": 1.8751, "step": 350056 }, { "epoch": 30.202898550724637, "grad_norm": 0.4267479479312897, "learning_rate": 0.001, "loss": 1.869, "step": 350112 }, { "epoch": 30.207729468599034, "grad_norm": 1.000412940979004, "learning_rate": 0.001, "loss": 1.8864, "step": 350168 }, { "epoch": 30.21256038647343, "grad_norm": 0.4552321135997772, "learning_rate": 0.001, "loss": 1.8743, "step": 350224 }, { "epoch": 30.217391304347824, "grad_norm": 1.0496025085449219, "learning_rate": 0.001, "loss": 1.885, "step": 350280 }, { "epoch": 30.22222222222222, "grad_norm": 1.0267835855484009, "learning_rate": 0.001, "loss": 1.8797, "step": 350336 }, { "epoch": 30.22705314009662, "grad_norm": 9.707250595092773, "learning_rate": 0.001, "loss": 1.8805, "step": 350392 }, { "epoch": 30.231884057971016, "grad_norm": 3.1955811977386475, "learning_rate": 0.001, "loss": 1.8958, "step": 350448 }, { "epoch": 30.23671497584541, "grad_norm": 1.7350873947143555, "learning_rate": 0.001, "loss": 1.8834, "step": 350504 }, { "epoch": 30.241545893719806, "grad_norm": 2.84063720703125, "learning_rate": 0.001, "loss": 1.8783, "step": 350560 }, { "epoch": 30.246376811594203, "grad_norm": 1.4558125734329224, "learning_rate": 0.001, "loss": 1.873, "step": 350616 }, { "epoch": 30.2512077294686, "grad_norm": 0.7567052245140076, "learning_rate": 0.001, "loss": 1.8766, "step": 350672 }, { "epoch": 30.256038647342994, "grad_norm": 5.560967445373535, "learning_rate": 0.001, "loss": 1.8841, "step": 350728 }, { "epoch": 30.26086956521739, "grad_norm": 2.511531352996826, "learning_rate": 0.001, "loss": 1.8727, "step": 350784 }, { "epoch": 30.265700483091788, "grad_norm": 0.6153662204742432, "learning_rate": 0.001, "loss": 1.8749, "step": 350840 }, { "epoch": 30.270531400966185, "grad_norm": 3.1324899196624756, "learning_rate": 0.001, "loss": 1.8813, "step": 350896 }, { "epoch": 30.27536231884058, "grad_norm": 1.6099321842193604, "learning_rate": 0.001, "loss": 1.8859, "step": 350952 }, { "epoch": 30.280193236714975, "grad_norm": 1.5619922876358032, "learning_rate": 0.001, "loss": 1.8909, "step": 351008 }, { "epoch": 30.285024154589372, "grad_norm": 1.7167088985443115, "learning_rate": 0.001, "loss": 1.8909, "step": 351064 }, { "epoch": 30.28985507246377, "grad_norm": 5.756916046142578, "learning_rate": 0.001, "loss": 1.8754, "step": 351120 }, { "epoch": 30.294685990338163, "grad_norm": 1.00927734375, "learning_rate": 0.001, "loss": 1.8785, "step": 351176 }, { "epoch": 30.29951690821256, "grad_norm": 1.4379228353500366, "learning_rate": 0.001, "loss": 1.8862, "step": 351232 }, { "epoch": 30.304347826086957, "grad_norm": 2.2918591499328613, "learning_rate": 0.001, "loss": 1.8808, "step": 351288 }, { "epoch": 30.309178743961354, "grad_norm": 0.5946013927459717, "learning_rate": 0.001, "loss": 1.8678, "step": 351344 }, { "epoch": 30.314009661835748, "grad_norm": 0.697689950466156, "learning_rate": 0.001, "loss": 1.874, "step": 351400 }, { "epoch": 30.318840579710145, "grad_norm": 1.8963210582733154, "learning_rate": 0.001, "loss": 1.8764, "step": 351456 }, { "epoch": 30.32367149758454, "grad_norm": 0.5150372982025146, "learning_rate": 0.001, "loss": 1.8758, "step": 351512 }, { "epoch": 30.32850241545894, "grad_norm": 2.926867961883545, "learning_rate": 0.001, "loss": 1.8891, "step": 351568 }, { "epoch": 30.333333333333332, "grad_norm": 0.6492698192596436, "learning_rate": 0.001, "loss": 1.8819, "step": 351624 }, { "epoch": 30.33816425120773, "grad_norm": 0.7006253600120544, "learning_rate": 0.001, "loss": 1.9088, "step": 351680 }, { "epoch": 30.342995169082126, "grad_norm": 0.3250187933444977, "learning_rate": 0.001, "loss": 1.9126, "step": 351736 }, { "epoch": 30.347826086956523, "grad_norm": 0.2729688584804535, "learning_rate": 0.001, "loss": 1.8987, "step": 351792 }, { "epoch": 30.352657004830917, "grad_norm": 1.5226668119430542, "learning_rate": 0.001, "loss": 1.8869, "step": 351848 }, { "epoch": 30.357487922705314, "grad_norm": 1.19586980342865, "learning_rate": 0.001, "loss": 1.8827, "step": 351904 }, { "epoch": 30.36231884057971, "grad_norm": 3.7458693981170654, "learning_rate": 0.001, "loss": 1.8731, "step": 351960 }, { "epoch": 30.367149758454108, "grad_norm": 5.488990306854248, "learning_rate": 0.001, "loss": 1.8755, "step": 352016 }, { "epoch": 30.3719806763285, "grad_norm": 0.9908542633056641, "learning_rate": 0.001, "loss": 1.8871, "step": 352072 }, { "epoch": 30.3768115942029, "grad_norm": 0.4259231686592102, "learning_rate": 0.001, "loss": 1.8784, "step": 352128 }, { "epoch": 30.381642512077295, "grad_norm": 0.45330068469047546, "learning_rate": 0.001, "loss": 1.8681, "step": 352184 }, { "epoch": 30.386473429951693, "grad_norm": 0.4506126642227173, "learning_rate": 0.001, "loss": 1.879, "step": 352240 }, { "epoch": 30.391304347826086, "grad_norm": 0.3546209931373596, "learning_rate": 0.001, "loss": 1.8691, "step": 352296 }, { "epoch": 30.396135265700483, "grad_norm": 0.3133753836154938, "learning_rate": 0.001, "loss": 1.8756, "step": 352352 }, { "epoch": 30.40096618357488, "grad_norm": 2.1002237796783447, "learning_rate": 0.001, "loss": 1.8736, "step": 352408 }, { "epoch": 30.405797101449274, "grad_norm": 0.5384484529495239, "learning_rate": 0.001, "loss": 1.8731, "step": 352464 }, { "epoch": 30.41062801932367, "grad_norm": 0.9713990092277527, "learning_rate": 0.001, "loss": 1.865, "step": 352520 }, { "epoch": 30.415458937198068, "grad_norm": 0.6886662244796753, "learning_rate": 0.001, "loss": 1.87, "step": 352576 }, { "epoch": 30.420289855072465, "grad_norm": 3.8965535163879395, "learning_rate": 0.001, "loss": 1.8748, "step": 352632 }, { "epoch": 30.42512077294686, "grad_norm": 0.39110586047172546, "learning_rate": 0.001, "loss": 1.8769, "step": 352688 }, { "epoch": 30.429951690821255, "grad_norm": 0.5718410611152649, "learning_rate": 0.001, "loss": 1.896, "step": 352744 }, { "epoch": 30.434782608695652, "grad_norm": 1.0877954959869385, "learning_rate": 0.001, "loss": 1.905, "step": 352800 }, { "epoch": 30.43961352657005, "grad_norm": 0.6660036444664001, "learning_rate": 0.001, "loss": 1.9195, "step": 352856 }, { "epoch": 30.444444444444443, "grad_norm": 1.493395447731018, "learning_rate": 0.001, "loss": 1.911, "step": 352912 }, { "epoch": 30.44927536231884, "grad_norm": 0.8233058452606201, "learning_rate": 0.001, "loss": 1.8915, "step": 352968 }, { "epoch": 30.454106280193237, "grad_norm": 1.0065187215805054, "learning_rate": 0.001, "loss": 1.9017, "step": 353024 }, { "epoch": 30.458937198067634, "grad_norm": 1.1533045768737793, "learning_rate": 0.001, "loss": 1.899, "step": 353080 }, { "epoch": 30.463768115942027, "grad_norm": 0.5408811569213867, "learning_rate": 0.001, "loss": 1.8964, "step": 353136 }, { "epoch": 30.468599033816425, "grad_norm": 2.091660499572754, "learning_rate": 0.001, "loss": 1.8864, "step": 353192 }, { "epoch": 30.47342995169082, "grad_norm": 1.5549612045288086, "learning_rate": 0.001, "loss": 1.8882, "step": 353248 }, { "epoch": 30.47826086956522, "grad_norm": 3.646867036819458, "learning_rate": 0.001, "loss": 1.8833, "step": 353304 }, { "epoch": 30.483091787439612, "grad_norm": 0.6905511021614075, "learning_rate": 0.001, "loss": 1.8953, "step": 353360 }, { "epoch": 30.48792270531401, "grad_norm": 7.458583354949951, "learning_rate": 0.001, "loss": 1.8971, "step": 353416 }, { "epoch": 30.492753623188406, "grad_norm": 3.6840155124664307, "learning_rate": 0.001, "loss": 1.8882, "step": 353472 }, { "epoch": 30.497584541062803, "grad_norm": 1.934524655342102, "learning_rate": 0.001, "loss": 1.8886, "step": 353528 }, { "epoch": 30.502415458937197, "grad_norm": 0.48761430382728577, "learning_rate": 0.001, "loss": 1.9019, "step": 353584 }, { "epoch": 30.507246376811594, "grad_norm": 1.849694013595581, "learning_rate": 0.001, "loss": 1.8822, "step": 353640 }, { "epoch": 30.51207729468599, "grad_norm": 1.0457483530044556, "learning_rate": 0.001, "loss": 1.8803, "step": 353696 }, { "epoch": 30.516908212560388, "grad_norm": 19.32866096496582, "learning_rate": 0.001, "loss": 1.8894, "step": 353752 }, { "epoch": 30.52173913043478, "grad_norm": 1.1918922662734985, "learning_rate": 0.001, "loss": 1.8969, "step": 353808 }, { "epoch": 30.52657004830918, "grad_norm": 0.8435972332954407, "learning_rate": 0.001, "loss": 1.8975, "step": 353864 }, { "epoch": 30.531400966183575, "grad_norm": 2.300915479660034, "learning_rate": 0.001, "loss": 1.9051, "step": 353920 }, { "epoch": 30.536231884057973, "grad_norm": 1.3580238819122314, "learning_rate": 0.001, "loss": 1.9022, "step": 353976 }, { "epoch": 30.541062801932366, "grad_norm": 1.833304524421692, "learning_rate": 0.001, "loss": 1.9031, "step": 354032 }, { "epoch": 30.545893719806763, "grad_norm": 1.482807993888855, "learning_rate": 0.001, "loss": 1.9081, "step": 354088 }, { "epoch": 30.55072463768116, "grad_norm": 1.5001224279403687, "learning_rate": 0.001, "loss": 1.9025, "step": 354144 }, { "epoch": 30.555555555555557, "grad_norm": 0.3176400363445282, "learning_rate": 0.001, "loss": 1.8879, "step": 354200 }, { "epoch": 30.56038647342995, "grad_norm": 0.6192721724510193, "learning_rate": 0.001, "loss": 1.8907, "step": 354256 }, { "epoch": 30.565217391304348, "grad_norm": 14.206862449645996, "learning_rate": 0.001, "loss": 1.8897, "step": 354312 }, { "epoch": 30.570048309178745, "grad_norm": 6.536881923675537, "learning_rate": 0.001, "loss": 1.8784, "step": 354368 }, { "epoch": 30.57487922705314, "grad_norm": 1.5987311601638794, "learning_rate": 0.001, "loss": 1.8948, "step": 354424 }, { "epoch": 30.579710144927535, "grad_norm": 4.253265857696533, "learning_rate": 0.001, "loss": 1.8958, "step": 354480 }, { "epoch": 30.584541062801932, "grad_norm": 6.087608814239502, "learning_rate": 0.001, "loss": 1.8939, "step": 354536 }, { "epoch": 30.58937198067633, "grad_norm": 4.231012344360352, "learning_rate": 0.001, "loss": 1.8944, "step": 354592 }, { "epoch": 30.594202898550726, "grad_norm": 0.6379872560501099, "learning_rate": 0.001, "loss": 1.899, "step": 354648 }, { "epoch": 30.59903381642512, "grad_norm": 0.5166046619415283, "learning_rate": 0.001, "loss": 1.8932, "step": 354704 }, { "epoch": 30.603864734299517, "grad_norm": 1.4545706510543823, "learning_rate": 0.001, "loss": 1.8952, "step": 354760 }, { "epoch": 30.608695652173914, "grad_norm": 0.5502257943153381, "learning_rate": 0.001, "loss": 1.9034, "step": 354816 }, { "epoch": 30.613526570048307, "grad_norm": 1.8456518650054932, "learning_rate": 0.001, "loss": 1.9101, "step": 354872 }, { "epoch": 30.618357487922705, "grad_norm": 0.6801899075508118, "learning_rate": 0.001, "loss": 1.9055, "step": 354928 }, { "epoch": 30.6231884057971, "grad_norm": 3.9923291206359863, "learning_rate": 0.001, "loss": 1.9051, "step": 354984 }, { "epoch": 30.6280193236715, "grad_norm": 0.29150211811065674, "learning_rate": 0.001, "loss": 1.9011, "step": 355040 }, { "epoch": 30.632850241545892, "grad_norm": 1.7221652269363403, "learning_rate": 0.001, "loss": 1.8993, "step": 355096 }, { "epoch": 30.63768115942029, "grad_norm": 0.46745234727859497, "learning_rate": 0.001, "loss": 1.8943, "step": 355152 }, { "epoch": 30.642512077294686, "grad_norm": 0.4678397476673126, "learning_rate": 0.001, "loss": 1.8977, "step": 355208 }, { "epoch": 30.647342995169083, "grad_norm": 1.9967821836471558, "learning_rate": 0.001, "loss": 1.8905, "step": 355264 }, { "epoch": 30.652173913043477, "grad_norm": 3.109009265899658, "learning_rate": 0.001, "loss": 1.8916, "step": 355320 }, { "epoch": 30.657004830917874, "grad_norm": 0.9057146906852722, "learning_rate": 0.001, "loss": 1.8862, "step": 355376 }, { "epoch": 30.66183574879227, "grad_norm": 0.5076047778129578, "learning_rate": 0.001, "loss": 1.8879, "step": 355432 }, { "epoch": 30.666666666666668, "grad_norm": 0.2994730472564697, "learning_rate": 0.001, "loss": 1.8877, "step": 355488 }, { "epoch": 30.67149758454106, "grad_norm": 0.3178282380104065, "learning_rate": 0.001, "loss": 1.8893, "step": 355544 }, { "epoch": 30.67632850241546, "grad_norm": 1.4147961139678955, "learning_rate": 0.001, "loss": 1.8959, "step": 355600 }, { "epoch": 30.681159420289855, "grad_norm": 1.8748258352279663, "learning_rate": 0.001, "loss": 1.8867, "step": 355656 }, { "epoch": 30.685990338164252, "grad_norm": 0.29489418864250183, "learning_rate": 0.001, "loss": 1.8845, "step": 355712 }, { "epoch": 30.690821256038646, "grad_norm": 1.7525562047958374, "learning_rate": 0.001, "loss": 1.8845, "step": 355768 }, { "epoch": 30.695652173913043, "grad_norm": 0.5614147186279297, "learning_rate": 0.001, "loss": 1.882, "step": 355824 }, { "epoch": 30.70048309178744, "grad_norm": 2.368215322494507, "learning_rate": 0.001, "loss": 1.8813, "step": 355880 }, { "epoch": 30.705314009661837, "grad_norm": 0.9378824830055237, "learning_rate": 0.001, "loss": 1.8916, "step": 355936 }, { "epoch": 30.71014492753623, "grad_norm": 1.390376091003418, "learning_rate": 0.001, "loss": 1.8809, "step": 355992 }, { "epoch": 30.714975845410628, "grad_norm": 0.41881877183914185, "learning_rate": 0.001, "loss": 1.8822, "step": 356048 }, { "epoch": 30.719806763285025, "grad_norm": 0.39202064275741577, "learning_rate": 0.001, "loss": 1.8737, "step": 356104 }, { "epoch": 30.72463768115942, "grad_norm": 7.556109428405762, "learning_rate": 0.001, "loss": 1.8834, "step": 356160 }, { "epoch": 30.729468599033815, "grad_norm": 0.6123942136764526, "learning_rate": 0.001, "loss": 1.8846, "step": 356216 }, { "epoch": 30.734299516908212, "grad_norm": 0.8291679620742798, "learning_rate": 0.001, "loss": 1.901, "step": 356272 }, { "epoch": 30.73913043478261, "grad_norm": 1.5180034637451172, "learning_rate": 0.001, "loss": 1.8911, "step": 356328 }, { "epoch": 30.743961352657006, "grad_norm": 2.6228694915771484, "learning_rate": 0.001, "loss": 1.9048, "step": 356384 }, { "epoch": 30.7487922705314, "grad_norm": 2.0018997192382812, "learning_rate": 0.001, "loss": 1.8942, "step": 356440 }, { "epoch": 30.753623188405797, "grad_norm": 1.1679586172103882, "learning_rate": 0.001, "loss": 1.9056, "step": 356496 }, { "epoch": 30.758454106280194, "grad_norm": 0.3004785180091858, "learning_rate": 0.001, "loss": 1.9099, "step": 356552 }, { "epoch": 30.76328502415459, "grad_norm": 0.6034873127937317, "learning_rate": 0.001, "loss": 1.8933, "step": 356608 }, { "epoch": 30.768115942028984, "grad_norm": 3.404514789581299, "learning_rate": 0.001, "loss": 1.8922, "step": 356664 }, { "epoch": 30.77294685990338, "grad_norm": 0.37851274013519287, "learning_rate": 0.001, "loss": 1.8978, "step": 356720 }, { "epoch": 30.77777777777778, "grad_norm": 0.35049712657928467, "learning_rate": 0.001, "loss": 1.8948, "step": 356776 }, { "epoch": 30.782608695652176, "grad_norm": 1.1702537536621094, "learning_rate": 0.001, "loss": 1.8843, "step": 356832 }, { "epoch": 30.78743961352657, "grad_norm": 0.41968366503715515, "learning_rate": 0.001, "loss": 1.8823, "step": 356888 }, { "epoch": 30.792270531400966, "grad_norm": 18.203081130981445, "learning_rate": 0.001, "loss": 1.8966, "step": 356944 }, { "epoch": 30.797101449275363, "grad_norm": 1.1103543043136597, "learning_rate": 0.001, "loss": 1.892, "step": 357000 }, { "epoch": 30.80193236714976, "grad_norm": 0.7177728414535522, "learning_rate": 0.001, "loss": 1.8938, "step": 357056 }, { "epoch": 30.806763285024154, "grad_norm": 1.4654901027679443, "learning_rate": 0.001, "loss": 1.8705, "step": 357112 }, { "epoch": 30.81159420289855, "grad_norm": 0.3273821175098419, "learning_rate": 0.001, "loss": 1.8857, "step": 357168 }, { "epoch": 30.816425120772948, "grad_norm": 0.5282752513885498, "learning_rate": 0.001, "loss": 1.8901, "step": 357224 }, { "epoch": 30.82125603864734, "grad_norm": 0.35609912872314453, "learning_rate": 0.001, "loss": 1.8864, "step": 357280 }, { "epoch": 30.82608695652174, "grad_norm": 0.7931268215179443, "learning_rate": 0.001, "loss": 1.8857, "step": 357336 }, { "epoch": 30.830917874396135, "grad_norm": 1.160841464996338, "learning_rate": 0.001, "loss": 1.8968, "step": 357392 }, { "epoch": 30.835748792270532, "grad_norm": 0.2842795252799988, "learning_rate": 0.001, "loss": 1.9003, "step": 357448 }, { "epoch": 30.840579710144926, "grad_norm": 0.33713632822036743, "learning_rate": 0.001, "loss": 1.8896, "step": 357504 }, { "epoch": 30.845410628019323, "grad_norm": 0.9215450286865234, "learning_rate": 0.001, "loss": 1.8765, "step": 357560 }, { "epoch": 30.85024154589372, "grad_norm": 0.43814125657081604, "learning_rate": 0.001, "loss": 1.8852, "step": 357616 }, { "epoch": 30.855072463768117, "grad_norm": 7.967289924621582, "learning_rate": 0.001, "loss": 1.885, "step": 357672 }, { "epoch": 30.85990338164251, "grad_norm": 1.269940972328186, "learning_rate": 0.001, "loss": 1.883, "step": 357728 }, { "epoch": 30.864734299516908, "grad_norm": 0.45210787653923035, "learning_rate": 0.001, "loss": 1.8877, "step": 357784 }, { "epoch": 30.869565217391305, "grad_norm": 1.2319374084472656, "learning_rate": 0.001, "loss": 1.8985, "step": 357840 }, { "epoch": 30.8743961352657, "grad_norm": 0.8083316683769226, "learning_rate": 0.001, "loss": 1.8862, "step": 357896 }, { "epoch": 30.879227053140095, "grad_norm": 4.0241570472717285, "learning_rate": 0.001, "loss": 1.8936, "step": 357952 }, { "epoch": 30.884057971014492, "grad_norm": 0.44804444909095764, "learning_rate": 0.001, "loss": 1.8871, "step": 358008 }, { "epoch": 30.88888888888889, "grad_norm": 1.57484769821167, "learning_rate": 0.001, "loss": 1.8819, "step": 358064 }, { "epoch": 30.893719806763286, "grad_norm": 5.863276481628418, "learning_rate": 0.001, "loss": 1.876, "step": 358120 }, { "epoch": 30.89855072463768, "grad_norm": 0.8706706762313843, "learning_rate": 0.001, "loss": 1.8819, "step": 358176 }, { "epoch": 30.903381642512077, "grad_norm": 2.754408121109009, "learning_rate": 0.001, "loss": 1.8798, "step": 358232 }, { "epoch": 30.908212560386474, "grad_norm": 1.2650333642959595, "learning_rate": 0.001, "loss": 1.8746, "step": 358288 }, { "epoch": 30.91304347826087, "grad_norm": 0.4831315875053406, "learning_rate": 0.001, "loss": 1.8843, "step": 358344 }, { "epoch": 30.917874396135264, "grad_norm": 0.5977744460105896, "learning_rate": 0.001, "loss": 1.8837, "step": 358400 }, { "epoch": 30.92270531400966, "grad_norm": 0.367818683385849, "learning_rate": 0.001, "loss": 1.8859, "step": 358456 }, { "epoch": 30.92753623188406, "grad_norm": 1.309637188911438, "learning_rate": 0.001, "loss": 1.8913, "step": 358512 }, { "epoch": 30.932367149758456, "grad_norm": 0.42118874192237854, "learning_rate": 0.001, "loss": 1.8908, "step": 358568 }, { "epoch": 30.93719806763285, "grad_norm": 0.9080486297607422, "learning_rate": 0.001, "loss": 1.8866, "step": 358624 }, { "epoch": 30.942028985507246, "grad_norm": 1.2292617559432983, "learning_rate": 0.001, "loss": 1.8896, "step": 358680 }, { "epoch": 30.946859903381643, "grad_norm": 1.4693320989608765, "learning_rate": 0.001, "loss": 1.8889, "step": 358736 }, { "epoch": 30.95169082125604, "grad_norm": 0.9383873343467712, "learning_rate": 0.001, "loss": 1.8832, "step": 358792 }, { "epoch": 30.956521739130434, "grad_norm": 0.3918863832950592, "learning_rate": 0.001, "loss": 1.8807, "step": 358848 }, { "epoch": 30.96135265700483, "grad_norm": 0.7786741256713867, "learning_rate": 0.001, "loss": 1.8793, "step": 358904 }, { "epoch": 30.966183574879228, "grad_norm": 0.6408087015151978, "learning_rate": 0.001, "loss": 1.879, "step": 358960 }, { "epoch": 30.971014492753625, "grad_norm": 1.3860994577407837, "learning_rate": 0.001, "loss": 1.8866, "step": 359016 }, { "epoch": 30.97584541062802, "grad_norm": 0.4226723909378052, "learning_rate": 0.001, "loss": 1.8848, "step": 359072 }, { "epoch": 30.980676328502415, "grad_norm": 0.40830492973327637, "learning_rate": 0.001, "loss": 1.8828, "step": 359128 }, { "epoch": 30.985507246376812, "grad_norm": 2.3204100131988525, "learning_rate": 0.001, "loss": 1.879, "step": 359184 }, { "epoch": 30.990338164251206, "grad_norm": 4.263381481170654, "learning_rate": 0.001, "loss": 1.8649, "step": 359240 }, { "epoch": 30.995169082125603, "grad_norm": 0.6153120994567871, "learning_rate": 0.001, "loss": 1.8724, "step": 359296 }, { "epoch": 31.0, "grad_norm": 0.5124632120132446, "learning_rate": 0.001, "loss": 1.8631, "step": 359352 }, { "epoch": 31.004830917874397, "grad_norm": 0.7665235996246338, "learning_rate": 0.001, "loss": 1.8441, "step": 359408 }, { "epoch": 31.00966183574879, "grad_norm": 3.1548423767089844, "learning_rate": 0.001, "loss": 1.8344, "step": 359464 }, { "epoch": 31.014492753623188, "grad_norm": 1.039751648902893, "learning_rate": 0.001, "loss": 1.8415, "step": 359520 }, { "epoch": 31.019323671497585, "grad_norm": 0.7947804927825928, "learning_rate": 0.001, "loss": 1.8436, "step": 359576 }, { "epoch": 31.02415458937198, "grad_norm": 0.4591917097568512, "learning_rate": 0.001, "loss": 1.8449, "step": 359632 }, { "epoch": 31.028985507246375, "grad_norm": 1.48048996925354, "learning_rate": 0.001, "loss": 1.8428, "step": 359688 }, { "epoch": 31.033816425120772, "grad_norm": 0.28117677569389343, "learning_rate": 0.001, "loss": 1.8507, "step": 359744 }, { "epoch": 31.03864734299517, "grad_norm": 3.1333396434783936, "learning_rate": 0.001, "loss": 1.8513, "step": 359800 }, { "epoch": 31.043478260869566, "grad_norm": 2.074233055114746, "learning_rate": 0.001, "loss": 1.8483, "step": 359856 }, { "epoch": 31.04830917874396, "grad_norm": 1.3762333393096924, "learning_rate": 0.001, "loss": 1.8438, "step": 359912 }, { "epoch": 31.053140096618357, "grad_norm": 0.8963009119033813, "learning_rate": 0.001, "loss": 1.8596, "step": 359968 }, { "epoch": 31.057971014492754, "grad_norm": 0.3135211765766144, "learning_rate": 0.001, "loss": 1.8676, "step": 360024 }, { "epoch": 31.06280193236715, "grad_norm": 2.031804323196411, "learning_rate": 0.001, "loss": 1.8837, "step": 360080 }, { "epoch": 31.067632850241544, "grad_norm": 0.8728108406066895, "learning_rate": 0.001, "loss": 1.8829, "step": 360136 }, { "epoch": 31.07246376811594, "grad_norm": 1.7144250869750977, "learning_rate": 0.001, "loss": 1.8794, "step": 360192 }, { "epoch": 31.07729468599034, "grad_norm": 0.5351441502571106, "learning_rate": 0.001, "loss": 1.8579, "step": 360248 }, { "epoch": 31.082125603864736, "grad_norm": 0.49805018305778503, "learning_rate": 0.001, "loss": 1.8494, "step": 360304 }, { "epoch": 31.08695652173913, "grad_norm": 0.5480920076370239, "learning_rate": 0.001, "loss": 1.8486, "step": 360360 }, { "epoch": 31.091787439613526, "grad_norm": 1.225471019744873, "learning_rate": 0.001, "loss": 1.8475, "step": 360416 }, { "epoch": 31.096618357487923, "grad_norm": 4.134900093078613, "learning_rate": 0.001, "loss": 1.8508, "step": 360472 }, { "epoch": 31.10144927536232, "grad_norm": 0.35351380705833435, "learning_rate": 0.001, "loss": 1.8529, "step": 360528 }, { "epoch": 31.106280193236714, "grad_norm": 3.017313003540039, "learning_rate": 0.001, "loss": 1.8399, "step": 360584 }, { "epoch": 31.11111111111111, "grad_norm": 0.5390178561210632, "learning_rate": 0.001, "loss": 1.8425, "step": 360640 }, { "epoch": 31.115942028985508, "grad_norm": 0.28583845496177673, "learning_rate": 0.001, "loss": 1.8476, "step": 360696 }, { "epoch": 31.120772946859905, "grad_norm": 0.38308778405189514, "learning_rate": 0.001, "loss": 1.8486, "step": 360752 }, { "epoch": 31.1256038647343, "grad_norm": 2.130467653274536, "learning_rate": 0.001, "loss": 1.8513, "step": 360808 }, { "epoch": 31.130434782608695, "grad_norm": 5.935608863830566, "learning_rate": 0.001, "loss": 1.8837, "step": 360864 }, { "epoch": 31.135265700483092, "grad_norm": 1.7721362113952637, "learning_rate": 0.001, "loss": 1.9122, "step": 360920 }, { "epoch": 31.14009661835749, "grad_norm": 2.543849468231201, "learning_rate": 0.001, "loss": 1.9048, "step": 360976 }, { "epoch": 31.144927536231883, "grad_norm": 2.250897169113159, "learning_rate": 0.001, "loss": 1.8928, "step": 361032 }, { "epoch": 31.14975845410628, "grad_norm": 1.2335624694824219, "learning_rate": 0.001, "loss": 1.8919, "step": 361088 }, { "epoch": 31.154589371980677, "grad_norm": 0.7922707200050354, "learning_rate": 0.001, "loss": 1.9071, "step": 361144 }, { "epoch": 31.159420289855074, "grad_norm": 6.928737640380859, "learning_rate": 0.001, "loss": 1.9078, "step": 361200 }, { "epoch": 31.164251207729468, "grad_norm": 1.592607021331787, "learning_rate": 0.001, "loss": 1.9179, "step": 361256 }, { "epoch": 31.169082125603865, "grad_norm": 1.4074281454086304, "learning_rate": 0.001, "loss": 1.9191, "step": 361312 }, { "epoch": 31.17391304347826, "grad_norm": 1.2853829860687256, "learning_rate": 0.001, "loss": 1.909, "step": 361368 }, { "epoch": 31.17874396135266, "grad_norm": 5.220868110656738, "learning_rate": 0.001, "loss": 1.8951, "step": 361424 }, { "epoch": 31.183574879227052, "grad_norm": 5.939455986022949, "learning_rate": 0.001, "loss": 1.8807, "step": 361480 }, { "epoch": 31.18840579710145, "grad_norm": 0.724431037902832, "learning_rate": 0.001, "loss": 1.8914, "step": 361536 }, { "epoch": 31.193236714975846, "grad_norm": 2.104313373565674, "learning_rate": 0.001, "loss": 1.8864, "step": 361592 }, { "epoch": 31.19806763285024, "grad_norm": 0.5674176216125488, "learning_rate": 0.001, "loss": 1.8791, "step": 361648 }, { "epoch": 31.202898550724637, "grad_norm": 1.9735665321350098, "learning_rate": 0.001, "loss": 1.8819, "step": 361704 }, { "epoch": 31.207729468599034, "grad_norm": 2.4828200340270996, "learning_rate": 0.001, "loss": 1.8917, "step": 361760 }, { "epoch": 31.21256038647343, "grad_norm": 2.5781335830688477, "learning_rate": 0.001, "loss": 1.8921, "step": 361816 }, { "epoch": 31.217391304347824, "grad_norm": 1.0383257865905762, "learning_rate": 0.001, "loss": 1.89, "step": 361872 }, { "epoch": 31.22222222222222, "grad_norm": 8.668288230895996, "learning_rate": 0.001, "loss": 1.8907, "step": 361928 }, { "epoch": 31.22705314009662, "grad_norm": 1.725039005279541, "learning_rate": 0.001, "loss": 1.8887, "step": 361984 }, { "epoch": 31.231884057971016, "grad_norm": 1.3334895372390747, "learning_rate": 0.001, "loss": 1.889, "step": 362040 }, { "epoch": 31.23671497584541, "grad_norm": 1.0550283193588257, "learning_rate": 0.001, "loss": 1.9006, "step": 362096 }, { "epoch": 31.241545893719806, "grad_norm": 0.9315765500068665, "learning_rate": 0.001, "loss": 1.9057, "step": 362152 }, { "epoch": 31.246376811594203, "grad_norm": 0.5694554448127747, "learning_rate": 0.001, "loss": 1.9001, "step": 362208 }, { "epoch": 31.2512077294686, "grad_norm": 4.522511959075928, "learning_rate": 0.001, "loss": 1.8935, "step": 362264 }, { "epoch": 31.256038647342994, "grad_norm": 0.9446779489517212, "learning_rate": 0.001, "loss": 1.8845, "step": 362320 }, { "epoch": 31.26086956521739, "grad_norm": 0.8478748798370361, "learning_rate": 0.001, "loss": 1.887, "step": 362376 }, { "epoch": 31.265700483091788, "grad_norm": 0.7540222406387329, "learning_rate": 0.001, "loss": 1.8911, "step": 362432 }, { "epoch": 31.270531400966185, "grad_norm": 1.1729736328125, "learning_rate": 0.001, "loss": 1.8813, "step": 362488 }, { "epoch": 31.27536231884058, "grad_norm": 0.775435745716095, "learning_rate": 0.001, "loss": 1.889, "step": 362544 }, { "epoch": 31.280193236714975, "grad_norm": 4.484166145324707, "learning_rate": 0.001, "loss": 1.8796, "step": 362600 }, { "epoch": 31.285024154589372, "grad_norm": 0.43832361698150635, "learning_rate": 0.001, "loss": 1.8937, "step": 362656 }, { "epoch": 31.28985507246377, "grad_norm": 2.779090404510498, "learning_rate": 0.001, "loss": 1.8878, "step": 362712 }, { "epoch": 31.294685990338163, "grad_norm": 1.391725778579712, "learning_rate": 0.001, "loss": 1.8871, "step": 362768 }, { "epoch": 31.29951690821256, "grad_norm": 4.036783695220947, "learning_rate": 0.001, "loss": 1.8884, "step": 362824 }, { "epoch": 31.304347826086957, "grad_norm": 3.537506580352783, "learning_rate": 0.001, "loss": 1.8938, "step": 362880 }, { "epoch": 31.309178743961354, "grad_norm": 1.4143881797790527, "learning_rate": 0.001, "loss": 1.884, "step": 362936 }, { "epoch": 31.314009661835748, "grad_norm": 16.878278732299805, "learning_rate": 0.001, "loss": 1.8838, "step": 362992 }, { "epoch": 31.318840579710145, "grad_norm": 3.729255199432373, "learning_rate": 0.001, "loss": 1.8909, "step": 363048 }, { "epoch": 31.32367149758454, "grad_norm": 3.9143974781036377, "learning_rate": 0.001, "loss": 1.8997, "step": 363104 }, { "epoch": 31.32850241545894, "grad_norm": 2.3262157440185547, "learning_rate": 0.001, "loss": 1.9033, "step": 363160 }, { "epoch": 31.333333333333332, "grad_norm": 4.7032952308654785, "learning_rate": 0.001, "loss": 1.915, "step": 363216 }, { "epoch": 31.33816425120773, "grad_norm": 2.9866273403167725, "learning_rate": 0.001, "loss": 1.9175, "step": 363272 }, { "epoch": 31.342995169082126, "grad_norm": 2.556567668914795, "learning_rate": 0.001, "loss": 1.9301, "step": 363328 }, { "epoch": 31.347826086956523, "grad_norm": 0.6238303780555725, "learning_rate": 0.001, "loss": 1.9048, "step": 363384 }, { "epoch": 31.352657004830917, "grad_norm": 3.960972309112549, "learning_rate": 0.001, "loss": 1.8953, "step": 363440 }, { "epoch": 31.357487922705314, "grad_norm": 0.8117548227310181, "learning_rate": 0.001, "loss": 1.8884, "step": 363496 }, { "epoch": 31.36231884057971, "grad_norm": 95.2965087890625, "learning_rate": 0.001, "loss": 1.9068, "step": 363552 }, { "epoch": 31.367149758454108, "grad_norm": 2.636323928833008, "learning_rate": 0.001, "loss": 1.9051, "step": 363608 }, { "epoch": 31.3719806763285, "grad_norm": 1.159105658531189, "learning_rate": 0.001, "loss": 1.9027, "step": 363664 }, { "epoch": 31.3768115942029, "grad_norm": 0.9292151927947998, "learning_rate": 0.001, "loss": 1.9031, "step": 363720 }, { "epoch": 31.381642512077295, "grad_norm": 2.7445006370544434, "learning_rate": 0.001, "loss": 1.912, "step": 363776 }, { "epoch": 31.386473429951693, "grad_norm": 1.198744535446167, "learning_rate": 0.001, "loss": 1.9196, "step": 363832 }, { "epoch": 31.391304347826086, "grad_norm": 18.0750789642334, "learning_rate": 0.001, "loss": 1.9173, "step": 363888 }, { "epoch": 31.396135265700483, "grad_norm": 0.8215798735618591, "learning_rate": 0.001, "loss": 1.9008, "step": 363944 }, { "epoch": 31.40096618357488, "grad_norm": 2.4406497478485107, "learning_rate": 0.001, "loss": 1.8952, "step": 364000 }, { "epoch": 31.405797101449274, "grad_norm": 0.9356613755226135, "learning_rate": 0.001, "loss": 1.8889, "step": 364056 }, { "epoch": 31.41062801932367, "grad_norm": 0.7125791907310486, "learning_rate": 0.001, "loss": 1.894, "step": 364112 }, { "epoch": 31.415458937198068, "grad_norm": 1.1252639293670654, "learning_rate": 0.001, "loss": 1.8945, "step": 364168 }, { "epoch": 31.420289855072465, "grad_norm": 2.4443447589874268, "learning_rate": 0.001, "loss": 1.8898, "step": 364224 }, { "epoch": 31.42512077294686, "grad_norm": 0.7925608158111572, "learning_rate": 0.001, "loss": 1.8819, "step": 364280 }, { "epoch": 31.429951690821255, "grad_norm": 1.6269303560256958, "learning_rate": 0.001, "loss": 1.8901, "step": 364336 }, { "epoch": 31.434782608695652, "grad_norm": 1.0643422603607178, "learning_rate": 0.001, "loss": 1.8965, "step": 364392 }, { "epoch": 31.43961352657005, "grad_norm": 6.14937686920166, "learning_rate": 0.001, "loss": 1.9056, "step": 364448 }, { "epoch": 31.444444444444443, "grad_norm": 0.7519615292549133, "learning_rate": 0.001, "loss": 1.9022, "step": 364504 }, { "epoch": 31.44927536231884, "grad_norm": 2.552140474319458, "learning_rate": 0.001, "loss": 1.8981, "step": 364560 }, { "epoch": 31.454106280193237, "grad_norm": 0.6152862310409546, "learning_rate": 0.001, "loss": 1.8895, "step": 364616 }, { "epoch": 31.458937198067634, "grad_norm": 2.9519598484039307, "learning_rate": 0.001, "loss": 1.8902, "step": 364672 }, { "epoch": 31.463768115942027, "grad_norm": 5.124978542327881, "learning_rate": 0.001, "loss": 1.8894, "step": 364728 }, { "epoch": 31.468599033816425, "grad_norm": 0.8347622752189636, "learning_rate": 0.001, "loss": 1.8838, "step": 364784 }, { "epoch": 31.47342995169082, "grad_norm": 1.9144797325134277, "learning_rate": 0.001, "loss": 1.8933, "step": 364840 }, { "epoch": 31.47826086956522, "grad_norm": 1.066724419593811, "learning_rate": 0.001, "loss": 1.8863, "step": 364896 }, { "epoch": 31.483091787439612, "grad_norm": 0.5903692245483398, "learning_rate": 0.001, "loss": 1.8875, "step": 364952 }, { "epoch": 31.48792270531401, "grad_norm": 1.2487493753433228, "learning_rate": 0.001, "loss": 1.883, "step": 365008 }, { "epoch": 31.492753623188406, "grad_norm": 124.77029418945312, "learning_rate": 0.001, "loss": 1.8796, "step": 365064 }, { "epoch": 31.497584541062803, "grad_norm": 0.7982252836227417, "learning_rate": 0.001, "loss": 1.8914, "step": 365120 }, { "epoch": 31.502415458937197, "grad_norm": 1.2779284715652466, "learning_rate": 0.001, "loss": 1.8959, "step": 365176 }, { "epoch": 31.507246376811594, "grad_norm": 1.3816096782684326, "learning_rate": 0.001, "loss": 1.8978, "step": 365232 }, { "epoch": 31.51207729468599, "grad_norm": 1.5434436798095703, "learning_rate": 0.001, "loss": 1.8929, "step": 365288 }, { "epoch": 31.516908212560388, "grad_norm": 0.633134126663208, "learning_rate": 0.001, "loss": 1.8979, "step": 365344 }, { "epoch": 31.52173913043478, "grad_norm": 0.5954933166503906, "learning_rate": 0.001, "loss": 1.9004, "step": 365400 }, { "epoch": 31.52657004830918, "grad_norm": 1.0120669603347778, "learning_rate": 0.001, "loss": 1.8991, "step": 365456 }, { "epoch": 31.531400966183575, "grad_norm": 1.978076457977295, "learning_rate": 0.001, "loss": 1.898, "step": 365512 }, { "epoch": 31.536231884057973, "grad_norm": 1.6839720010757446, "learning_rate": 0.001, "loss": 1.8872, "step": 365568 }, { "epoch": 31.541062801932366, "grad_norm": 0.29726824164390564, "learning_rate": 0.001, "loss": 1.8839, "step": 365624 }, { "epoch": 31.545893719806763, "grad_norm": 15.902640342712402, "learning_rate": 0.001, "loss": 1.8721, "step": 365680 }, { "epoch": 31.55072463768116, "grad_norm": 1.433428168296814, "learning_rate": 0.001, "loss": 1.8802, "step": 365736 }, { "epoch": 31.555555555555557, "grad_norm": 1.1068814992904663, "learning_rate": 0.001, "loss": 1.8736, "step": 365792 }, { "epoch": 31.56038647342995, "grad_norm": 5.274502277374268, "learning_rate": 0.001, "loss": 1.8742, "step": 365848 }, { "epoch": 31.565217391304348, "grad_norm": 0.9520684480667114, "learning_rate": 0.001, "loss": 1.8735, "step": 365904 }, { "epoch": 31.570048309178745, "grad_norm": 0.44822487235069275, "learning_rate": 0.001, "loss": 1.8785, "step": 365960 }, { "epoch": 31.57487922705314, "grad_norm": 2.8123536109924316, "learning_rate": 0.001, "loss": 1.8792, "step": 366016 }, { "epoch": 31.579710144927535, "grad_norm": 0.35997501015663147, "learning_rate": 0.001, "loss": 1.8855, "step": 366072 }, { "epoch": 31.584541062801932, "grad_norm": 2.1557633876800537, "learning_rate": 0.001, "loss": 1.8853, "step": 366128 }, { "epoch": 31.58937198067633, "grad_norm": 2.0968704223632812, "learning_rate": 0.001, "loss": 1.8952, "step": 366184 }, { "epoch": 31.594202898550726, "grad_norm": 0.5686438679695129, "learning_rate": 0.001, "loss": 1.9056, "step": 366240 }, { "epoch": 31.59903381642512, "grad_norm": 0.28103092312812805, "learning_rate": 0.001, "loss": 1.89, "step": 366296 }, { "epoch": 31.603864734299517, "grad_norm": 0.43879464268684387, "learning_rate": 0.001, "loss": 1.8859, "step": 366352 }, { "epoch": 31.608695652173914, "grad_norm": 0.5932091474533081, "learning_rate": 0.001, "loss": 1.8905, "step": 366408 }, { "epoch": 31.613526570048307, "grad_norm": 0.5870943665504456, "learning_rate": 0.001, "loss": 1.8837, "step": 366464 }, { "epoch": 31.618357487922705, "grad_norm": 0.7316195368766785, "learning_rate": 0.001, "loss": 1.8764, "step": 366520 }, { "epoch": 31.6231884057971, "grad_norm": 0.6806275248527527, "learning_rate": 0.001, "loss": 1.8816, "step": 366576 }, { "epoch": 31.6280193236715, "grad_norm": 0.5371478199958801, "learning_rate": 0.001, "loss": 1.892, "step": 366632 }, { "epoch": 31.632850241545892, "grad_norm": 0.516328752040863, "learning_rate": 0.001, "loss": 1.8922, "step": 366688 }, { "epoch": 31.63768115942029, "grad_norm": 0.26439759135246277, "learning_rate": 0.001, "loss": 1.8953, "step": 366744 }, { "epoch": 31.642512077294686, "grad_norm": 0.45338067412376404, "learning_rate": 0.001, "loss": 1.876, "step": 366800 }, { "epoch": 31.647342995169083, "grad_norm": 0.3305548131465912, "learning_rate": 0.001, "loss": 1.8697, "step": 366856 }, { "epoch": 31.652173913043477, "grad_norm": 1.032436490058899, "learning_rate": 0.001, "loss": 1.8694, "step": 366912 }, { "epoch": 31.657004830917874, "grad_norm": 1.9351085424423218, "learning_rate": 0.001, "loss": 1.8791, "step": 366968 }, { "epoch": 31.66183574879227, "grad_norm": 1.9809174537658691, "learning_rate": 0.001, "loss": 1.8844, "step": 367024 }, { "epoch": 31.666666666666668, "grad_norm": 2.08723783493042, "learning_rate": 0.001, "loss": 1.8825, "step": 367080 }, { "epoch": 31.67149758454106, "grad_norm": 1.454206943511963, "learning_rate": 0.001, "loss": 1.8772, "step": 367136 }, { "epoch": 31.67632850241546, "grad_norm": 0.5978344678878784, "learning_rate": 0.001, "loss": 1.8846, "step": 367192 }, { "epoch": 31.681159420289855, "grad_norm": 13.173473358154297, "learning_rate": 0.001, "loss": 1.8922, "step": 367248 }, { "epoch": 31.685990338164252, "grad_norm": 1.1626501083374023, "learning_rate": 0.001, "loss": 1.8779, "step": 367304 }, { "epoch": 31.690821256038646, "grad_norm": 0.4801710247993469, "learning_rate": 0.001, "loss": 1.8819, "step": 367360 }, { "epoch": 31.695652173913043, "grad_norm": 0.8886069059371948, "learning_rate": 0.001, "loss": 1.8739, "step": 367416 }, { "epoch": 31.70048309178744, "grad_norm": 1.299600601196289, "learning_rate": 0.001, "loss": 1.877, "step": 367472 }, { "epoch": 31.705314009661837, "grad_norm": 2.892421007156372, "learning_rate": 0.001, "loss": 1.8789, "step": 367528 }, { "epoch": 31.71014492753623, "grad_norm": 2.0238373279571533, "learning_rate": 0.001, "loss": 1.8703, "step": 367584 }, { "epoch": 31.714975845410628, "grad_norm": 1.7803832292556763, "learning_rate": 0.001, "loss": 1.8778, "step": 367640 }, { "epoch": 31.719806763285025, "grad_norm": 0.3582864999771118, "learning_rate": 0.001, "loss": 1.8903, "step": 367696 }, { "epoch": 31.72463768115942, "grad_norm": 1.3473167419433594, "learning_rate": 0.001, "loss": 1.8757, "step": 367752 }, { "epoch": 31.729468599033815, "grad_norm": 17.073017120361328, "learning_rate": 0.001, "loss": 1.8701, "step": 367808 }, { "epoch": 31.734299516908212, "grad_norm": 0.895175576210022, "learning_rate": 0.001, "loss": 1.8756, "step": 367864 }, { "epoch": 31.73913043478261, "grad_norm": 0.3503738343715668, "learning_rate": 0.001, "loss": 1.8886, "step": 367920 }, { "epoch": 31.743961352657006, "grad_norm": 0.6311994791030884, "learning_rate": 0.001, "loss": 1.8734, "step": 367976 }, { "epoch": 31.7487922705314, "grad_norm": 2.110567092895508, "learning_rate": 0.001, "loss": 1.8588, "step": 368032 }, { "epoch": 31.753623188405797, "grad_norm": 1.9593119621276855, "learning_rate": 0.001, "loss": 1.8748, "step": 368088 }, { "epoch": 31.758454106280194, "grad_norm": 0.4072810113430023, "learning_rate": 0.001, "loss": 1.8576, "step": 368144 }, { "epoch": 31.76328502415459, "grad_norm": 3.2993719577789307, "learning_rate": 0.001, "loss": 1.8613, "step": 368200 }, { "epoch": 31.768115942028984, "grad_norm": 0.33148321509361267, "learning_rate": 0.001, "loss": 1.8755, "step": 368256 }, { "epoch": 31.77294685990338, "grad_norm": 0.49064525961875916, "learning_rate": 0.001, "loss": 1.8731, "step": 368312 }, { "epoch": 31.77777777777778, "grad_norm": 0.918169379234314, "learning_rate": 0.001, "loss": 1.872, "step": 368368 }, { "epoch": 31.782608695652176, "grad_norm": 0.3168325424194336, "learning_rate": 0.001, "loss": 1.8646, "step": 368424 }, { "epoch": 31.78743961352657, "grad_norm": 0.7635107040405273, "learning_rate": 0.001, "loss": 1.8652, "step": 368480 }, { "epoch": 31.792270531400966, "grad_norm": 0.4566832184791565, "learning_rate": 0.001, "loss": 1.874, "step": 368536 }, { "epoch": 31.797101449275363, "grad_norm": 5.333982944488525, "learning_rate": 0.001, "loss": 1.8723, "step": 368592 }, { "epoch": 31.80193236714976, "grad_norm": 0.3626226782798767, "learning_rate": 0.001, "loss": 1.8692, "step": 368648 }, { "epoch": 31.806763285024154, "grad_norm": 1.1475409269332886, "learning_rate": 0.001, "loss": 1.871, "step": 368704 }, { "epoch": 31.81159420289855, "grad_norm": 0.30854079127311707, "learning_rate": 0.001, "loss": 1.8673, "step": 368760 }, { "epoch": 31.816425120772948, "grad_norm": 1.4551008939743042, "learning_rate": 0.001, "loss": 1.8643, "step": 368816 }, { "epoch": 31.82125603864734, "grad_norm": 0.8948751091957092, "learning_rate": 0.001, "loss": 1.8619, "step": 368872 }, { "epoch": 31.82608695652174, "grad_norm": 8.599570274353027, "learning_rate": 0.001, "loss": 1.8672, "step": 368928 }, { "epoch": 31.830917874396135, "grad_norm": 0.3842622637748718, "learning_rate": 0.001, "loss": 1.8688, "step": 368984 }, { "epoch": 31.835748792270532, "grad_norm": 1.985412359237671, "learning_rate": 0.001, "loss": 1.8689, "step": 369040 }, { "epoch": 31.840579710144926, "grad_norm": 1.8302814960479736, "learning_rate": 0.001, "loss": 1.874, "step": 369096 }, { "epoch": 31.845410628019323, "grad_norm": 11.460724830627441, "learning_rate": 0.001, "loss": 1.8678, "step": 369152 }, { "epoch": 31.85024154589372, "grad_norm": 0.32157501578330994, "learning_rate": 0.001, "loss": 1.875, "step": 369208 }, { "epoch": 31.855072463768117, "grad_norm": 1.1456849575042725, "learning_rate": 0.001, "loss": 1.8585, "step": 369264 }, { "epoch": 31.85990338164251, "grad_norm": 0.9333752393722534, "learning_rate": 0.001, "loss": 1.8709, "step": 369320 }, { "epoch": 31.864734299516908, "grad_norm": 1.6301710605621338, "learning_rate": 0.001, "loss": 1.8767, "step": 369376 }, { "epoch": 31.869565217391305, "grad_norm": 1.123384952545166, "learning_rate": 0.001, "loss": 1.8797, "step": 369432 }, { "epoch": 31.8743961352657, "grad_norm": 2.176255941390991, "learning_rate": 0.001, "loss": 1.893, "step": 369488 }, { "epoch": 31.879227053140095, "grad_norm": 1.0297493934631348, "learning_rate": 0.001, "loss": 1.8958, "step": 369544 }, { "epoch": 31.884057971014492, "grad_norm": 2.150526523590088, "learning_rate": 0.001, "loss": 1.8918, "step": 369600 }, { "epoch": 31.88888888888889, "grad_norm": 0.3463974595069885, "learning_rate": 0.001, "loss": 1.8936, "step": 369656 }, { "epoch": 31.893719806763286, "grad_norm": 0.7792313098907471, "learning_rate": 0.001, "loss": 1.8883, "step": 369712 }, { "epoch": 31.89855072463768, "grad_norm": 1.9169609546661377, "learning_rate": 0.001, "loss": 1.8754, "step": 369768 }, { "epoch": 31.903381642512077, "grad_norm": 2.7753422260284424, "learning_rate": 0.001, "loss": 1.8745, "step": 369824 }, { "epoch": 31.908212560386474, "grad_norm": 0.42601147294044495, "learning_rate": 0.001, "loss": 1.8772, "step": 369880 }, { "epoch": 31.91304347826087, "grad_norm": 1.050900936126709, "learning_rate": 0.001, "loss": 1.884, "step": 369936 }, { "epoch": 31.917874396135264, "grad_norm": 0.4134643077850342, "learning_rate": 0.001, "loss": 1.8962, "step": 369992 }, { "epoch": 31.92270531400966, "grad_norm": 0.978430986404419, "learning_rate": 0.001, "loss": 1.8902, "step": 370048 }, { "epoch": 31.92753623188406, "grad_norm": 0.8464446663856506, "learning_rate": 0.001, "loss": 1.8889, "step": 370104 }, { "epoch": 31.932367149758456, "grad_norm": 0.3765876591205597, "learning_rate": 0.001, "loss": 1.8886, "step": 370160 }, { "epoch": 31.93719806763285, "grad_norm": 0.5416330099105835, "learning_rate": 0.001, "loss": 1.891, "step": 370216 }, { "epoch": 31.942028985507246, "grad_norm": 1.303542971611023, "learning_rate": 0.001, "loss": 1.8926, "step": 370272 }, { "epoch": 31.946859903381643, "grad_norm": 2.5459649562835693, "learning_rate": 0.001, "loss": 1.8946, "step": 370328 }, { "epoch": 31.95169082125604, "grad_norm": 1.2558022737503052, "learning_rate": 0.001, "loss": 1.8906, "step": 370384 }, { "epoch": 31.956521739130434, "grad_norm": 2.809849977493286, "learning_rate": 0.001, "loss": 1.881, "step": 370440 }, { "epoch": 31.96135265700483, "grad_norm": 0.362740159034729, "learning_rate": 0.001, "loss": 1.8794, "step": 370496 }, { "epoch": 31.966183574879228, "grad_norm": 2.121236562728882, "learning_rate": 0.001, "loss": 1.8784, "step": 370552 }, { "epoch": 31.971014492753625, "grad_norm": 4.78397798538208, "learning_rate": 0.001, "loss": 1.8784, "step": 370608 }, { "epoch": 31.97584541062802, "grad_norm": 1.5099594593048096, "learning_rate": 0.001, "loss": 1.8938, "step": 370664 }, { "epoch": 31.980676328502415, "grad_norm": 0.8130882978439331, "learning_rate": 0.001, "loss": 1.8882, "step": 370720 }, { "epoch": 31.985507246376812, "grad_norm": 1.129683017730713, "learning_rate": 0.001, "loss": 1.872, "step": 370776 }, { "epoch": 31.990338164251206, "grad_norm": 0.6803610920906067, "learning_rate": 0.001, "loss": 1.8753, "step": 370832 }, { "epoch": 31.995169082125603, "grad_norm": 1.2479597330093384, "learning_rate": 0.001, "loss": 1.8723, "step": 370888 }, { "epoch": 32.0, "grad_norm": 0.8689315915107727, "learning_rate": 0.001, "loss": 1.8699, "step": 370944 }, { "epoch": 32.00483091787439, "grad_norm": 1.2008572816848755, "learning_rate": 0.001, "loss": 1.8401, "step": 371000 }, { "epoch": 32.009661835748794, "grad_norm": 0.5709912180900574, "learning_rate": 0.001, "loss": 1.8507, "step": 371056 }, { "epoch": 32.01449275362319, "grad_norm": 3.2960045337677, "learning_rate": 0.001, "loss": 1.8383, "step": 371112 }, { "epoch": 32.01932367149758, "grad_norm": 0.9115668535232544, "learning_rate": 0.001, "loss": 1.8525, "step": 371168 }, { "epoch": 32.02415458937198, "grad_norm": 1.3289488554000854, "learning_rate": 0.001, "loss": 1.8554, "step": 371224 }, { "epoch": 32.028985507246375, "grad_norm": 1.4813932180404663, "learning_rate": 0.001, "loss": 1.8484, "step": 371280 }, { "epoch": 32.033816425120776, "grad_norm": 0.4108002781867981, "learning_rate": 0.001, "loss": 1.8353, "step": 371336 }, { "epoch": 32.03864734299517, "grad_norm": 0.29496416449546814, "learning_rate": 0.001, "loss": 1.8398, "step": 371392 }, { "epoch": 32.04347826086956, "grad_norm": 0.4634047746658325, "learning_rate": 0.001, "loss": 1.8412, "step": 371448 }, { "epoch": 32.04830917874396, "grad_norm": 1.7094523906707764, "learning_rate": 0.001, "loss": 1.8481, "step": 371504 }, { "epoch": 32.05314009661836, "grad_norm": 0.6704553365707397, "learning_rate": 0.001, "loss": 1.8481, "step": 371560 }, { "epoch": 32.05797101449275, "grad_norm": 0.42356380820274353, "learning_rate": 0.001, "loss": 1.8417, "step": 371616 }, { "epoch": 32.06280193236715, "grad_norm": 0.3620864450931549, "learning_rate": 0.001, "loss": 1.8393, "step": 371672 }, { "epoch": 32.067632850241544, "grad_norm": 0.45119205117225647, "learning_rate": 0.001, "loss": 1.8359, "step": 371728 }, { "epoch": 32.072463768115945, "grad_norm": 1.766597867012024, "learning_rate": 0.001, "loss": 1.8372, "step": 371784 }, { "epoch": 32.07729468599034, "grad_norm": 0.5697992444038391, "learning_rate": 0.001, "loss": 1.8319, "step": 371840 }, { "epoch": 32.08212560386473, "grad_norm": 0.28652557730674744, "learning_rate": 0.001, "loss": 1.8408, "step": 371896 }, { "epoch": 32.08695652173913, "grad_norm": 0.686758279800415, "learning_rate": 0.001, "loss": 1.8331, "step": 371952 }, { "epoch": 32.091787439613526, "grad_norm": 2.194305896759033, "learning_rate": 0.001, "loss": 1.8422, "step": 372008 }, { "epoch": 32.09661835748792, "grad_norm": 2.3242835998535156, "learning_rate": 0.001, "loss": 1.84, "step": 372064 }, { "epoch": 32.10144927536232, "grad_norm": 2.731315851211548, "learning_rate": 0.001, "loss": 1.8392, "step": 372120 }, { "epoch": 32.106280193236714, "grad_norm": 0.6502743363380432, "learning_rate": 0.001, "loss": 1.8358, "step": 372176 }, { "epoch": 32.111111111111114, "grad_norm": 0.3713311553001404, "learning_rate": 0.001, "loss": 1.8291, "step": 372232 }, { "epoch": 32.11594202898551, "grad_norm": 0.6289574503898621, "learning_rate": 0.001, "loss": 1.8318, "step": 372288 }, { "epoch": 32.1207729468599, "grad_norm": 4.511777400970459, "learning_rate": 0.001, "loss": 1.8437, "step": 372344 }, { "epoch": 32.1256038647343, "grad_norm": 0.5842333436012268, "learning_rate": 0.001, "loss": 1.8578, "step": 372400 }, { "epoch": 32.130434782608695, "grad_norm": 0.4360392987728119, "learning_rate": 0.001, "loss": 1.8416, "step": 372456 }, { "epoch": 32.13526570048309, "grad_norm": 0.7678411602973938, "learning_rate": 0.001, "loss": 1.8304, "step": 372512 }, { "epoch": 32.14009661835749, "grad_norm": 1.5205198526382446, "learning_rate": 0.001, "loss": 1.8344, "step": 372568 }, { "epoch": 32.14492753623188, "grad_norm": 0.41204118728637695, "learning_rate": 0.001, "loss": 1.837, "step": 372624 }, { "epoch": 32.14975845410628, "grad_norm": 0.3045431673526764, "learning_rate": 0.001, "loss": 1.8331, "step": 372680 }, { "epoch": 32.15458937198068, "grad_norm": 1.3136420249938965, "learning_rate": 0.001, "loss": 1.845, "step": 372736 }, { "epoch": 32.15942028985507, "grad_norm": 0.9799510836601257, "learning_rate": 0.001, "loss": 1.8423, "step": 372792 }, { "epoch": 32.16425120772947, "grad_norm": 0.39545878767967224, "learning_rate": 0.001, "loss": 1.8378, "step": 372848 }, { "epoch": 32.169082125603865, "grad_norm": 0.5365079641342163, "learning_rate": 0.001, "loss": 1.8382, "step": 372904 }, { "epoch": 32.17391304347826, "grad_norm": 0.3844505548477173, "learning_rate": 0.001, "loss": 1.8298, "step": 372960 }, { "epoch": 32.17874396135266, "grad_norm": 0.31090861558914185, "learning_rate": 0.001, "loss": 1.8378, "step": 373016 }, { "epoch": 32.18357487922705, "grad_norm": 0.8890419006347656, "learning_rate": 0.001, "loss": 1.8354, "step": 373072 }, { "epoch": 32.18840579710145, "grad_norm": 0.6335465908050537, "learning_rate": 0.001, "loss": 1.8401, "step": 373128 }, { "epoch": 32.193236714975846, "grad_norm": 0.2977682948112488, "learning_rate": 0.001, "loss": 1.8444, "step": 373184 }, { "epoch": 32.19806763285024, "grad_norm": 0.6324213743209839, "learning_rate": 0.001, "loss": 1.8397, "step": 373240 }, { "epoch": 32.20289855072464, "grad_norm": 0.7859739661216736, "learning_rate": 0.001, "loss": 1.836, "step": 373296 }, { "epoch": 32.207729468599034, "grad_norm": 1.256189227104187, "learning_rate": 0.001, "loss": 1.8312, "step": 373352 }, { "epoch": 32.21256038647343, "grad_norm": 2.799471139907837, "learning_rate": 0.001, "loss": 1.8455, "step": 373408 }, { "epoch": 32.21739130434783, "grad_norm": 0.27061259746551514, "learning_rate": 0.001, "loss": 1.8422, "step": 373464 }, { "epoch": 32.22222222222222, "grad_norm": 0.31387603282928467, "learning_rate": 0.001, "loss": 1.8458, "step": 373520 }, { "epoch": 32.227053140096615, "grad_norm": 1.364209532737732, "learning_rate": 0.001, "loss": 1.8466, "step": 373576 }, { "epoch": 32.231884057971016, "grad_norm": 0.7400078177452087, "learning_rate": 0.001, "loss": 1.8364, "step": 373632 }, { "epoch": 32.23671497584541, "grad_norm": 1.4662961959838867, "learning_rate": 0.001, "loss": 1.8522, "step": 373688 }, { "epoch": 32.24154589371981, "grad_norm": 0.3981078863143921, "learning_rate": 0.001, "loss": 1.8476, "step": 373744 }, { "epoch": 32.2463768115942, "grad_norm": 0.3808528482913971, "learning_rate": 0.001, "loss": 1.8508, "step": 373800 }, { "epoch": 32.2512077294686, "grad_norm": 5.781691074371338, "learning_rate": 0.001, "loss": 1.8395, "step": 373856 }, { "epoch": 32.256038647343, "grad_norm": 0.7164187431335449, "learning_rate": 0.001, "loss": 1.8363, "step": 373912 }, { "epoch": 32.26086956521739, "grad_norm": 13.646059036254883, "learning_rate": 0.001, "loss": 1.8345, "step": 373968 }, { "epoch": 32.265700483091784, "grad_norm": 0.5398547649383545, "learning_rate": 0.001, "loss": 1.8386, "step": 374024 }, { "epoch": 32.270531400966185, "grad_norm": 1.4126924276351929, "learning_rate": 0.001, "loss": 1.8464, "step": 374080 }, { "epoch": 32.27536231884058, "grad_norm": 0.3202066421508789, "learning_rate": 0.001, "loss": 1.8532, "step": 374136 }, { "epoch": 32.28019323671498, "grad_norm": 0.7664832472801208, "learning_rate": 0.001, "loss": 1.8419, "step": 374192 }, { "epoch": 32.28502415458937, "grad_norm": 0.478803813457489, "learning_rate": 0.001, "loss": 1.8362, "step": 374248 }, { "epoch": 32.289855072463766, "grad_norm": 1.117698073387146, "learning_rate": 0.001, "loss": 1.8416, "step": 374304 }, { "epoch": 32.29468599033817, "grad_norm": 0.2940598726272583, "learning_rate": 0.001, "loss": 1.8551, "step": 374360 }, { "epoch": 32.29951690821256, "grad_norm": 0.4876776337623596, "learning_rate": 0.001, "loss": 1.8591, "step": 374416 }, { "epoch": 32.30434782608695, "grad_norm": 0.7879142165184021, "learning_rate": 0.001, "loss": 1.8445, "step": 374472 }, { "epoch": 32.309178743961354, "grad_norm": 0.3670964539051056, "learning_rate": 0.001, "loss": 1.8597, "step": 374528 }, { "epoch": 32.31400966183575, "grad_norm": 2.608320951461792, "learning_rate": 0.001, "loss": 1.855, "step": 374584 }, { "epoch": 32.31884057971015, "grad_norm": 0.6056095361709595, "learning_rate": 0.001, "loss": 1.8513, "step": 374640 }, { "epoch": 32.32367149758454, "grad_norm": 1.3014816045761108, "learning_rate": 0.001, "loss": 1.848, "step": 374696 }, { "epoch": 32.328502415458935, "grad_norm": 1.1073057651519775, "learning_rate": 0.001, "loss": 1.8478, "step": 374752 }, { "epoch": 32.333333333333336, "grad_norm": 0.2885875403881073, "learning_rate": 0.001, "loss": 1.8405, "step": 374808 }, { "epoch": 32.33816425120773, "grad_norm": 0.7921796441078186, "learning_rate": 0.001, "loss": 1.8342, "step": 374864 }, { "epoch": 32.34299516908212, "grad_norm": 0.5287802219390869, "learning_rate": 0.001, "loss": 1.8354, "step": 374920 }, { "epoch": 32.34782608695652, "grad_norm": 0.49546757340431213, "learning_rate": 0.001, "loss": 1.843, "step": 374976 }, { "epoch": 32.35265700483092, "grad_norm": 1.1142160892486572, "learning_rate": 0.001, "loss": 1.8529, "step": 375032 }, { "epoch": 32.35748792270532, "grad_norm": 3.57182240486145, "learning_rate": 0.001, "loss": 1.853, "step": 375088 }, { "epoch": 32.36231884057971, "grad_norm": 0.468313604593277, "learning_rate": 0.001, "loss": 1.8559, "step": 375144 }, { "epoch": 32.367149758454104, "grad_norm": 0.4340816140174866, "learning_rate": 0.001, "loss": 1.8585, "step": 375200 }, { "epoch": 32.371980676328505, "grad_norm": 0.4288213551044464, "learning_rate": 0.001, "loss": 1.8447, "step": 375256 }, { "epoch": 32.3768115942029, "grad_norm": 0.3087408244609833, "learning_rate": 0.001, "loss": 1.8484, "step": 375312 }, { "epoch": 32.38164251207729, "grad_norm": 2.7799503803253174, "learning_rate": 0.001, "loss": 1.8508, "step": 375368 }, { "epoch": 32.38647342995169, "grad_norm": 3.459364891052246, "learning_rate": 0.001, "loss": 1.8464, "step": 375424 }, { "epoch": 32.391304347826086, "grad_norm": 1.3312876224517822, "learning_rate": 0.001, "loss": 1.8457, "step": 375480 }, { "epoch": 32.39613526570048, "grad_norm": 0.4875665009021759, "learning_rate": 0.001, "loss": 1.8515, "step": 375536 }, { "epoch": 32.40096618357488, "grad_norm": 0.421225905418396, "learning_rate": 0.001, "loss": 1.8558, "step": 375592 }, { "epoch": 32.405797101449274, "grad_norm": 0.3473358750343323, "learning_rate": 0.001, "loss": 1.844, "step": 375648 }, { "epoch": 32.410628019323674, "grad_norm": 0.26126474142074585, "learning_rate": 0.001, "loss": 1.8482, "step": 375704 }, { "epoch": 32.41545893719807, "grad_norm": 0.2654518485069275, "learning_rate": 0.001, "loss": 1.849, "step": 375760 }, { "epoch": 32.42028985507246, "grad_norm": 1.1230734586715698, "learning_rate": 0.001, "loss": 1.8528, "step": 375816 }, { "epoch": 32.42512077294686, "grad_norm": 3.879697322845459, "learning_rate": 0.001, "loss": 1.8547, "step": 375872 }, { "epoch": 32.429951690821255, "grad_norm": 0.4892650246620178, "learning_rate": 0.001, "loss": 1.8534, "step": 375928 }, { "epoch": 32.43478260869565, "grad_norm": 0.25833895802497864, "learning_rate": 0.001, "loss": 1.8512, "step": 375984 }, { "epoch": 32.43961352657005, "grad_norm": 0.8528651595115662, "learning_rate": 0.001, "loss": 1.8455, "step": 376040 }, { "epoch": 32.44444444444444, "grad_norm": 0.283153235912323, "learning_rate": 0.001, "loss": 1.8384, "step": 376096 }, { "epoch": 32.44927536231884, "grad_norm": 0.30392587184906006, "learning_rate": 0.001, "loss": 1.8448, "step": 376152 }, { "epoch": 32.45410628019324, "grad_norm": 0.5906954407691956, "learning_rate": 0.001, "loss": 1.8373, "step": 376208 }, { "epoch": 32.45893719806763, "grad_norm": 1.9905575513839722, "learning_rate": 0.001, "loss": 1.8361, "step": 376264 }, { "epoch": 32.46376811594203, "grad_norm": 0.5323963165283203, "learning_rate": 0.001, "loss": 1.84, "step": 376320 }, { "epoch": 32.468599033816425, "grad_norm": 3.6400694847106934, "learning_rate": 0.001, "loss": 1.8418, "step": 376376 }, { "epoch": 32.47342995169082, "grad_norm": 0.35972192883491516, "learning_rate": 0.001, "loss": 1.8466, "step": 376432 }, { "epoch": 32.47826086956522, "grad_norm": 0.7316411733627319, "learning_rate": 0.001, "loss": 1.8451, "step": 376488 }, { "epoch": 32.48309178743961, "grad_norm": 0.36495721340179443, "learning_rate": 0.001, "loss": 1.8596, "step": 376544 }, { "epoch": 32.48792270531401, "grad_norm": 0.7636012434959412, "learning_rate": 0.001, "loss": 1.8733, "step": 376600 }, { "epoch": 32.492753623188406, "grad_norm": 0.29365861415863037, "learning_rate": 0.001, "loss": 1.8558, "step": 376656 }, { "epoch": 32.4975845410628, "grad_norm": 0.5585270524024963, "learning_rate": 0.001, "loss": 1.853, "step": 376712 }, { "epoch": 32.5024154589372, "grad_norm": 0.47341543436050415, "learning_rate": 0.001, "loss": 1.8535, "step": 376768 }, { "epoch": 32.507246376811594, "grad_norm": 0.293991357088089, "learning_rate": 0.001, "loss": 1.8474, "step": 376824 }, { "epoch": 32.51207729468599, "grad_norm": 1.4911845922470093, "learning_rate": 0.001, "loss": 1.8398, "step": 376880 }, { "epoch": 32.51690821256039, "grad_norm": 1.716740608215332, "learning_rate": 0.001, "loss": 1.8364, "step": 376936 }, { "epoch": 32.52173913043478, "grad_norm": 1.4789096117019653, "learning_rate": 0.001, "loss": 1.8376, "step": 376992 }, { "epoch": 32.52657004830918, "grad_norm": 0.6244176626205444, "learning_rate": 0.001, "loss": 1.8457, "step": 377048 }, { "epoch": 32.531400966183575, "grad_norm": 0.958246648311615, "learning_rate": 0.001, "loss": 1.8518, "step": 377104 }, { "epoch": 32.53623188405797, "grad_norm": 0.32466602325439453, "learning_rate": 0.001, "loss": 1.8526, "step": 377160 }, { "epoch": 32.54106280193237, "grad_norm": 1.327105164527893, "learning_rate": 0.001, "loss": 1.8511, "step": 377216 }, { "epoch": 32.54589371980676, "grad_norm": 1.754935622215271, "learning_rate": 0.001, "loss": 1.8404, "step": 377272 }, { "epoch": 32.55072463768116, "grad_norm": 0.42063432931900024, "learning_rate": 0.001, "loss": 1.8383, "step": 377328 }, { "epoch": 32.55555555555556, "grad_norm": 1.7083438634872437, "learning_rate": 0.001, "loss": 1.8388, "step": 377384 }, { "epoch": 32.56038647342995, "grad_norm": 0.35757118463516235, "learning_rate": 0.001, "loss": 1.831, "step": 377440 }, { "epoch": 32.56521739130435, "grad_norm": 0.3467182219028473, "learning_rate": 0.001, "loss": 1.8405, "step": 377496 }, { "epoch": 32.570048309178745, "grad_norm": 0.5448659658432007, "learning_rate": 0.001, "loss": 1.8455, "step": 377552 }, { "epoch": 32.57487922705314, "grad_norm": 0.35185661911964417, "learning_rate": 0.001, "loss": 1.845, "step": 377608 }, { "epoch": 32.57971014492754, "grad_norm": 0.34073004126548767, "learning_rate": 0.001, "loss": 1.8506, "step": 377664 }, { "epoch": 32.58454106280193, "grad_norm": 0.34111663699150085, "learning_rate": 0.001, "loss": 1.8501, "step": 377720 }, { "epoch": 32.589371980676326, "grad_norm": 0.3987845182418823, "learning_rate": 0.001, "loss": 1.8441, "step": 377776 }, { "epoch": 32.594202898550726, "grad_norm": 2.2910256385803223, "learning_rate": 0.001, "loss": 1.8399, "step": 377832 }, { "epoch": 32.59903381642512, "grad_norm": 0.6643227934837341, "learning_rate": 0.001, "loss": 1.8439, "step": 377888 }, { "epoch": 32.60386473429952, "grad_norm": 0.8784595131874084, "learning_rate": 0.001, "loss": 1.8422, "step": 377944 }, { "epoch": 32.608695652173914, "grad_norm": 0.672061026096344, "learning_rate": 0.001, "loss": 1.8419, "step": 378000 }, { "epoch": 32.61352657004831, "grad_norm": 0.36900657415390015, "learning_rate": 0.001, "loss": 1.8509, "step": 378056 }, { "epoch": 32.61835748792271, "grad_norm": 1.5382411479949951, "learning_rate": 0.001, "loss": 1.8541, "step": 378112 }, { "epoch": 32.6231884057971, "grad_norm": 0.404506653547287, "learning_rate": 0.001, "loss": 1.8542, "step": 378168 }, { "epoch": 32.628019323671495, "grad_norm": 0.6450835466384888, "learning_rate": 0.001, "loss": 1.8654, "step": 378224 }, { "epoch": 32.632850241545896, "grad_norm": 0.5529160499572754, "learning_rate": 0.001, "loss": 1.8622, "step": 378280 }, { "epoch": 32.63768115942029, "grad_norm": 0.6786965727806091, "learning_rate": 0.001, "loss": 1.8591, "step": 378336 }, { "epoch": 32.64251207729468, "grad_norm": 0.794245719909668, "learning_rate": 0.001, "loss": 1.8515, "step": 378392 }, { "epoch": 32.64734299516908, "grad_norm": 0.31528031826019287, "learning_rate": 0.001, "loss": 1.8527, "step": 378448 }, { "epoch": 32.65217391304348, "grad_norm": 0.374546617269516, "learning_rate": 0.001, "loss": 1.8634, "step": 378504 }, { "epoch": 32.65700483091788, "grad_norm": 1.0305025577545166, "learning_rate": 0.001, "loss": 1.8452, "step": 378560 }, { "epoch": 32.66183574879227, "grad_norm": 0.36057665944099426, "learning_rate": 0.001, "loss": 1.8452, "step": 378616 }, { "epoch": 32.666666666666664, "grad_norm": 0.5232740044593811, "learning_rate": 0.001, "loss": 1.8441, "step": 378672 }, { "epoch": 32.671497584541065, "grad_norm": 0.4006281793117523, "learning_rate": 0.001, "loss": 1.8495, "step": 378728 }, { "epoch": 32.67632850241546, "grad_norm": 0.41494491696357727, "learning_rate": 0.001, "loss": 1.839, "step": 378784 }, { "epoch": 32.68115942028985, "grad_norm": 8.640713691711426, "learning_rate": 0.001, "loss": 1.8455, "step": 378840 }, { "epoch": 32.68599033816425, "grad_norm": 2.596590995788574, "learning_rate": 0.001, "loss": 1.8608, "step": 378896 }, { "epoch": 32.690821256038646, "grad_norm": 0.849643349647522, "learning_rate": 0.001, "loss": 1.8585, "step": 378952 }, { "epoch": 32.69565217391305, "grad_norm": 0.8204634189605713, "learning_rate": 0.001, "loss": 1.8593, "step": 379008 }, { "epoch": 32.70048309178744, "grad_norm": 0.5446606278419495, "learning_rate": 0.001, "loss": 1.8604, "step": 379064 }, { "epoch": 32.70531400966183, "grad_norm": 1.643191933631897, "learning_rate": 0.001, "loss": 1.8483, "step": 379120 }, { "epoch": 32.710144927536234, "grad_norm": 1.6326783895492554, "learning_rate": 0.001, "loss": 1.8563, "step": 379176 }, { "epoch": 32.71497584541063, "grad_norm": 0.49490636587142944, "learning_rate": 0.001, "loss": 1.8555, "step": 379232 }, { "epoch": 32.71980676328502, "grad_norm": 1.0047205686569214, "learning_rate": 0.001, "loss": 1.8469, "step": 379288 }, { "epoch": 32.72463768115942, "grad_norm": 1.4708741903305054, "learning_rate": 0.001, "loss": 1.8421, "step": 379344 }, { "epoch": 32.729468599033815, "grad_norm": 0.8628129363059998, "learning_rate": 0.001, "loss": 1.8483, "step": 379400 }, { "epoch": 32.734299516908216, "grad_norm": 1.7796859741210938, "learning_rate": 0.001, "loss": 1.8507, "step": 379456 }, { "epoch": 32.73913043478261, "grad_norm": 0.5412070155143738, "learning_rate": 0.001, "loss": 1.8542, "step": 379512 }, { "epoch": 32.743961352657, "grad_norm": 0.3636011481285095, "learning_rate": 0.001, "loss": 1.8542, "step": 379568 }, { "epoch": 32.7487922705314, "grad_norm": 0.7431475520133972, "learning_rate": 0.001, "loss": 1.8571, "step": 379624 }, { "epoch": 32.7536231884058, "grad_norm": 0.34459924697875977, "learning_rate": 0.001, "loss": 1.8536, "step": 379680 }, { "epoch": 32.75845410628019, "grad_norm": 0.44119343161582947, "learning_rate": 0.001, "loss": 1.8541, "step": 379736 }, { "epoch": 32.76328502415459, "grad_norm": 0.2517727017402649, "learning_rate": 0.001, "loss": 1.8421, "step": 379792 }, { "epoch": 32.768115942028984, "grad_norm": 0.9753492474555969, "learning_rate": 0.001, "loss": 1.8465, "step": 379848 }, { "epoch": 32.772946859903385, "grad_norm": 1.606237769126892, "learning_rate": 0.001, "loss": 1.8438, "step": 379904 }, { "epoch": 32.77777777777778, "grad_norm": 0.9974656105041504, "learning_rate": 0.001, "loss": 1.8607, "step": 379960 }, { "epoch": 32.78260869565217, "grad_norm": 0.6982571482658386, "learning_rate": 0.001, "loss": 1.8776, "step": 380016 }, { "epoch": 32.78743961352657, "grad_norm": 0.43610435724258423, "learning_rate": 0.001, "loss": 1.857, "step": 380072 }, { "epoch": 32.792270531400966, "grad_norm": 0.9027001857757568, "learning_rate": 0.001, "loss": 1.8483, "step": 380128 }, { "epoch": 32.79710144927536, "grad_norm": 1.5463149547576904, "learning_rate": 0.001, "loss": 1.8554, "step": 380184 }, { "epoch": 32.80193236714976, "grad_norm": 0.7897469997406006, "learning_rate": 0.001, "loss": 1.8517, "step": 380240 }, { "epoch": 32.806763285024154, "grad_norm": 1.6597342491149902, "learning_rate": 0.001, "loss": 1.8497, "step": 380296 }, { "epoch": 32.81159420289855, "grad_norm": 0.2787741720676422, "learning_rate": 0.001, "loss": 1.8587, "step": 380352 }, { "epoch": 32.81642512077295, "grad_norm": 0.2956477105617523, "learning_rate": 0.001, "loss": 1.8546, "step": 380408 }, { "epoch": 32.82125603864734, "grad_norm": 0.3225376009941101, "learning_rate": 0.001, "loss": 1.8596, "step": 380464 }, { "epoch": 32.82608695652174, "grad_norm": 1.282189130783081, "learning_rate": 0.001, "loss": 1.8635, "step": 380520 }, { "epoch": 32.830917874396135, "grad_norm": 0.2542252838611603, "learning_rate": 0.001, "loss": 1.8426, "step": 380576 }, { "epoch": 32.83574879227053, "grad_norm": 0.506098210811615, "learning_rate": 0.001, "loss": 1.8539, "step": 380632 }, { "epoch": 32.84057971014493, "grad_norm": 1.0691663026809692, "learning_rate": 0.001, "loss": 1.8546, "step": 380688 }, { "epoch": 32.84541062801932, "grad_norm": 0.9779183268547058, "learning_rate": 0.001, "loss": 1.8464, "step": 380744 }, { "epoch": 32.85024154589372, "grad_norm": 0.35702377557754517, "learning_rate": 0.001, "loss": 1.8473, "step": 380800 }, { "epoch": 32.85507246376812, "grad_norm": 1.1675342321395874, "learning_rate": 0.001, "loss": 1.8759, "step": 380856 }, { "epoch": 32.85990338164251, "grad_norm": 1.2844762802124023, "learning_rate": 0.001, "loss": 1.8592, "step": 380912 }, { "epoch": 32.86473429951691, "grad_norm": 0.27818992733955383, "learning_rate": 0.001, "loss": 1.8611, "step": 380968 }, { "epoch": 32.869565217391305, "grad_norm": 0.9189602136611938, "learning_rate": 0.001, "loss": 1.8496, "step": 381024 }, { "epoch": 32.8743961352657, "grad_norm": 0.44964373111724854, "learning_rate": 0.001, "loss": 1.8471, "step": 381080 }, { "epoch": 32.8792270531401, "grad_norm": 0.7384527325630188, "learning_rate": 0.001, "loss": 1.8471, "step": 381136 }, { "epoch": 32.88405797101449, "grad_norm": 1.1870777606964111, "learning_rate": 0.001, "loss": 1.8408, "step": 381192 }, { "epoch": 32.888888888888886, "grad_norm": 1.3894047737121582, "learning_rate": 0.001, "loss": 1.8444, "step": 381248 }, { "epoch": 32.893719806763286, "grad_norm": 0.4911687672138214, "learning_rate": 0.001, "loss": 1.8514, "step": 381304 }, { "epoch": 32.89855072463768, "grad_norm": 0.30646243691444397, "learning_rate": 0.001, "loss": 1.8525, "step": 381360 }, { "epoch": 32.90338164251208, "grad_norm": 1.2023776769638062, "learning_rate": 0.001, "loss": 1.8522, "step": 381416 }, { "epoch": 32.908212560386474, "grad_norm": 2.8627359867095947, "learning_rate": 0.001, "loss": 1.8484, "step": 381472 }, { "epoch": 32.91304347826087, "grad_norm": 0.5137643218040466, "learning_rate": 0.001, "loss": 1.8528, "step": 381528 }, { "epoch": 32.91787439613527, "grad_norm": 1.190400242805481, "learning_rate": 0.001, "loss": 1.8552, "step": 381584 }, { "epoch": 32.92270531400966, "grad_norm": 0.36700430512428284, "learning_rate": 0.001, "loss": 1.8511, "step": 381640 }, { "epoch": 32.927536231884055, "grad_norm": 0.4066595733165741, "learning_rate": 0.001, "loss": 1.8458, "step": 381696 }, { "epoch": 32.932367149758456, "grad_norm": 0.3802686035633087, "learning_rate": 0.001, "loss": 1.8481, "step": 381752 }, { "epoch": 32.93719806763285, "grad_norm": 0.24708229303359985, "learning_rate": 0.001, "loss": 1.858, "step": 381808 }, { "epoch": 32.94202898550725, "grad_norm": 2.311570644378662, "learning_rate": 0.001, "loss": 1.8548, "step": 381864 }, { "epoch": 32.94685990338164, "grad_norm": 1.4765944480895996, "learning_rate": 0.001, "loss": 1.8567, "step": 381920 }, { "epoch": 32.95169082125604, "grad_norm": 1.4749515056610107, "learning_rate": 0.001, "loss": 1.8541, "step": 381976 }, { "epoch": 32.95652173913044, "grad_norm": 1.1101292371749878, "learning_rate": 0.001, "loss": 1.8524, "step": 382032 }, { "epoch": 32.96135265700483, "grad_norm": 1.421686053276062, "learning_rate": 0.001, "loss": 1.8494, "step": 382088 }, { "epoch": 32.966183574879224, "grad_norm": 0.993030309677124, "learning_rate": 0.001, "loss": 1.8652, "step": 382144 }, { "epoch": 32.971014492753625, "grad_norm": 1.7938148975372314, "learning_rate": 0.001, "loss": 1.8522, "step": 382200 }, { "epoch": 32.97584541062802, "grad_norm": 0.2918282449245453, "learning_rate": 0.001, "loss": 1.8628, "step": 382256 }, { "epoch": 32.98067632850242, "grad_norm": 5.5111236572265625, "learning_rate": 0.001, "loss": 1.8608, "step": 382312 }, { "epoch": 32.98550724637681, "grad_norm": 2.92281174659729, "learning_rate": 0.001, "loss": 1.8468, "step": 382368 }, { "epoch": 32.990338164251206, "grad_norm": 0.38964682817459106, "learning_rate": 0.001, "loss": 1.8607, "step": 382424 }, { "epoch": 32.99516908212561, "grad_norm": 0.4770703911781311, "learning_rate": 0.001, "loss": 1.867, "step": 382480 }, { "epoch": 33.0, "grad_norm": 0.6109275221824646, "learning_rate": 0.001, "loss": 1.8494, "step": 382536 }, { "epoch": 33.00483091787439, "grad_norm": 3.7642152309417725, "learning_rate": 0.001, "loss": 1.8286, "step": 382592 }, { "epoch": 33.009661835748794, "grad_norm": 0.7113749384880066, "learning_rate": 0.001, "loss": 1.8297, "step": 382648 }, { "epoch": 33.01449275362319, "grad_norm": 0.5638636946678162, "learning_rate": 0.001, "loss": 1.833, "step": 382704 }, { "epoch": 33.01932367149758, "grad_norm": 0.35028257966041565, "learning_rate": 0.001, "loss": 1.832, "step": 382760 }, { "epoch": 33.02415458937198, "grad_norm": 0.5259183049201965, "learning_rate": 0.001, "loss": 1.822, "step": 382816 }, { "epoch": 33.028985507246375, "grad_norm": 0.7119775414466858, "learning_rate": 0.001, "loss": 1.8128, "step": 382872 }, { "epoch": 33.033816425120776, "grad_norm": 0.4885420799255371, "learning_rate": 0.001, "loss": 1.829, "step": 382928 }, { "epoch": 33.03864734299517, "grad_norm": 0.38046929240226746, "learning_rate": 0.001, "loss": 1.8283, "step": 382984 }, { "epoch": 33.04347826086956, "grad_norm": 1.0843122005462646, "learning_rate": 0.001, "loss": 1.8387, "step": 383040 }, { "epoch": 33.04830917874396, "grad_norm": 0.6481552124023438, "learning_rate": 0.001, "loss": 1.8456, "step": 383096 }, { "epoch": 33.05314009661836, "grad_norm": 0.9348629713058472, "learning_rate": 0.001, "loss": 1.8445, "step": 383152 }, { "epoch": 33.05797101449275, "grad_norm": 0.9992024302482605, "learning_rate": 0.001, "loss": 1.8428, "step": 383208 }, { "epoch": 33.06280193236715, "grad_norm": 1.5345975160598755, "learning_rate": 0.001, "loss": 1.8411, "step": 383264 }, { "epoch": 33.067632850241544, "grad_norm": 0.6153743267059326, "learning_rate": 0.001, "loss": 1.8321, "step": 383320 }, { "epoch": 33.072463768115945, "grad_norm": 0.9540253281593323, "learning_rate": 0.001, "loss": 1.8327, "step": 383376 }, { "epoch": 33.07729468599034, "grad_norm": 5.080650806427002, "learning_rate": 0.001, "loss": 1.834, "step": 383432 }, { "epoch": 33.08212560386473, "grad_norm": 3.1469767093658447, "learning_rate": 0.001, "loss": 1.844, "step": 383488 }, { "epoch": 33.08695652173913, "grad_norm": 0.3300671875476837, "learning_rate": 0.001, "loss": 1.846, "step": 383544 }, { "epoch": 33.091787439613526, "grad_norm": 1.5894296169281006, "learning_rate": 0.001, "loss": 1.8574, "step": 383600 }, { "epoch": 33.09661835748792, "grad_norm": 0.48993924260139465, "learning_rate": 0.001, "loss": 1.8474, "step": 383656 }, { "epoch": 33.10144927536232, "grad_norm": 0.3555910587310791, "learning_rate": 0.001, "loss": 1.8472, "step": 383712 }, { "epoch": 33.106280193236714, "grad_norm": 0.7348004579544067, "learning_rate": 0.001, "loss": 1.8416, "step": 383768 }, { "epoch": 33.111111111111114, "grad_norm": 2.8869214057922363, "learning_rate": 0.001, "loss": 1.8396, "step": 383824 }, { "epoch": 33.11594202898551, "grad_norm": 0.5226358771324158, "learning_rate": 0.001, "loss": 1.8463, "step": 383880 }, { "epoch": 33.1207729468599, "grad_norm": 1.3559980392456055, "learning_rate": 0.001, "loss": 1.8454, "step": 383936 }, { "epoch": 33.1256038647343, "grad_norm": 0.36827754974365234, "learning_rate": 0.001, "loss": 1.8422, "step": 383992 }, { "epoch": 33.130434782608695, "grad_norm": 0.5226930379867554, "learning_rate": 0.001, "loss": 1.8337, "step": 384048 }, { "epoch": 33.13526570048309, "grad_norm": 0.368125855922699, "learning_rate": 0.001, "loss": 1.8544, "step": 384104 }, { "epoch": 33.14009661835749, "grad_norm": 1.0597519874572754, "learning_rate": 0.001, "loss": 1.8508, "step": 384160 }, { "epoch": 33.14492753623188, "grad_norm": 0.2916831970214844, "learning_rate": 0.001, "loss": 1.8493, "step": 384216 }, { "epoch": 33.14975845410628, "grad_norm": 0.26027148962020874, "learning_rate": 0.001, "loss": 1.8586, "step": 384272 }, { "epoch": 33.15458937198068, "grad_norm": 0.8502988815307617, "learning_rate": 0.001, "loss": 1.8633, "step": 384328 }, { "epoch": 33.15942028985507, "grad_norm": 0.5353624224662781, "learning_rate": 0.001, "loss": 1.8559, "step": 384384 }, { "epoch": 33.16425120772947, "grad_norm": 0.283352792263031, "learning_rate": 0.001, "loss": 1.8517, "step": 384440 }, { "epoch": 33.169082125603865, "grad_norm": 0.36158379912376404, "learning_rate": 0.001, "loss": 1.8507, "step": 384496 }, { "epoch": 33.17391304347826, "grad_norm": 0.5547168254852295, "learning_rate": 0.001, "loss": 1.8435, "step": 384552 }, { "epoch": 33.17874396135266, "grad_norm": 1.2818000316619873, "learning_rate": 0.001, "loss": 1.8389, "step": 384608 }, { "epoch": 33.18357487922705, "grad_norm": 0.47685927152633667, "learning_rate": 0.001, "loss": 1.8468, "step": 384664 }, { "epoch": 33.18840579710145, "grad_norm": 5.0359721183776855, "learning_rate": 0.001, "loss": 1.8485, "step": 384720 }, { "epoch": 33.193236714975846, "grad_norm": 1.5601245164871216, "learning_rate": 0.001, "loss": 1.8512, "step": 384776 }, { "epoch": 33.19806763285024, "grad_norm": 4.911972999572754, "learning_rate": 0.001, "loss": 1.8365, "step": 384832 }, { "epoch": 33.20289855072464, "grad_norm": 1.1329741477966309, "learning_rate": 0.001, "loss": 1.8412, "step": 384888 }, { "epoch": 33.207729468599034, "grad_norm": 0.36179405450820923, "learning_rate": 0.001, "loss": 1.8435, "step": 384944 }, { "epoch": 33.21256038647343, "grad_norm": 0.8295024037361145, "learning_rate": 0.001, "loss": 1.8586, "step": 385000 }, { "epoch": 33.21739130434783, "grad_norm": 0.5777642130851746, "learning_rate": 0.001, "loss": 1.8554, "step": 385056 }, { "epoch": 33.22222222222222, "grad_norm": 1.7675678730010986, "learning_rate": 0.001, "loss": 1.86, "step": 385112 }, { "epoch": 33.227053140096615, "grad_norm": 0.6530225276947021, "learning_rate": 0.001, "loss": 1.8494, "step": 385168 }, { "epoch": 33.231884057971016, "grad_norm": 1.0190540552139282, "learning_rate": 0.001, "loss": 1.8489, "step": 385224 }, { "epoch": 33.23671497584541, "grad_norm": 0.3430393636226654, "learning_rate": 0.001, "loss": 1.8357, "step": 385280 }, { "epoch": 33.24154589371981, "grad_norm": 1.2124278545379639, "learning_rate": 0.001, "loss": 1.8397, "step": 385336 }, { "epoch": 33.2463768115942, "grad_norm": 1.3630893230438232, "learning_rate": 0.001, "loss": 1.8596, "step": 385392 }, { "epoch": 33.2512077294686, "grad_norm": 0.9709978103637695, "learning_rate": 0.001, "loss": 1.8461, "step": 385448 }, { "epoch": 33.256038647343, "grad_norm": 0.5541308522224426, "learning_rate": 0.001, "loss": 1.8458, "step": 385504 }, { "epoch": 33.26086956521739, "grad_norm": 1.2440598011016846, "learning_rate": 0.001, "loss": 1.8432, "step": 385560 }, { "epoch": 33.265700483091784, "grad_norm": 0.7311402559280396, "learning_rate": 0.001, "loss": 1.8416, "step": 385616 }, { "epoch": 33.270531400966185, "grad_norm": 0.3230074346065521, "learning_rate": 0.001, "loss": 1.8336, "step": 385672 }, { "epoch": 33.27536231884058, "grad_norm": 0.3788334131240845, "learning_rate": 0.001, "loss": 1.837, "step": 385728 }, { "epoch": 33.28019323671498, "grad_norm": 0.7328329086303711, "learning_rate": 0.001, "loss": 1.8285, "step": 385784 }, { "epoch": 33.28502415458937, "grad_norm": 10.460920333862305, "learning_rate": 0.001, "loss": 1.8418, "step": 385840 }, { "epoch": 33.289855072463766, "grad_norm": 0.5932354927062988, "learning_rate": 0.001, "loss": 1.8696, "step": 385896 }, { "epoch": 33.29468599033817, "grad_norm": 0.6016402244567871, "learning_rate": 0.001, "loss": 1.8697, "step": 385952 }, { "epoch": 33.29951690821256, "grad_norm": 0.3926424980163574, "learning_rate": 0.001, "loss": 1.8633, "step": 386008 }, { "epoch": 33.30434782608695, "grad_norm": 0.37828126549720764, "learning_rate": 0.001, "loss": 1.8594, "step": 386064 }, { "epoch": 33.309178743961354, "grad_norm": 0.4842880070209503, "learning_rate": 0.001, "loss": 1.8511, "step": 386120 }, { "epoch": 33.31400966183575, "grad_norm": 0.39039677381515503, "learning_rate": 0.001, "loss": 1.8521, "step": 386176 }, { "epoch": 33.31884057971015, "grad_norm": 0.43778035044670105, "learning_rate": 0.001, "loss": 1.8476, "step": 386232 }, { "epoch": 33.32367149758454, "grad_norm": 0.3074943423271179, "learning_rate": 0.001, "loss": 1.8457, "step": 386288 }, { "epoch": 33.328502415458935, "grad_norm": 0.41545557975769043, "learning_rate": 0.001, "loss": 1.8519, "step": 386344 }, { "epoch": 33.333333333333336, "grad_norm": 0.8189210891723633, "learning_rate": 0.001, "loss": 1.8532, "step": 386400 }, { "epoch": 33.33816425120773, "grad_norm": 0.6503366827964783, "learning_rate": 0.001, "loss": 1.8533, "step": 386456 }, { "epoch": 33.34299516908212, "grad_norm": 2.9798851013183594, "learning_rate": 0.001, "loss": 1.8536, "step": 386512 }, { "epoch": 33.34782608695652, "grad_norm": 0.6166926622390747, "learning_rate": 0.001, "loss": 1.8547, "step": 386568 }, { "epoch": 33.35265700483092, "grad_norm": 0.7489621639251709, "learning_rate": 0.001, "loss": 1.849, "step": 386624 }, { "epoch": 33.35748792270532, "grad_norm": 2.761019229888916, "learning_rate": 0.001, "loss": 1.8496, "step": 386680 }, { "epoch": 33.36231884057971, "grad_norm": 0.48480159044265747, "learning_rate": 0.001, "loss": 1.8511, "step": 386736 }, { "epoch": 33.367149758454104, "grad_norm": 0.5536046028137207, "learning_rate": 0.001, "loss": 1.8557, "step": 386792 }, { "epoch": 33.371980676328505, "grad_norm": 0.3162173628807068, "learning_rate": 0.001, "loss": 1.8425, "step": 386848 }, { "epoch": 33.3768115942029, "grad_norm": 0.521343469619751, "learning_rate": 0.001, "loss": 1.8429, "step": 386904 }, { "epoch": 33.38164251207729, "grad_norm": 1.0974353551864624, "learning_rate": 0.001, "loss": 1.8329, "step": 386960 }, { "epoch": 33.38647342995169, "grad_norm": 0.44107648730278015, "learning_rate": 0.001, "loss": 1.8359, "step": 387016 }, { "epoch": 33.391304347826086, "grad_norm": 0.4756486713886261, "learning_rate": 0.001, "loss": 1.8268, "step": 387072 }, { "epoch": 33.39613526570048, "grad_norm": 0.4744235575199127, "learning_rate": 0.001, "loss": 1.826, "step": 387128 }, { "epoch": 33.40096618357488, "grad_norm": 0.7108448147773743, "learning_rate": 0.001, "loss": 1.837, "step": 387184 }, { "epoch": 33.405797101449274, "grad_norm": 0.41541534662246704, "learning_rate": 0.001, "loss": 1.8242, "step": 387240 }, { "epoch": 33.410628019323674, "grad_norm": 2.1021149158477783, "learning_rate": 0.001, "loss": 1.8292, "step": 387296 }, { "epoch": 33.41545893719807, "grad_norm": 0.43672025203704834, "learning_rate": 0.001, "loss": 1.8254, "step": 387352 }, { "epoch": 33.42028985507246, "grad_norm": 0.6818185448646545, "learning_rate": 0.001, "loss": 1.8244, "step": 387408 }, { "epoch": 33.42512077294686, "grad_norm": 0.817703366279602, "learning_rate": 0.001, "loss": 1.8266, "step": 387464 }, { "epoch": 33.429951690821255, "grad_norm": 1.0184825658798218, "learning_rate": 0.001, "loss": 1.8262, "step": 387520 }, { "epoch": 33.43478260869565, "grad_norm": 1.4650359153747559, "learning_rate": 0.001, "loss": 1.8256, "step": 387576 }, { "epoch": 33.43961352657005, "grad_norm": 0.6176877021789551, "learning_rate": 0.001, "loss": 1.8221, "step": 387632 }, { "epoch": 33.44444444444444, "grad_norm": 0.3248637318611145, "learning_rate": 0.001, "loss": 1.8275, "step": 387688 }, { "epoch": 33.44927536231884, "grad_norm": 0.4993076026439667, "learning_rate": 0.001, "loss": 1.8242, "step": 387744 }, { "epoch": 33.45410628019324, "grad_norm": 0.2622261047363281, "learning_rate": 0.001, "loss": 1.8174, "step": 387800 }, { "epoch": 33.45893719806763, "grad_norm": 1.6135770082473755, "learning_rate": 0.001, "loss": 1.8217, "step": 387856 }, { "epoch": 33.46376811594203, "grad_norm": 2.4823291301727295, "learning_rate": 0.001, "loss": 1.8295, "step": 387912 }, { "epoch": 33.468599033816425, "grad_norm": 3.4560706615448, "learning_rate": 0.001, "loss": 1.8249, "step": 387968 }, { "epoch": 33.47342995169082, "grad_norm": 0.9529895186424255, "learning_rate": 0.001, "loss": 1.8422, "step": 388024 }, { "epoch": 33.47826086956522, "grad_norm": 1.3982553482055664, "learning_rate": 0.001, "loss": 1.845, "step": 388080 }, { "epoch": 33.48309178743961, "grad_norm": 1.281186580657959, "learning_rate": 0.001, "loss": 1.8513, "step": 388136 }, { "epoch": 33.48792270531401, "grad_norm": 2.2583999633789062, "learning_rate": 0.001, "loss": 1.8548, "step": 388192 }, { "epoch": 33.492753623188406, "grad_norm": 16.483736038208008, "learning_rate": 0.001, "loss": 1.8548, "step": 388248 }, { "epoch": 33.4975845410628, "grad_norm": 0.8461055755615234, "learning_rate": 0.001, "loss": 1.8371, "step": 388304 }, { "epoch": 33.5024154589372, "grad_norm": 1.8284337520599365, "learning_rate": 0.001, "loss": 1.8384, "step": 388360 }, { "epoch": 33.507246376811594, "grad_norm": 0.5075252056121826, "learning_rate": 0.001, "loss": 1.8509, "step": 388416 }, { "epoch": 33.51207729468599, "grad_norm": 0.5701984167098999, "learning_rate": 0.001, "loss": 1.8384, "step": 388472 }, { "epoch": 33.51690821256039, "grad_norm": 13.03590202331543, "learning_rate": 0.001, "loss": 1.8395, "step": 388528 }, { "epoch": 33.52173913043478, "grad_norm": 0.4395972192287445, "learning_rate": 0.001, "loss": 1.8389, "step": 388584 }, { "epoch": 33.52657004830918, "grad_norm": 0.3167584538459778, "learning_rate": 0.001, "loss": 1.8375, "step": 388640 }, { "epoch": 33.531400966183575, "grad_norm": 3.5270092487335205, "learning_rate": 0.001, "loss": 1.8303, "step": 388696 }, { "epoch": 33.53623188405797, "grad_norm": 0.26903006434440613, "learning_rate": 0.001, "loss": 1.8414, "step": 388752 }, { "epoch": 33.54106280193237, "grad_norm": 0.4363032877445221, "learning_rate": 0.001, "loss": 1.8334, "step": 388808 }, { "epoch": 33.54589371980676, "grad_norm": 0.3003602623939514, "learning_rate": 0.001, "loss": 1.8364, "step": 388864 }, { "epoch": 33.55072463768116, "grad_norm": 0.2979651689529419, "learning_rate": 0.001, "loss": 1.833, "step": 388920 }, { "epoch": 33.55555555555556, "grad_norm": 0.5554699301719666, "learning_rate": 0.001, "loss": 1.8278, "step": 388976 }, { "epoch": 33.56038647342995, "grad_norm": 0.7324069142341614, "learning_rate": 0.001, "loss": 1.8317, "step": 389032 }, { "epoch": 33.56521739130435, "grad_norm": 0.8645010590553284, "learning_rate": 0.001, "loss": 1.8293, "step": 389088 }, { "epoch": 33.570048309178745, "grad_norm": 0.6930968761444092, "learning_rate": 0.001, "loss": 1.8384, "step": 389144 }, { "epoch": 33.57487922705314, "grad_norm": 0.2999052405357361, "learning_rate": 0.001, "loss": 1.8371, "step": 389200 }, { "epoch": 33.57971014492754, "grad_norm": 2.6434919834136963, "learning_rate": 0.001, "loss": 1.8307, "step": 389256 }, { "epoch": 33.58454106280193, "grad_norm": 26.93202781677246, "learning_rate": 0.001, "loss": 1.8304, "step": 389312 }, { "epoch": 33.589371980676326, "grad_norm": 0.6684861779212952, "learning_rate": 0.001, "loss": 1.8345, "step": 389368 }, { "epoch": 33.594202898550726, "grad_norm": 0.2618001699447632, "learning_rate": 0.001, "loss": 1.8281, "step": 389424 }, { "epoch": 33.59903381642512, "grad_norm": 0.4034615755081177, "learning_rate": 0.001, "loss": 1.8311, "step": 389480 }, { "epoch": 33.60386473429952, "grad_norm": 0.3703325092792511, "learning_rate": 0.001, "loss": 1.8252, "step": 389536 }, { "epoch": 33.608695652173914, "grad_norm": 4.496723175048828, "learning_rate": 0.001, "loss": 1.8259, "step": 389592 }, { "epoch": 33.61352657004831, "grad_norm": 0.3593529462814331, "learning_rate": 0.001, "loss": 1.8205, "step": 389648 }, { "epoch": 33.61835748792271, "grad_norm": 6.112456798553467, "learning_rate": 0.001, "loss": 1.8275, "step": 389704 }, { "epoch": 33.6231884057971, "grad_norm": 0.9142809510231018, "learning_rate": 0.001, "loss": 1.8385, "step": 389760 }, { "epoch": 33.628019323671495, "grad_norm": 2.3053598403930664, "learning_rate": 0.001, "loss": 1.8344, "step": 389816 }, { "epoch": 33.632850241545896, "grad_norm": 7.077793121337891, "learning_rate": 0.001, "loss": 1.8371, "step": 389872 }, { "epoch": 33.63768115942029, "grad_norm": 1.9662487506866455, "learning_rate": 0.001, "loss": 1.8369, "step": 389928 }, { "epoch": 33.64251207729468, "grad_norm": 0.33349180221557617, "learning_rate": 0.001, "loss": 1.8291, "step": 389984 }, { "epoch": 33.64734299516908, "grad_norm": 1.2231402397155762, "learning_rate": 0.001, "loss": 1.8357, "step": 390040 }, { "epoch": 33.65217391304348, "grad_norm": 1.894821286201477, "learning_rate": 0.001, "loss": 1.823, "step": 390096 }, { "epoch": 33.65700483091788, "grad_norm": 0.4181155264377594, "learning_rate": 0.001, "loss": 1.8257, "step": 390152 }, { "epoch": 33.66183574879227, "grad_norm": 0.4621347486972809, "learning_rate": 0.001, "loss": 1.8309, "step": 390208 }, { "epoch": 33.666666666666664, "grad_norm": 1.0278148651123047, "learning_rate": 0.001, "loss": 1.841, "step": 390264 }, { "epoch": 33.671497584541065, "grad_norm": 1.5917452573776245, "learning_rate": 0.001, "loss": 1.8482, "step": 390320 }, { "epoch": 33.67632850241546, "grad_norm": 0.8521987199783325, "learning_rate": 0.001, "loss": 1.85, "step": 390376 }, { "epoch": 33.68115942028985, "grad_norm": 5.376307964324951, "learning_rate": 0.001, "loss": 1.8504, "step": 390432 }, { "epoch": 33.68599033816425, "grad_norm": 0.4661087989807129, "learning_rate": 0.001, "loss": 1.8533, "step": 390488 }, { "epoch": 33.690821256038646, "grad_norm": 1.0833059549331665, "learning_rate": 0.001, "loss": 1.8512, "step": 390544 }, { "epoch": 33.69565217391305, "grad_norm": 0.5495493412017822, "learning_rate": 0.001, "loss": 1.8623, "step": 390600 }, { "epoch": 33.70048309178744, "grad_norm": 1.2597626447677612, "learning_rate": 0.001, "loss": 1.8714, "step": 390656 }, { "epoch": 33.70531400966183, "grad_norm": 0.63435298204422, "learning_rate": 0.001, "loss": 1.8547, "step": 390712 }, { "epoch": 33.710144927536234, "grad_norm": 0.4148454964160919, "learning_rate": 0.001, "loss": 1.8598, "step": 390768 }, { "epoch": 33.71497584541063, "grad_norm": 0.49057793617248535, "learning_rate": 0.001, "loss": 1.8586, "step": 390824 }, { "epoch": 33.71980676328502, "grad_norm": 0.6912051439285278, "learning_rate": 0.001, "loss": 1.8632, "step": 390880 }, { "epoch": 33.72463768115942, "grad_norm": 1.2648190259933472, "learning_rate": 0.001, "loss": 1.8678, "step": 390936 }, { "epoch": 33.729468599033815, "grad_norm": 0.5524211525917053, "learning_rate": 0.001, "loss": 1.8619, "step": 390992 }, { "epoch": 33.734299516908216, "grad_norm": 4.158597946166992, "learning_rate": 0.001, "loss": 1.8532, "step": 391048 }, { "epoch": 33.73913043478261, "grad_norm": 0.39962416887283325, "learning_rate": 0.001, "loss": 1.8541, "step": 391104 }, { "epoch": 33.743961352657, "grad_norm": 0.5502328872680664, "learning_rate": 0.001, "loss": 1.8467, "step": 391160 }, { "epoch": 33.7487922705314, "grad_norm": 0.7458182573318481, "learning_rate": 0.001, "loss": 1.8445, "step": 391216 }, { "epoch": 33.7536231884058, "grad_norm": 1.28138267993927, "learning_rate": 0.001, "loss": 1.8589, "step": 391272 }, { "epoch": 33.75845410628019, "grad_norm": 4.219057559967041, "learning_rate": 0.001, "loss": 1.8695, "step": 391328 }, { "epoch": 33.76328502415459, "grad_norm": 1.0130966901779175, "learning_rate": 0.001, "loss": 1.8705, "step": 391384 }, { "epoch": 33.768115942028984, "grad_norm": 2.883869171142578, "learning_rate": 0.001, "loss": 1.8792, "step": 391440 }, { "epoch": 33.772946859903385, "grad_norm": 1.156396746635437, "learning_rate": 0.001, "loss": 1.8757, "step": 391496 }, { "epoch": 33.77777777777778, "grad_norm": 1.3203887939453125, "learning_rate": 0.001, "loss": 1.8908, "step": 391552 }, { "epoch": 33.78260869565217, "grad_norm": 0.41138896346092224, "learning_rate": 0.001, "loss": 1.8755, "step": 391608 }, { "epoch": 33.78743961352657, "grad_norm": 0.726181149482727, "learning_rate": 0.001, "loss": 1.8689, "step": 391664 }, { "epoch": 33.792270531400966, "grad_norm": 1.0437425374984741, "learning_rate": 0.001, "loss": 1.8625, "step": 391720 }, { "epoch": 33.79710144927536, "grad_norm": 5.763835906982422, "learning_rate": 0.001, "loss": 1.8569, "step": 391776 }, { "epoch": 33.80193236714976, "grad_norm": 1.9013830423355103, "learning_rate": 0.001, "loss": 1.8527, "step": 391832 }, { "epoch": 33.806763285024154, "grad_norm": 2.055738925933838, "learning_rate": 0.001, "loss": 1.8427, "step": 391888 }, { "epoch": 33.81159420289855, "grad_norm": 0.5994840264320374, "learning_rate": 0.001, "loss": 1.8526, "step": 391944 }, { "epoch": 33.81642512077295, "grad_norm": 0.6166769862174988, "learning_rate": 0.001, "loss": 1.8474, "step": 392000 }, { "epoch": 33.82125603864734, "grad_norm": 0.9623236060142517, "learning_rate": 0.001, "loss": 1.841, "step": 392056 }, { "epoch": 33.82608695652174, "grad_norm": 0.48830509185791016, "learning_rate": 0.001, "loss": 1.8457, "step": 392112 }, { "epoch": 33.830917874396135, "grad_norm": 0.34471234679222107, "learning_rate": 0.001, "loss": 1.8409, "step": 392168 }, { "epoch": 33.83574879227053, "grad_norm": 0.6210710406303406, "learning_rate": 0.001, "loss": 1.8361, "step": 392224 }, { "epoch": 33.84057971014493, "grad_norm": 0.3446214199066162, "learning_rate": 0.001, "loss": 1.8329, "step": 392280 }, { "epoch": 33.84541062801932, "grad_norm": 1.2080656290054321, "learning_rate": 0.001, "loss": 1.8382, "step": 392336 }, { "epoch": 33.85024154589372, "grad_norm": 1.0600780248641968, "learning_rate": 0.001, "loss": 1.8455, "step": 392392 }, { "epoch": 33.85507246376812, "grad_norm": 21.053316116333008, "learning_rate": 0.001, "loss": 1.8436, "step": 392448 }, { "epoch": 33.85990338164251, "grad_norm": 0.8896178603172302, "learning_rate": 0.001, "loss": 1.8513, "step": 392504 }, { "epoch": 33.86473429951691, "grad_norm": 2.5448110103607178, "learning_rate": 0.001, "loss": 1.8424, "step": 392560 }, { "epoch": 33.869565217391305, "grad_norm": 0.6181594133377075, "learning_rate": 0.001, "loss": 1.8401, "step": 392616 }, { "epoch": 33.8743961352657, "grad_norm": 3.0872538089752197, "learning_rate": 0.001, "loss": 1.8452, "step": 392672 }, { "epoch": 33.8792270531401, "grad_norm": 0.7827709317207336, "learning_rate": 0.001, "loss": 1.8455, "step": 392728 }, { "epoch": 33.88405797101449, "grad_norm": 1.869963526725769, "learning_rate": 0.001, "loss": 1.8506, "step": 392784 }, { "epoch": 33.888888888888886, "grad_norm": 1.416790246963501, "learning_rate": 0.001, "loss": 1.8524, "step": 392840 }, { "epoch": 33.893719806763286, "grad_norm": 0.6115961074829102, "learning_rate": 0.001, "loss": 1.8407, "step": 392896 }, { "epoch": 33.89855072463768, "grad_norm": 7.0171332359313965, "learning_rate": 0.001, "loss": 1.8338, "step": 392952 }, { "epoch": 33.90338164251208, "grad_norm": 0.6242427229881287, "learning_rate": 0.001, "loss": 1.8485, "step": 393008 }, { "epoch": 33.908212560386474, "grad_norm": 0.42547935247421265, "learning_rate": 0.001, "loss": 1.8516, "step": 393064 }, { "epoch": 33.91304347826087, "grad_norm": 3.104501485824585, "learning_rate": 0.001, "loss": 1.8459, "step": 393120 }, { "epoch": 33.91787439613527, "grad_norm": 0.690721333026886, "learning_rate": 0.001, "loss": 1.8466, "step": 393176 }, { "epoch": 33.92270531400966, "grad_norm": 0.41456112265586853, "learning_rate": 0.001, "loss": 1.8448, "step": 393232 }, { "epoch": 33.927536231884055, "grad_norm": 0.5367662310600281, "learning_rate": 0.001, "loss": 1.8335, "step": 393288 }, { "epoch": 33.932367149758456, "grad_norm": 0.920475423336029, "learning_rate": 0.001, "loss": 1.833, "step": 393344 }, { "epoch": 33.93719806763285, "grad_norm": 0.7340195775032043, "learning_rate": 0.001, "loss": 1.8345, "step": 393400 }, { "epoch": 33.94202898550725, "grad_norm": 0.7892662286758423, "learning_rate": 0.001, "loss": 1.8424, "step": 393456 }, { "epoch": 33.94685990338164, "grad_norm": 0.7239735722541809, "learning_rate": 0.001, "loss": 1.8456, "step": 393512 }, { "epoch": 33.95169082125604, "grad_norm": 0.3607323467731476, "learning_rate": 0.001, "loss": 1.8513, "step": 393568 }, { "epoch": 33.95652173913044, "grad_norm": 2.1029012203216553, "learning_rate": 0.001, "loss": 1.8473, "step": 393624 }, { "epoch": 33.96135265700483, "grad_norm": 1.2454121112823486, "learning_rate": 0.001, "loss": 1.8651, "step": 393680 }, { "epoch": 33.966183574879224, "grad_norm": 0.42451608180999756, "learning_rate": 0.001, "loss": 1.8572, "step": 393736 }, { "epoch": 33.971014492753625, "grad_norm": 1.7361409664154053, "learning_rate": 0.001, "loss": 1.8419, "step": 393792 }, { "epoch": 33.97584541062802, "grad_norm": 0.506693422794342, "learning_rate": 0.001, "loss": 1.8411, "step": 393848 }, { "epoch": 33.98067632850242, "grad_norm": 1.842025876045227, "learning_rate": 0.001, "loss": 1.8599, "step": 393904 }, { "epoch": 33.98550724637681, "grad_norm": 1.9898440837860107, "learning_rate": 0.001, "loss": 1.8546, "step": 393960 }, { "epoch": 33.990338164251206, "grad_norm": 0.5598286986351013, "learning_rate": 0.001, "loss": 1.8541, "step": 394016 }, { "epoch": 33.99516908212561, "grad_norm": 0.3708111047744751, "learning_rate": 0.001, "loss": 1.8439, "step": 394072 }, { "epoch": 34.0, "grad_norm": 0.3796915113925934, "learning_rate": 0.001, "loss": 1.8486, "step": 394128 }, { "epoch": 34.00483091787439, "grad_norm": 0.5497387647628784, "learning_rate": 0.001, "loss": 1.8274, "step": 394184 }, { "epoch": 34.009661835748794, "grad_norm": 1.4706823825836182, "learning_rate": 0.001, "loss": 1.844, "step": 394240 }, { "epoch": 34.01449275362319, "grad_norm": 3.940784215927124, "learning_rate": 0.001, "loss": 1.8437, "step": 394296 }, { "epoch": 34.01932367149758, "grad_norm": 1.4999724626541138, "learning_rate": 0.001, "loss": 1.849, "step": 394352 }, { "epoch": 34.02415458937198, "grad_norm": 1.0261094570159912, "learning_rate": 0.001, "loss": 1.8548, "step": 394408 }, { "epoch": 34.028985507246375, "grad_norm": 3.463257074356079, "learning_rate": 0.001, "loss": 1.8316, "step": 394464 }, { "epoch": 34.033816425120776, "grad_norm": 14.41978645324707, "learning_rate": 0.001, "loss": 1.8307, "step": 394520 }, { "epoch": 34.03864734299517, "grad_norm": 6.772956848144531, "learning_rate": 0.001, "loss": 1.8312, "step": 394576 }, { "epoch": 34.04347826086956, "grad_norm": 1.7045167684555054, "learning_rate": 0.001, "loss": 1.8284, "step": 394632 }, { "epoch": 34.04830917874396, "grad_norm": 0.8915327191352844, "learning_rate": 0.001, "loss": 1.828, "step": 394688 }, { "epoch": 34.05314009661836, "grad_norm": 1.1439093351364136, "learning_rate": 0.001, "loss": 1.822, "step": 394744 }, { "epoch": 34.05797101449275, "grad_norm": 0.9980172514915466, "learning_rate": 0.001, "loss": 1.8313, "step": 394800 }, { "epoch": 34.06280193236715, "grad_norm": 2.4244210720062256, "learning_rate": 0.001, "loss": 1.8144, "step": 394856 }, { "epoch": 34.067632850241544, "grad_norm": 0.42881378531455994, "learning_rate": 0.001, "loss": 1.8195, "step": 394912 }, { "epoch": 34.072463768115945, "grad_norm": 29.04627227783203, "learning_rate": 0.001, "loss": 1.8217, "step": 394968 }, { "epoch": 34.07729468599034, "grad_norm": 2.9468963146209717, "learning_rate": 0.001, "loss": 1.8266, "step": 395024 }, { "epoch": 34.08212560386473, "grad_norm": 1.6556223630905151, "learning_rate": 0.001, "loss": 1.8215, "step": 395080 }, { "epoch": 34.08695652173913, "grad_norm": 0.25938257575035095, "learning_rate": 0.001, "loss": 1.8318, "step": 395136 }, { "epoch": 34.091787439613526, "grad_norm": 0.9088306427001953, "learning_rate": 0.001, "loss": 1.8188, "step": 395192 }, { "epoch": 34.09661835748792, "grad_norm": 4.958169937133789, "learning_rate": 0.001, "loss": 1.8259, "step": 395248 }, { "epoch": 34.10144927536232, "grad_norm": 3.492943525314331, "learning_rate": 0.001, "loss": 1.8239, "step": 395304 }, { "epoch": 34.106280193236714, "grad_norm": 0.99062579870224, "learning_rate": 0.001, "loss": 1.8314, "step": 395360 }, { "epoch": 34.111111111111114, "grad_norm": 1.066786766052246, "learning_rate": 0.001, "loss": 1.8308, "step": 395416 }, { "epoch": 34.11594202898551, "grad_norm": 0.7450012564659119, "learning_rate": 0.001, "loss": 1.8437, "step": 395472 }, { "epoch": 34.1207729468599, "grad_norm": 0.660764217376709, "learning_rate": 0.001, "loss": 1.8198, "step": 395528 }, { "epoch": 34.1256038647343, "grad_norm": 0.36358848214149475, "learning_rate": 0.001, "loss": 1.827, "step": 395584 }, { "epoch": 34.130434782608695, "grad_norm": 2.0411746501922607, "learning_rate": 0.001, "loss": 1.8452, "step": 395640 }, { "epoch": 34.13526570048309, "grad_norm": 1.1847214698791504, "learning_rate": 0.001, "loss": 1.8309, "step": 395696 }, { "epoch": 34.14009661835749, "grad_norm": 0.8884884715080261, "learning_rate": 0.001, "loss": 1.8361, "step": 395752 }, { "epoch": 34.14492753623188, "grad_norm": 0.6034506559371948, "learning_rate": 0.001, "loss": 1.8266, "step": 395808 }, { "epoch": 34.14975845410628, "grad_norm": 0.4040265679359436, "learning_rate": 0.001, "loss": 1.826, "step": 395864 }, { "epoch": 34.15458937198068, "grad_norm": 2.6042895317077637, "learning_rate": 0.001, "loss": 1.8299, "step": 395920 }, { "epoch": 34.15942028985507, "grad_norm": 2.66233229637146, "learning_rate": 0.001, "loss": 1.8172, "step": 395976 }, { "epoch": 34.16425120772947, "grad_norm": 0.8009576201438904, "learning_rate": 0.001, "loss": 1.8126, "step": 396032 }, { "epoch": 34.169082125603865, "grad_norm": 0.8031923770904541, "learning_rate": 0.001, "loss": 1.8174, "step": 396088 }, { "epoch": 34.17391304347826, "grad_norm": 1.4431639909744263, "learning_rate": 0.001, "loss": 1.8202, "step": 396144 }, { "epoch": 34.17874396135266, "grad_norm": 1.0435584783554077, "learning_rate": 0.001, "loss": 1.82, "step": 396200 }, { "epoch": 34.18357487922705, "grad_norm": 5.20131254196167, "learning_rate": 0.001, "loss": 1.8209, "step": 396256 }, { "epoch": 34.18840579710145, "grad_norm": 0.9241687655448914, "learning_rate": 0.001, "loss": 1.8363, "step": 396312 }, { "epoch": 34.193236714975846, "grad_norm": 0.6321746706962585, "learning_rate": 0.001, "loss": 1.8477, "step": 396368 }, { "epoch": 34.19806763285024, "grad_norm": 0.37644436955451965, "learning_rate": 0.001, "loss": 1.8311, "step": 396424 }, { "epoch": 34.20289855072464, "grad_norm": 1.5917465686798096, "learning_rate": 0.001, "loss": 1.8359, "step": 396480 }, { "epoch": 34.207729468599034, "grad_norm": 0.40337133407592773, "learning_rate": 0.001, "loss": 1.8345, "step": 396536 }, { "epoch": 34.21256038647343, "grad_norm": 0.6037850975990295, "learning_rate": 0.001, "loss": 1.822, "step": 396592 }, { "epoch": 34.21739130434783, "grad_norm": 0.4083627164363861, "learning_rate": 0.001, "loss": 1.8325, "step": 396648 }, { "epoch": 34.22222222222222, "grad_norm": 0.30457040667533875, "learning_rate": 0.001, "loss": 1.838, "step": 396704 }, { "epoch": 34.227053140096615, "grad_norm": 8.525131225585938, "learning_rate": 0.001, "loss": 1.8236, "step": 396760 }, { "epoch": 34.231884057971016, "grad_norm": 0.41897645592689514, "learning_rate": 0.001, "loss": 1.8209, "step": 396816 }, { "epoch": 34.23671497584541, "grad_norm": 0.8614577651023865, "learning_rate": 0.001, "loss": 1.8211, "step": 396872 }, { "epoch": 34.24154589371981, "grad_norm": 0.4343147873878479, "learning_rate": 0.001, "loss": 1.8334, "step": 396928 }, { "epoch": 34.2463768115942, "grad_norm": 1.933855414390564, "learning_rate": 0.001, "loss": 1.8225, "step": 396984 }, { "epoch": 34.2512077294686, "grad_norm": 0.4030202329158783, "learning_rate": 0.001, "loss": 1.8275, "step": 397040 }, { "epoch": 34.256038647343, "grad_norm": 0.3022425174713135, "learning_rate": 0.001, "loss": 1.8216, "step": 397096 }, { "epoch": 34.26086956521739, "grad_norm": 3.4483048915863037, "learning_rate": 0.001, "loss": 1.812, "step": 397152 }, { "epoch": 34.265700483091784, "grad_norm": 2.0142550468444824, "learning_rate": 0.001, "loss": 1.8161, "step": 397208 }, { "epoch": 34.270531400966185, "grad_norm": 4.08930778503418, "learning_rate": 0.001, "loss": 1.8331, "step": 397264 }, { "epoch": 34.27536231884058, "grad_norm": 5.345297813415527, "learning_rate": 0.001, "loss": 1.8406, "step": 397320 }, { "epoch": 34.28019323671498, "grad_norm": 1.770892858505249, "learning_rate": 0.001, "loss": 1.8314, "step": 397376 }, { "epoch": 34.28502415458937, "grad_norm": 0.4917718768119812, "learning_rate": 0.001, "loss": 1.8392, "step": 397432 }, { "epoch": 34.289855072463766, "grad_norm": 7.410118103027344, "learning_rate": 0.001, "loss": 1.8363, "step": 397488 }, { "epoch": 34.29468599033817, "grad_norm": 2.2117772102355957, "learning_rate": 0.001, "loss": 1.8418, "step": 397544 }, { "epoch": 34.29951690821256, "grad_norm": 0.564249575138092, "learning_rate": 0.001, "loss": 1.822, "step": 397600 }, { "epoch": 34.30434782608695, "grad_norm": 0.3040977418422699, "learning_rate": 0.001, "loss": 1.8229, "step": 397656 }, { "epoch": 34.309178743961354, "grad_norm": 1.8393222093582153, "learning_rate": 0.001, "loss": 1.82, "step": 397712 }, { "epoch": 34.31400966183575, "grad_norm": 0.8427688479423523, "learning_rate": 0.001, "loss": 1.8358, "step": 397768 }, { "epoch": 34.31884057971015, "grad_norm": 0.3609927296638489, "learning_rate": 0.001, "loss": 1.8482, "step": 397824 }, { "epoch": 34.32367149758454, "grad_norm": 0.47962257266044617, "learning_rate": 0.001, "loss": 1.8468, "step": 397880 }, { "epoch": 34.328502415458935, "grad_norm": 0.5269871950149536, "learning_rate": 0.001, "loss": 1.8397, "step": 397936 }, { "epoch": 34.333333333333336, "grad_norm": 4.408270359039307, "learning_rate": 0.001, "loss": 1.8402, "step": 397992 }, { "epoch": 34.33816425120773, "grad_norm": 1.089072823524475, "learning_rate": 0.001, "loss": 1.8259, "step": 398048 }, { "epoch": 34.34299516908212, "grad_norm": 4.2065606117248535, "learning_rate": 0.001, "loss": 1.8233, "step": 398104 }, { "epoch": 34.34782608695652, "grad_norm": 0.6628605723381042, "learning_rate": 0.001, "loss": 1.8328, "step": 398160 }, { "epoch": 34.35265700483092, "grad_norm": 1.0737597942352295, "learning_rate": 0.001, "loss": 1.8397, "step": 398216 }, { "epoch": 34.35748792270532, "grad_norm": 0.6845812797546387, "learning_rate": 0.001, "loss": 1.8359, "step": 398272 }, { "epoch": 34.36231884057971, "grad_norm": 0.5066124200820923, "learning_rate": 0.001, "loss": 1.823, "step": 398328 }, { "epoch": 34.367149758454104, "grad_norm": 1.1807925701141357, "learning_rate": 0.001, "loss": 1.8236, "step": 398384 }, { "epoch": 34.371980676328505, "grad_norm": 0.2666614055633545, "learning_rate": 0.001, "loss": 1.8318, "step": 398440 }, { "epoch": 34.3768115942029, "grad_norm": 0.846113920211792, "learning_rate": 0.001, "loss": 1.828, "step": 398496 }, { "epoch": 34.38164251207729, "grad_norm": 0.3131345510482788, "learning_rate": 0.001, "loss": 1.8238, "step": 398552 }, { "epoch": 34.38647342995169, "grad_norm": 0.6297203302383423, "learning_rate": 0.001, "loss": 1.8172, "step": 398608 }, { "epoch": 34.391304347826086, "grad_norm": 0.431806743144989, "learning_rate": 0.001, "loss": 1.8238, "step": 398664 }, { "epoch": 34.39613526570048, "grad_norm": 2.288335084915161, "learning_rate": 0.001, "loss": 1.8193, "step": 398720 }, { "epoch": 34.40096618357488, "grad_norm": 0.44937458634376526, "learning_rate": 0.001, "loss": 1.8313, "step": 398776 }, { "epoch": 34.405797101449274, "grad_norm": 0.9151427149772644, "learning_rate": 0.001, "loss": 1.8336, "step": 398832 }, { "epoch": 34.410628019323674, "grad_norm": 0.29244372248649597, "learning_rate": 0.001, "loss": 1.8295, "step": 398888 }, { "epoch": 34.41545893719807, "grad_norm": 3.320540428161621, "learning_rate": 0.001, "loss": 1.8253, "step": 398944 }, { "epoch": 34.42028985507246, "grad_norm": 0.4552459418773651, "learning_rate": 0.001, "loss": 1.827, "step": 399000 }, { "epoch": 34.42512077294686, "grad_norm": 0.31681784987449646, "learning_rate": 0.001, "loss": 1.8244, "step": 399056 }, { "epoch": 34.429951690821255, "grad_norm": 4.7241530418396, "learning_rate": 0.001, "loss": 1.8265, "step": 399112 }, { "epoch": 34.43478260869565, "grad_norm": 0.6703901290893555, "learning_rate": 0.001, "loss": 1.8242, "step": 399168 }, { "epoch": 34.43961352657005, "grad_norm": 1.2681350708007812, "learning_rate": 0.001, "loss": 1.8239, "step": 399224 }, { "epoch": 34.44444444444444, "grad_norm": 0.5211673974990845, "learning_rate": 0.001, "loss": 1.8277, "step": 399280 }, { "epoch": 34.44927536231884, "grad_norm": 0.5410476922988892, "learning_rate": 0.001, "loss": 1.8207, "step": 399336 }, { "epoch": 34.45410628019324, "grad_norm": 0.5327828526496887, "learning_rate": 0.001, "loss": 1.8147, "step": 399392 }, { "epoch": 34.45893719806763, "grad_norm": 0.6720408797264099, "learning_rate": 0.001, "loss": 1.8217, "step": 399448 }, { "epoch": 34.46376811594203, "grad_norm": 3.904264450073242, "learning_rate": 0.001, "loss": 1.8193, "step": 399504 }, { "epoch": 34.468599033816425, "grad_norm": 0.4814980924129486, "learning_rate": 0.001, "loss": 1.8255, "step": 399560 }, { "epoch": 34.47342995169082, "grad_norm": 0.5083090662956238, "learning_rate": 0.001, "loss": 1.8257, "step": 399616 }, { "epoch": 34.47826086956522, "grad_norm": 0.7455845475196838, "learning_rate": 0.001, "loss": 1.8236, "step": 399672 }, { "epoch": 34.48309178743961, "grad_norm": 1.604215145111084, "learning_rate": 0.001, "loss": 1.8228, "step": 399728 }, { "epoch": 34.48792270531401, "grad_norm": 0.41948601603507996, "learning_rate": 0.001, "loss": 1.8374, "step": 399784 }, { "epoch": 34.492753623188406, "grad_norm": 2.4225456714630127, "learning_rate": 0.001, "loss": 1.8426, "step": 399840 }, { "epoch": 34.4975845410628, "grad_norm": 0.3565793037414551, "learning_rate": 0.001, "loss": 1.8534, "step": 399896 }, { "epoch": 34.5024154589372, "grad_norm": 0.9869177937507629, "learning_rate": 0.001, "loss": 1.8407, "step": 399952 }, { "epoch": 34.507246376811594, "grad_norm": 1.5822570323944092, "learning_rate": 0.001, "loss": 1.8286, "step": 400008 }, { "epoch": 34.51207729468599, "grad_norm": 2.6658272743225098, "learning_rate": 0.001, "loss": 1.8381, "step": 400064 }, { "epoch": 34.51690821256039, "grad_norm": 8.518289566040039, "learning_rate": 0.001, "loss": 1.8413, "step": 400120 }, { "epoch": 34.52173913043478, "grad_norm": 4.847125053405762, "learning_rate": 0.001, "loss": 1.8369, "step": 400176 }, { "epoch": 34.52657004830918, "grad_norm": 0.38794419169425964, "learning_rate": 0.001, "loss": 1.8374, "step": 400232 }, { "epoch": 34.531400966183575, "grad_norm": 0.48767393827438354, "learning_rate": 0.001, "loss": 1.8364, "step": 400288 }, { "epoch": 34.53623188405797, "grad_norm": 0.4197387099266052, "learning_rate": 0.001, "loss": 1.8362, "step": 400344 }, { "epoch": 34.54106280193237, "grad_norm": 0.6299371719360352, "learning_rate": 0.001, "loss": 1.8275, "step": 400400 }, { "epoch": 34.54589371980676, "grad_norm": 1.0691109895706177, "learning_rate": 0.001, "loss": 1.8365, "step": 400456 }, { "epoch": 34.55072463768116, "grad_norm": 0.3436374068260193, "learning_rate": 0.001, "loss": 1.8301, "step": 400512 }, { "epoch": 34.55555555555556, "grad_norm": 0.3776346445083618, "learning_rate": 0.001, "loss": 1.8336, "step": 400568 }, { "epoch": 34.56038647342995, "grad_norm": 0.5395188927650452, "learning_rate": 0.001, "loss": 1.8242, "step": 400624 }, { "epoch": 34.56521739130435, "grad_norm": 0.4652628004550934, "learning_rate": 0.001, "loss": 1.8201, "step": 400680 }, { "epoch": 34.570048309178745, "grad_norm": 0.39203494787216187, "learning_rate": 0.001, "loss": 1.8187, "step": 400736 }, { "epoch": 34.57487922705314, "grad_norm": 0.46616658568382263, "learning_rate": 0.001, "loss": 1.8189, "step": 400792 }, { "epoch": 34.57971014492754, "grad_norm": 0.37062743306159973, "learning_rate": 0.001, "loss": 1.816, "step": 400848 }, { "epoch": 34.58454106280193, "grad_norm": 0.38949325680732727, "learning_rate": 0.001, "loss": 1.817, "step": 400904 }, { "epoch": 34.589371980676326, "grad_norm": 1.586822509765625, "learning_rate": 0.001, "loss": 1.8235, "step": 400960 }, { "epoch": 34.594202898550726, "grad_norm": 3.8605692386627197, "learning_rate": 0.001, "loss": 1.8201, "step": 401016 }, { "epoch": 34.59903381642512, "grad_norm": 0.6867456436157227, "learning_rate": 0.001, "loss": 1.8332, "step": 401072 }, { "epoch": 34.60386473429952, "grad_norm": 0.4530758559703827, "learning_rate": 0.001, "loss": 1.8377, "step": 401128 }, { "epoch": 34.608695652173914, "grad_norm": 1.516852855682373, "learning_rate": 0.001, "loss": 1.8313, "step": 401184 }, { "epoch": 34.61352657004831, "grad_norm": 0.44664517045021057, "learning_rate": 0.001, "loss": 1.8469, "step": 401240 }, { "epoch": 34.61835748792271, "grad_norm": 0.8147264719009399, "learning_rate": 0.001, "loss": 1.8451, "step": 401296 }, { "epoch": 34.6231884057971, "grad_norm": 1.0992869138717651, "learning_rate": 0.001, "loss": 1.8379, "step": 401352 }, { "epoch": 34.628019323671495, "grad_norm": 0.43195173144340515, "learning_rate": 0.001, "loss": 1.8423, "step": 401408 }, { "epoch": 34.632850241545896, "grad_norm": 0.3801847994327545, "learning_rate": 0.001, "loss": 1.8362, "step": 401464 }, { "epoch": 34.63768115942029, "grad_norm": 0.40044158697128296, "learning_rate": 0.001, "loss": 1.8307, "step": 401520 }, { "epoch": 34.64251207729468, "grad_norm": 0.27206578850746155, "learning_rate": 0.001, "loss": 1.8228, "step": 401576 }, { "epoch": 34.64734299516908, "grad_norm": 0.4572310745716095, "learning_rate": 0.001, "loss": 1.8287, "step": 401632 }, { "epoch": 34.65217391304348, "grad_norm": 0.5777914524078369, "learning_rate": 0.001, "loss": 1.8295, "step": 401688 }, { "epoch": 34.65700483091788, "grad_norm": 1.7568005323410034, "learning_rate": 0.001, "loss": 1.8305, "step": 401744 }, { "epoch": 34.66183574879227, "grad_norm": 0.4866204559803009, "learning_rate": 0.001, "loss": 1.8435, "step": 401800 }, { "epoch": 34.666666666666664, "grad_norm": 1.0231761932373047, "learning_rate": 0.001, "loss": 1.8542, "step": 401856 }, { "epoch": 34.671497584541065, "grad_norm": 0.41573628783226013, "learning_rate": 0.001, "loss": 1.8518, "step": 401912 }, { "epoch": 34.67632850241546, "grad_norm": 0.4632425010204315, "learning_rate": 0.001, "loss": 1.8495, "step": 401968 }, { "epoch": 34.68115942028985, "grad_norm": 0.4385789632797241, "learning_rate": 0.001, "loss": 1.8751, "step": 402024 }, { "epoch": 34.68599033816425, "grad_norm": 2.6744258403778076, "learning_rate": 0.001, "loss": 1.8463, "step": 402080 }, { "epoch": 34.690821256038646, "grad_norm": 0.3077704608440399, "learning_rate": 0.001, "loss": 1.8467, "step": 402136 }, { "epoch": 34.69565217391305, "grad_norm": 2.286090135574341, "learning_rate": 0.001, "loss": 1.8441, "step": 402192 }, { "epoch": 34.70048309178744, "grad_norm": 0.2735322415828705, "learning_rate": 0.001, "loss": 1.8442, "step": 402248 }, { "epoch": 34.70531400966183, "grad_norm": 1.4314903020858765, "learning_rate": 0.001, "loss": 1.8452, "step": 402304 }, { "epoch": 34.710144927536234, "grad_norm": 1.2184492349624634, "learning_rate": 0.001, "loss": 1.8463, "step": 402360 }, { "epoch": 34.71497584541063, "grad_norm": 2.7466936111450195, "learning_rate": 0.001, "loss": 1.8408, "step": 402416 }, { "epoch": 34.71980676328502, "grad_norm": 0.9126217365264893, "learning_rate": 0.001, "loss": 1.8502, "step": 402472 }, { "epoch": 34.72463768115942, "grad_norm": 1.0532206296920776, "learning_rate": 0.001, "loss": 1.8441, "step": 402528 }, { "epoch": 34.729468599033815, "grad_norm": 0.8165702223777771, "learning_rate": 0.001, "loss": 1.8523, "step": 402584 }, { "epoch": 34.734299516908216, "grad_norm": 0.8159556984901428, "learning_rate": 0.001, "loss": 1.8444, "step": 402640 }, { "epoch": 34.73913043478261, "grad_norm": 3.643235206604004, "learning_rate": 0.001, "loss": 1.8505, "step": 402696 }, { "epoch": 34.743961352657, "grad_norm": 0.4246162176132202, "learning_rate": 0.001, "loss": 1.8433, "step": 402752 }, { "epoch": 34.7487922705314, "grad_norm": 0.5304322242736816, "learning_rate": 0.001, "loss": 1.8352, "step": 402808 }, { "epoch": 34.7536231884058, "grad_norm": 0.595513641834259, "learning_rate": 0.001, "loss": 1.8332, "step": 402864 }, { "epoch": 34.75845410628019, "grad_norm": 0.5989309549331665, "learning_rate": 0.001, "loss": 1.8308, "step": 402920 }, { "epoch": 34.76328502415459, "grad_norm": 1.359060287475586, "learning_rate": 0.001, "loss": 1.839, "step": 402976 }, { "epoch": 34.768115942028984, "grad_norm": 8.58255672454834, "learning_rate": 0.001, "loss": 1.858, "step": 403032 }, { "epoch": 34.772946859903385, "grad_norm": 0.6457918882369995, "learning_rate": 0.001, "loss": 1.8513, "step": 403088 }, { "epoch": 34.77777777777778, "grad_norm": 1.3997337818145752, "learning_rate": 0.001, "loss": 1.8458, "step": 403144 }, { "epoch": 34.78260869565217, "grad_norm": 0.3774053752422333, "learning_rate": 0.001, "loss": 1.8415, "step": 403200 }, { "epoch": 34.78743961352657, "grad_norm": 1.7149569988250732, "learning_rate": 0.001, "loss": 1.8592, "step": 403256 }, { "epoch": 34.792270531400966, "grad_norm": 1.4429880380630493, "learning_rate": 0.001, "loss": 1.8454, "step": 403312 }, { "epoch": 34.79710144927536, "grad_norm": 1.2197086811065674, "learning_rate": 0.001, "loss": 1.8533, "step": 403368 }, { "epoch": 34.80193236714976, "grad_norm": 0.6489006280899048, "learning_rate": 0.001, "loss": 1.8459, "step": 403424 }, { "epoch": 34.806763285024154, "grad_norm": 0.42965269088745117, "learning_rate": 0.001, "loss": 1.8437, "step": 403480 }, { "epoch": 34.81159420289855, "grad_norm": 1.2669460773468018, "learning_rate": 0.001, "loss": 1.8423, "step": 403536 }, { "epoch": 34.81642512077295, "grad_norm": 22.795419692993164, "learning_rate": 0.001, "loss": 1.8592, "step": 403592 }, { "epoch": 34.82125603864734, "grad_norm": 7.510748386383057, "learning_rate": 0.001, "loss": 1.8539, "step": 403648 }, { "epoch": 34.82608695652174, "grad_norm": 0.6432771682739258, "learning_rate": 0.001, "loss": 1.8597, "step": 403704 }, { "epoch": 34.830917874396135, "grad_norm": 1.1688041687011719, "learning_rate": 0.001, "loss": 1.8607, "step": 403760 }, { "epoch": 34.83574879227053, "grad_norm": 0.5625596046447754, "learning_rate": 0.001, "loss": 1.8691, "step": 403816 }, { "epoch": 34.84057971014493, "grad_norm": 2.3385555744171143, "learning_rate": 0.001, "loss": 1.8576, "step": 403872 }, { "epoch": 34.84541062801932, "grad_norm": 1.0591169595718384, "learning_rate": 0.001, "loss": 1.8595, "step": 403928 }, { "epoch": 34.85024154589372, "grad_norm": 0.49466386437416077, "learning_rate": 0.001, "loss": 1.8548, "step": 403984 }, { "epoch": 34.85507246376812, "grad_norm": 0.7791069746017456, "learning_rate": 0.001, "loss": 1.8521, "step": 404040 }, { "epoch": 34.85990338164251, "grad_norm": 0.5880547761917114, "learning_rate": 0.001, "loss": 1.8517, "step": 404096 }, { "epoch": 34.86473429951691, "grad_norm": 0.2666136622428894, "learning_rate": 0.001, "loss": 1.8424, "step": 404152 }, { "epoch": 34.869565217391305, "grad_norm": 0.43976566195487976, "learning_rate": 0.001, "loss": 1.8359, "step": 404208 }, { "epoch": 34.8743961352657, "grad_norm": 0.33451250195503235, "learning_rate": 0.001, "loss": 1.8311, "step": 404264 }, { "epoch": 34.8792270531401, "grad_norm": 0.4396872818470001, "learning_rate": 0.001, "loss": 1.8358, "step": 404320 }, { "epoch": 34.88405797101449, "grad_norm": 0.2598714232444763, "learning_rate": 0.001, "loss": 1.8439, "step": 404376 }, { "epoch": 34.888888888888886, "grad_norm": 0.3735817074775696, "learning_rate": 0.001, "loss": 1.8348, "step": 404432 }, { "epoch": 34.893719806763286, "grad_norm": 0.7312192320823669, "learning_rate": 0.001, "loss": 1.8396, "step": 404488 }, { "epoch": 34.89855072463768, "grad_norm": 0.2945324778556824, "learning_rate": 0.001, "loss": 1.8401, "step": 404544 }, { "epoch": 34.90338164251208, "grad_norm": 2.8810641765594482, "learning_rate": 0.001, "loss": 1.8424, "step": 404600 }, { "epoch": 34.908212560386474, "grad_norm": 0.4389367699623108, "learning_rate": 0.001, "loss": 1.8394, "step": 404656 }, { "epoch": 34.91304347826087, "grad_norm": 0.6118005514144897, "learning_rate": 0.001, "loss": 1.8377, "step": 404712 }, { "epoch": 34.91787439613527, "grad_norm": 0.6925011873245239, "learning_rate": 0.001, "loss": 1.843, "step": 404768 }, { "epoch": 34.92270531400966, "grad_norm": 0.44683128595352173, "learning_rate": 0.001, "loss": 1.833, "step": 404824 }, { "epoch": 34.927536231884055, "grad_norm": 1.1117666959762573, "learning_rate": 0.001, "loss": 1.8325, "step": 404880 }, { "epoch": 34.932367149758456, "grad_norm": 0.4296211004257202, "learning_rate": 0.001, "loss": 1.839, "step": 404936 }, { "epoch": 34.93719806763285, "grad_norm": 0.4971621334552765, "learning_rate": 0.001, "loss": 1.8441, "step": 404992 }, { "epoch": 34.94202898550725, "grad_norm": 0.610191822052002, "learning_rate": 0.001, "loss": 1.8528, "step": 405048 }, { "epoch": 34.94685990338164, "grad_norm": 0.4546540081501007, "learning_rate": 0.001, "loss": 1.8546, "step": 405104 }, { "epoch": 34.95169082125604, "grad_norm": 0.30755624175071716, "learning_rate": 0.001, "loss": 1.8432, "step": 405160 }, { "epoch": 34.95652173913044, "grad_norm": 0.7772684097290039, "learning_rate": 0.001, "loss": 1.8427, "step": 405216 }, { "epoch": 34.96135265700483, "grad_norm": 0.953845739364624, "learning_rate": 0.001, "loss": 1.8354, "step": 405272 }, { "epoch": 34.966183574879224, "grad_norm": 0.40057969093322754, "learning_rate": 0.001, "loss": 1.8403, "step": 405328 }, { "epoch": 34.971014492753625, "grad_norm": 0.8585100173950195, "learning_rate": 0.001, "loss": 1.845, "step": 405384 }, { "epoch": 34.97584541062802, "grad_norm": 0.434465229511261, "learning_rate": 0.001, "loss": 1.8456, "step": 405440 }, { "epoch": 34.98067632850242, "grad_norm": 0.3140609562397003, "learning_rate": 0.001, "loss": 1.8383, "step": 405496 }, { "epoch": 34.98550724637681, "grad_norm": 0.3205801844596863, "learning_rate": 0.001, "loss": 1.8374, "step": 405552 }, { "epoch": 34.990338164251206, "grad_norm": 0.9985800981521606, "learning_rate": 0.001, "loss": 1.8306, "step": 405608 }, { "epoch": 34.99516908212561, "grad_norm": 16.679222106933594, "learning_rate": 0.001, "loss": 1.8398, "step": 405664 }, { "epoch": 35.0, "grad_norm": 0.38297221064567566, "learning_rate": 0.001, "loss": 1.8424, "step": 405720 }, { "epoch": 35.00483091787439, "grad_norm": 1.0603365898132324, "learning_rate": 0.001, "loss": 1.8053, "step": 405776 }, { "epoch": 35.009661835748794, "grad_norm": 12.936245918273926, "learning_rate": 0.001, "loss": 1.7918, "step": 405832 }, { "epoch": 35.01449275362319, "grad_norm": 1.0991921424865723, "learning_rate": 0.001, "loss": 1.7962, "step": 405888 }, { "epoch": 35.01932367149758, "grad_norm": 0.4593254625797272, "learning_rate": 0.001, "loss": 1.7941, "step": 405944 }, { "epoch": 35.02415458937198, "grad_norm": 5.74153995513916, "learning_rate": 0.001, "loss": 1.8026, "step": 406000 }, { "epoch": 35.028985507246375, "grad_norm": 0.45230138301849365, "learning_rate": 0.001, "loss": 1.798, "step": 406056 }, { "epoch": 35.033816425120776, "grad_norm": 0.3607964515686035, "learning_rate": 0.001, "loss": 1.7958, "step": 406112 }, { "epoch": 35.03864734299517, "grad_norm": 0.6346094012260437, "learning_rate": 0.001, "loss": 1.7974, "step": 406168 }, { "epoch": 35.04347826086956, "grad_norm": 0.2772428095340729, "learning_rate": 0.001, "loss": 1.8009, "step": 406224 }, { "epoch": 35.04830917874396, "grad_norm": 0.4213358461856842, "learning_rate": 0.001, "loss": 1.8023, "step": 406280 }, { "epoch": 35.05314009661836, "grad_norm": 2.2388756275177, "learning_rate": 0.001, "loss": 1.8003, "step": 406336 }, { "epoch": 35.05797101449275, "grad_norm": 1.3021376132965088, "learning_rate": 0.001, "loss": 1.8052, "step": 406392 }, { "epoch": 35.06280193236715, "grad_norm": 0.2797827124595642, "learning_rate": 0.001, "loss": 1.8051, "step": 406448 }, { "epoch": 35.067632850241544, "grad_norm": 0.5487902164459229, "learning_rate": 0.001, "loss": 1.8021, "step": 406504 }, { "epoch": 35.072463768115945, "grad_norm": 1.7535905838012695, "learning_rate": 0.001, "loss": 1.8083, "step": 406560 }, { "epoch": 35.07729468599034, "grad_norm": 0.37005677819252014, "learning_rate": 0.001, "loss": 1.8201, "step": 406616 }, { "epoch": 35.08212560386473, "grad_norm": 4.480099678039551, "learning_rate": 0.001, "loss": 1.8152, "step": 406672 }, { "epoch": 35.08695652173913, "grad_norm": 0.5715920329093933, "learning_rate": 0.001, "loss": 1.8173, "step": 406728 }, { "epoch": 35.091787439613526, "grad_norm": 0.44214823842048645, "learning_rate": 0.001, "loss": 1.8051, "step": 406784 }, { "epoch": 35.09661835748792, "grad_norm": 0.7164579629898071, "learning_rate": 0.001, "loss": 1.8042, "step": 406840 }, { "epoch": 35.10144927536232, "grad_norm": 0.6633809804916382, "learning_rate": 0.001, "loss": 1.8178, "step": 406896 }, { "epoch": 35.106280193236714, "grad_norm": 1.8472250699996948, "learning_rate": 0.001, "loss": 1.8172, "step": 406952 }, { "epoch": 35.111111111111114, "grad_norm": 0.332199364900589, "learning_rate": 0.001, "loss": 1.8154, "step": 407008 }, { "epoch": 35.11594202898551, "grad_norm": 0.3092857003211975, "learning_rate": 0.001, "loss": 1.8074, "step": 407064 }, { "epoch": 35.1207729468599, "grad_norm": 0.33781102299690247, "learning_rate": 0.001, "loss": 1.8074, "step": 407120 }, { "epoch": 35.1256038647343, "grad_norm": 0.8151071071624756, "learning_rate": 0.001, "loss": 1.8053, "step": 407176 }, { "epoch": 35.130434782608695, "grad_norm": 2.928256034851074, "learning_rate": 0.001, "loss": 1.8043, "step": 407232 }, { "epoch": 35.13526570048309, "grad_norm": 1.4900745153427124, "learning_rate": 0.001, "loss": 1.7923, "step": 407288 }, { "epoch": 35.14009661835749, "grad_norm": 0.8271536827087402, "learning_rate": 0.001, "loss": 1.8047, "step": 407344 }, { "epoch": 35.14492753623188, "grad_norm": 0.47368699312210083, "learning_rate": 0.001, "loss": 1.81, "step": 407400 }, { "epoch": 35.14975845410628, "grad_norm": 0.30208927392959595, "learning_rate": 0.001, "loss": 1.8137, "step": 407456 }, { "epoch": 35.15458937198068, "grad_norm": 1.0786821842193604, "learning_rate": 0.001, "loss": 1.8116, "step": 407512 }, { "epoch": 35.15942028985507, "grad_norm": 0.400648832321167, "learning_rate": 0.001, "loss": 1.8167, "step": 407568 }, { "epoch": 35.16425120772947, "grad_norm": 0.31820130348205566, "learning_rate": 0.001, "loss": 1.8198, "step": 407624 }, { "epoch": 35.169082125603865, "grad_norm": 0.5931256413459778, "learning_rate": 0.001, "loss": 1.8027, "step": 407680 }, { "epoch": 35.17391304347826, "grad_norm": 0.6282327175140381, "learning_rate": 0.001, "loss": 1.8139, "step": 407736 }, { "epoch": 35.17874396135266, "grad_norm": 2.359571933746338, "learning_rate": 0.001, "loss": 1.8079, "step": 407792 }, { "epoch": 35.18357487922705, "grad_norm": 0.4110291004180908, "learning_rate": 0.001, "loss": 1.8033, "step": 407848 }, { "epoch": 35.18840579710145, "grad_norm": 1.5298889875411987, "learning_rate": 0.001, "loss": 1.8232, "step": 407904 }, { "epoch": 35.193236714975846, "grad_norm": 0.580429196357727, "learning_rate": 0.001, "loss": 1.8258, "step": 407960 }, { "epoch": 35.19806763285024, "grad_norm": 0.36920854449272156, "learning_rate": 0.001, "loss": 1.8187, "step": 408016 }, { "epoch": 35.20289855072464, "grad_norm": 0.9936524033546448, "learning_rate": 0.001, "loss": 1.81, "step": 408072 }, { "epoch": 35.207729468599034, "grad_norm": 2.220777750015259, "learning_rate": 0.001, "loss": 1.8121, "step": 408128 }, { "epoch": 35.21256038647343, "grad_norm": 2.792311906814575, "learning_rate": 0.001, "loss": 1.8116, "step": 408184 }, { "epoch": 35.21739130434783, "grad_norm": 0.43975090980529785, "learning_rate": 0.001, "loss": 1.8143, "step": 408240 }, { "epoch": 35.22222222222222, "grad_norm": 0.6593931913375854, "learning_rate": 0.001, "loss": 1.8291, "step": 408296 }, { "epoch": 35.227053140096615, "grad_norm": 1.4133843183517456, "learning_rate": 0.001, "loss": 1.8267, "step": 408352 }, { "epoch": 35.231884057971016, "grad_norm": 1.3121495246887207, "learning_rate": 0.001, "loss": 1.8179, "step": 408408 }, { "epoch": 35.23671497584541, "grad_norm": 0.6508413553237915, "learning_rate": 0.001, "loss": 1.8164, "step": 408464 }, { "epoch": 35.24154589371981, "grad_norm": 0.4106164872646332, "learning_rate": 0.001, "loss": 1.8367, "step": 408520 }, { "epoch": 35.2463768115942, "grad_norm": 0.35245218873023987, "learning_rate": 0.001, "loss": 1.8295, "step": 408576 }, { "epoch": 35.2512077294686, "grad_norm": 0.5290487408638, "learning_rate": 0.001, "loss": 1.8204, "step": 408632 }, { "epoch": 35.256038647343, "grad_norm": 0.29576945304870605, "learning_rate": 0.001, "loss": 1.8223, "step": 408688 }, { "epoch": 35.26086956521739, "grad_norm": 0.35738885402679443, "learning_rate": 0.001, "loss": 1.8189, "step": 408744 }, { "epoch": 35.265700483091784, "grad_norm": 1.0058897733688354, "learning_rate": 0.001, "loss": 1.8183, "step": 408800 }, { "epoch": 35.270531400966185, "grad_norm": 0.2509610950946808, "learning_rate": 0.001, "loss": 1.8228, "step": 408856 }, { "epoch": 35.27536231884058, "grad_norm": 2.492396116256714, "learning_rate": 0.001, "loss": 1.8164, "step": 408912 }, { "epoch": 35.28019323671498, "grad_norm": 0.8020156025886536, "learning_rate": 0.001, "loss": 1.8186, "step": 408968 }, { "epoch": 35.28502415458937, "grad_norm": 0.3237086832523346, "learning_rate": 0.001, "loss": 1.816, "step": 409024 }, { "epoch": 35.289855072463766, "grad_norm": 0.9809046983718872, "learning_rate": 0.001, "loss": 1.8228, "step": 409080 }, { "epoch": 35.29468599033817, "grad_norm": 0.3647131621837616, "learning_rate": 0.001, "loss": 1.8214, "step": 409136 }, { "epoch": 35.29951690821256, "grad_norm": 0.6489964127540588, "learning_rate": 0.001, "loss": 1.8371, "step": 409192 }, { "epoch": 35.30434782608695, "grad_norm": 1.1161668300628662, "learning_rate": 0.001, "loss": 1.8323, "step": 409248 }, { "epoch": 35.309178743961354, "grad_norm": 1.0508959293365479, "learning_rate": 0.001, "loss": 1.8233, "step": 409304 }, { "epoch": 35.31400966183575, "grad_norm": 0.44561782479286194, "learning_rate": 0.001, "loss": 1.8174, "step": 409360 }, { "epoch": 35.31884057971015, "grad_norm": 0.4172346591949463, "learning_rate": 0.001, "loss": 1.8137, "step": 409416 }, { "epoch": 35.32367149758454, "grad_norm": 0.30724823474884033, "learning_rate": 0.001, "loss": 1.8201, "step": 409472 }, { "epoch": 35.328502415458935, "grad_norm": 0.7087112665176392, "learning_rate": 0.001, "loss": 1.8252, "step": 409528 }, { "epoch": 35.333333333333336, "grad_norm": 1.3969241380691528, "learning_rate": 0.001, "loss": 1.8205, "step": 409584 }, { "epoch": 35.33816425120773, "grad_norm": 0.3418963551521301, "learning_rate": 0.001, "loss": 1.8158, "step": 409640 }, { "epoch": 35.34299516908212, "grad_norm": 0.23668356239795685, "learning_rate": 0.001, "loss": 1.8093, "step": 409696 }, { "epoch": 35.34782608695652, "grad_norm": 0.2961994409561157, "learning_rate": 0.001, "loss": 1.8148, "step": 409752 }, { "epoch": 35.35265700483092, "grad_norm": 0.2485738843679428, "learning_rate": 0.001, "loss": 1.8182, "step": 409808 }, { "epoch": 35.35748792270532, "grad_norm": 0.2787158489227295, "learning_rate": 0.001, "loss": 1.8079, "step": 409864 }, { "epoch": 35.36231884057971, "grad_norm": 0.5110640525817871, "learning_rate": 0.001, "loss": 1.8052, "step": 409920 }, { "epoch": 35.367149758454104, "grad_norm": 0.48442232608795166, "learning_rate": 0.001, "loss": 1.8079, "step": 409976 }, { "epoch": 35.371980676328505, "grad_norm": 2.297004461288452, "learning_rate": 0.001, "loss": 1.8032, "step": 410032 }, { "epoch": 35.3768115942029, "grad_norm": 1.0248701572418213, "learning_rate": 0.001, "loss": 1.8065, "step": 410088 }, { "epoch": 35.38164251207729, "grad_norm": 0.42920953035354614, "learning_rate": 0.001, "loss": 1.8102, "step": 410144 }, { "epoch": 35.38647342995169, "grad_norm": 0.2690558433532715, "learning_rate": 0.001, "loss": 1.8062, "step": 410200 }, { "epoch": 35.391304347826086, "grad_norm": 0.5248808860778809, "learning_rate": 0.001, "loss": 1.8044, "step": 410256 }, { "epoch": 35.39613526570048, "grad_norm": 0.25228723883628845, "learning_rate": 0.001, "loss": 1.8008, "step": 410312 }, { "epoch": 35.40096618357488, "grad_norm": 0.36791178584098816, "learning_rate": 0.001, "loss": 1.7944, "step": 410368 }, { "epoch": 35.405797101449274, "grad_norm": 0.2855655252933502, "learning_rate": 0.001, "loss": 1.8005, "step": 410424 }, { "epoch": 35.410628019323674, "grad_norm": 0.30936798453330994, "learning_rate": 0.001, "loss": 1.795, "step": 410480 }, { "epoch": 35.41545893719807, "grad_norm": 0.2510398030281067, "learning_rate": 0.001, "loss": 1.8021, "step": 410536 }, { "epoch": 35.42028985507246, "grad_norm": 0.3093124032020569, "learning_rate": 0.001, "loss": 1.7968, "step": 410592 }, { "epoch": 35.42512077294686, "grad_norm": 0.2704859972000122, "learning_rate": 0.001, "loss": 1.8087, "step": 410648 }, { "epoch": 35.429951690821255, "grad_norm": 0.28626394271850586, "learning_rate": 0.001, "loss": 1.8113, "step": 410704 }, { "epoch": 35.43478260869565, "grad_norm": 0.31669795513153076, "learning_rate": 0.001, "loss": 1.8002, "step": 410760 }, { "epoch": 35.43961352657005, "grad_norm": 0.357285737991333, "learning_rate": 0.001, "loss": 1.8047, "step": 410816 }, { "epoch": 35.44444444444444, "grad_norm": 0.4459398686885834, "learning_rate": 0.001, "loss": 1.8027, "step": 410872 }, { "epoch": 35.44927536231884, "grad_norm": 0.4644375443458557, "learning_rate": 0.001, "loss": 1.8097, "step": 410928 }, { "epoch": 35.45410628019324, "grad_norm": 0.3312598764896393, "learning_rate": 0.001, "loss": 1.8105, "step": 410984 }, { "epoch": 35.45893719806763, "grad_norm": 0.3333911895751953, "learning_rate": 0.001, "loss": 1.8051, "step": 411040 }, { "epoch": 35.46376811594203, "grad_norm": 0.42953598499298096, "learning_rate": 0.001, "loss": 1.8084, "step": 411096 }, { "epoch": 35.468599033816425, "grad_norm": 0.2958180010318756, "learning_rate": 0.001, "loss": 1.8015, "step": 411152 }, { "epoch": 35.47342995169082, "grad_norm": 0.5956571698188782, "learning_rate": 0.001, "loss": 1.8109, "step": 411208 }, { "epoch": 35.47826086956522, "grad_norm": 4.026671886444092, "learning_rate": 0.001, "loss": 1.8057, "step": 411264 }, { "epoch": 35.48309178743961, "grad_norm": 0.2890520989894867, "learning_rate": 0.001, "loss": 1.8123, "step": 411320 }, { "epoch": 35.48792270531401, "grad_norm": 0.34171226620674133, "learning_rate": 0.001, "loss": 1.8047, "step": 411376 }, { "epoch": 35.492753623188406, "grad_norm": 0.38530147075653076, "learning_rate": 0.001, "loss": 1.8095, "step": 411432 }, { "epoch": 35.4975845410628, "grad_norm": 0.2500717043876648, "learning_rate": 0.001, "loss": 1.8121, "step": 411488 }, { "epoch": 35.5024154589372, "grad_norm": 0.34365856647491455, "learning_rate": 0.001, "loss": 1.8194, "step": 411544 }, { "epoch": 35.507246376811594, "grad_norm": 0.609130322933197, "learning_rate": 0.001, "loss": 1.8148, "step": 411600 }, { "epoch": 35.51207729468599, "grad_norm": 0.6082576513290405, "learning_rate": 0.001, "loss": 1.8183, "step": 411656 }, { "epoch": 35.51690821256039, "grad_norm": 0.23690636456012726, "learning_rate": 0.001, "loss": 1.8133, "step": 411712 }, { "epoch": 35.52173913043478, "grad_norm": 0.2796630859375, "learning_rate": 0.001, "loss": 1.8126, "step": 411768 }, { "epoch": 35.52657004830918, "grad_norm": 0.4137280583381653, "learning_rate": 0.001, "loss": 1.8143, "step": 411824 }, { "epoch": 35.531400966183575, "grad_norm": 0.30777817964553833, "learning_rate": 0.001, "loss": 1.8017, "step": 411880 }, { "epoch": 35.53623188405797, "grad_norm": 0.24710530042648315, "learning_rate": 0.001, "loss": 1.8078, "step": 411936 }, { "epoch": 35.54106280193237, "grad_norm": 0.2967704236507416, "learning_rate": 0.001, "loss": 1.8054, "step": 411992 }, { "epoch": 35.54589371980676, "grad_norm": 0.7357916831970215, "learning_rate": 0.001, "loss": 1.7985, "step": 412048 }, { "epoch": 35.55072463768116, "grad_norm": 0.5564244389533997, "learning_rate": 0.001, "loss": 1.8025, "step": 412104 }, { "epoch": 35.55555555555556, "grad_norm": 0.3299926817417145, "learning_rate": 0.001, "loss": 1.8251, "step": 412160 }, { "epoch": 35.56038647342995, "grad_norm": 0.2791455388069153, "learning_rate": 0.001, "loss": 1.862, "step": 412216 }, { "epoch": 35.56521739130435, "grad_norm": 1.2173422574996948, "learning_rate": 0.001, "loss": 1.8417, "step": 412272 }, { "epoch": 35.570048309178745, "grad_norm": 19.47728157043457, "learning_rate": 0.001, "loss": 1.8134, "step": 412328 }, { "epoch": 35.57487922705314, "grad_norm": 0.35238805413246155, "learning_rate": 0.001, "loss": 1.8201, "step": 412384 }, { "epoch": 35.57971014492754, "grad_norm": 0.29076236486434937, "learning_rate": 0.001, "loss": 1.8163, "step": 412440 }, { "epoch": 35.58454106280193, "grad_norm": 1.546376347541809, "learning_rate": 0.001, "loss": 1.8108, "step": 412496 }, { "epoch": 35.589371980676326, "grad_norm": 0.3189411163330078, "learning_rate": 0.001, "loss": 1.8119, "step": 412552 }, { "epoch": 35.594202898550726, "grad_norm": 0.32669347524642944, "learning_rate": 0.001, "loss": 1.8063, "step": 412608 }, { "epoch": 35.59903381642512, "grad_norm": 0.5219030976295471, "learning_rate": 0.001, "loss": 1.8081, "step": 412664 }, { "epoch": 35.60386473429952, "grad_norm": 0.2837288975715637, "learning_rate": 0.001, "loss": 1.8064, "step": 412720 }, { "epoch": 35.608695652173914, "grad_norm": 0.38072219491004944, "learning_rate": 0.001, "loss": 1.8065, "step": 412776 }, { "epoch": 35.61352657004831, "grad_norm": 0.3971971869468689, "learning_rate": 0.001, "loss": 1.8026, "step": 412832 }, { "epoch": 35.61835748792271, "grad_norm": 0.4511968493461609, "learning_rate": 0.001, "loss": 1.8085, "step": 412888 }, { "epoch": 35.6231884057971, "grad_norm": 0.46087291836738586, "learning_rate": 0.001, "loss": 1.8018, "step": 412944 }, { "epoch": 35.628019323671495, "grad_norm": 0.42689013481140137, "learning_rate": 0.001, "loss": 1.8005, "step": 413000 }, { "epoch": 35.632850241545896, "grad_norm": 1.3089145421981812, "learning_rate": 0.001, "loss": 1.8002, "step": 413056 }, { "epoch": 35.63768115942029, "grad_norm": 0.28892236948013306, "learning_rate": 0.001, "loss": 1.7973, "step": 413112 }, { "epoch": 35.64251207729468, "grad_norm": 0.32442688941955566, "learning_rate": 0.001, "loss": 1.8006, "step": 413168 }, { "epoch": 35.64734299516908, "grad_norm": 0.33931100368499756, "learning_rate": 0.001, "loss": 1.7914, "step": 413224 }, { "epoch": 35.65217391304348, "grad_norm": 0.3788907527923584, "learning_rate": 0.001, "loss": 1.8013, "step": 413280 }, { "epoch": 35.65700483091788, "grad_norm": 0.2726646959781647, "learning_rate": 0.001, "loss": 1.7966, "step": 413336 }, { "epoch": 35.66183574879227, "grad_norm": 2.5383167266845703, "learning_rate": 0.001, "loss": 1.813, "step": 413392 }, { "epoch": 35.666666666666664, "grad_norm": 0.336001455783844, "learning_rate": 0.001, "loss": 1.8313, "step": 413448 }, { "epoch": 35.671497584541065, "grad_norm": 0.3318134546279907, "learning_rate": 0.001, "loss": 1.8148, "step": 413504 }, { "epoch": 35.67632850241546, "grad_norm": 0.44807299971580505, "learning_rate": 0.001, "loss": 1.8208, "step": 413560 }, { "epoch": 35.68115942028985, "grad_norm": 0.3180854916572571, "learning_rate": 0.001, "loss": 1.815, "step": 413616 }, { "epoch": 35.68599033816425, "grad_norm": 0.47387954592704773, "learning_rate": 0.001, "loss": 1.8098, "step": 413672 }, { "epoch": 35.690821256038646, "grad_norm": 0.2597843110561371, "learning_rate": 0.001, "loss": 1.8255, "step": 413728 }, { "epoch": 35.69565217391305, "grad_norm": 0.5969817638397217, "learning_rate": 0.001, "loss": 1.813, "step": 413784 }, { "epoch": 35.70048309178744, "grad_norm": 3.7520675659179688, "learning_rate": 0.001, "loss": 1.8047, "step": 413840 }, { "epoch": 35.70531400966183, "grad_norm": 2.5166049003601074, "learning_rate": 0.001, "loss": 1.8089, "step": 413896 }, { "epoch": 35.710144927536234, "grad_norm": 0.5534590482711792, "learning_rate": 0.001, "loss": 1.8105, "step": 413952 }, { "epoch": 35.71497584541063, "grad_norm": 0.35649874806404114, "learning_rate": 0.001, "loss": 1.8126, "step": 414008 }, { "epoch": 35.71980676328502, "grad_norm": 0.3534491956233978, "learning_rate": 0.001, "loss": 1.8115, "step": 414064 }, { "epoch": 35.72463768115942, "grad_norm": 1.9140962362289429, "learning_rate": 0.001, "loss": 1.813, "step": 414120 }, { "epoch": 35.729468599033815, "grad_norm": 2.072105646133423, "learning_rate": 0.001, "loss": 1.8029, "step": 414176 }, { "epoch": 35.734299516908216, "grad_norm": 0.3519071638584137, "learning_rate": 0.001, "loss": 1.8105, "step": 414232 }, { "epoch": 35.73913043478261, "grad_norm": 0.5808678865432739, "learning_rate": 0.001, "loss": 1.8263, "step": 414288 }, { "epoch": 35.743961352657, "grad_norm": 0.6865488290786743, "learning_rate": 0.001, "loss": 1.8182, "step": 414344 }, { "epoch": 35.7487922705314, "grad_norm": 0.45111045241355896, "learning_rate": 0.001, "loss": 1.809, "step": 414400 }, { "epoch": 35.7536231884058, "grad_norm": 0.4726544916629791, "learning_rate": 0.001, "loss": 1.815, "step": 414456 }, { "epoch": 35.75845410628019, "grad_norm": 0.44025561213493347, "learning_rate": 0.001, "loss": 1.8119, "step": 414512 }, { "epoch": 35.76328502415459, "grad_norm": 0.3452722728252411, "learning_rate": 0.001, "loss": 1.8093, "step": 414568 }, { "epoch": 35.768115942028984, "grad_norm": 0.39103955030441284, "learning_rate": 0.001, "loss": 1.8152, "step": 414624 }, { "epoch": 35.772946859903385, "grad_norm": 0.5738186836242676, "learning_rate": 0.001, "loss": 1.811, "step": 414680 }, { "epoch": 35.77777777777778, "grad_norm": 0.27434220910072327, "learning_rate": 0.001, "loss": 1.8224, "step": 414736 }, { "epoch": 35.78260869565217, "grad_norm": 0.38616466522216797, "learning_rate": 0.001, "loss": 1.8218, "step": 414792 }, { "epoch": 35.78743961352657, "grad_norm": 2.38551926612854, "learning_rate": 0.001, "loss": 1.8031, "step": 414848 }, { "epoch": 35.792270531400966, "grad_norm": 0.522350013256073, "learning_rate": 0.001, "loss": 1.8206, "step": 414904 }, { "epoch": 35.79710144927536, "grad_norm": 1.2795474529266357, "learning_rate": 0.001, "loss": 1.8225, "step": 414960 }, { "epoch": 35.80193236714976, "grad_norm": 0.2650989592075348, "learning_rate": 0.001, "loss": 1.8179, "step": 415016 }, { "epoch": 35.806763285024154, "grad_norm": 0.2709149122238159, "learning_rate": 0.001, "loss": 1.8238, "step": 415072 }, { "epoch": 35.81159420289855, "grad_norm": 1.7122024297714233, "learning_rate": 0.001, "loss": 1.8287, "step": 415128 }, { "epoch": 35.81642512077295, "grad_norm": 0.8686914443969727, "learning_rate": 0.001, "loss": 1.8402, "step": 415184 }, { "epoch": 35.82125603864734, "grad_norm": 3.722557783126831, "learning_rate": 0.001, "loss": 1.8555, "step": 415240 }, { "epoch": 35.82608695652174, "grad_norm": 0.35540807247161865, "learning_rate": 0.001, "loss": 1.8392, "step": 415296 }, { "epoch": 35.830917874396135, "grad_norm": 0.9539128541946411, "learning_rate": 0.001, "loss": 1.8316, "step": 415352 }, { "epoch": 35.83574879227053, "grad_norm": 0.4823375642299652, "learning_rate": 0.001, "loss": 1.8266, "step": 415408 }, { "epoch": 35.84057971014493, "grad_norm": 0.5761464834213257, "learning_rate": 0.001, "loss": 1.8289, "step": 415464 }, { "epoch": 35.84541062801932, "grad_norm": 0.5835826992988586, "learning_rate": 0.001, "loss": 1.8207, "step": 415520 }, { "epoch": 35.85024154589372, "grad_norm": 0.6022976040840149, "learning_rate": 0.001, "loss": 1.815, "step": 415576 }, { "epoch": 35.85507246376812, "grad_norm": 0.7785156965255737, "learning_rate": 0.001, "loss": 1.8235, "step": 415632 }, { "epoch": 35.85990338164251, "grad_norm": 0.9020271897315979, "learning_rate": 0.001, "loss": 1.821, "step": 415688 }, { "epoch": 35.86473429951691, "grad_norm": 0.46804189682006836, "learning_rate": 0.001, "loss": 1.8197, "step": 415744 }, { "epoch": 35.869565217391305, "grad_norm": 1.654260516166687, "learning_rate": 0.001, "loss": 1.8183, "step": 415800 }, { "epoch": 35.8743961352657, "grad_norm": 1.5682464838027954, "learning_rate": 0.001, "loss": 1.8387, "step": 415856 }, { "epoch": 35.8792270531401, "grad_norm": 0.4145030379295349, "learning_rate": 0.001, "loss": 1.8415, "step": 415912 }, { "epoch": 35.88405797101449, "grad_norm": 0.4621835947036743, "learning_rate": 0.001, "loss": 1.8683, "step": 415968 }, { "epoch": 35.888888888888886, "grad_norm": 2.6306676864624023, "learning_rate": 0.001, "loss": 1.8738, "step": 416024 }, { "epoch": 35.893719806763286, "grad_norm": 0.4296230971813202, "learning_rate": 0.001, "loss": 1.8524, "step": 416080 }, { "epoch": 35.89855072463768, "grad_norm": 0.7701650261878967, "learning_rate": 0.001, "loss": 1.8414, "step": 416136 }, { "epoch": 35.90338164251208, "grad_norm": 1.5234531164169312, "learning_rate": 0.001, "loss": 1.8372, "step": 416192 }, { "epoch": 35.908212560386474, "grad_norm": 0.4869275391101837, "learning_rate": 0.001, "loss": 1.8349, "step": 416248 }, { "epoch": 35.91304347826087, "grad_norm": 0.40160831809043884, "learning_rate": 0.001, "loss": 1.8403, "step": 416304 }, { "epoch": 35.91787439613527, "grad_norm": 0.3963029980659485, "learning_rate": 0.001, "loss": 1.8248, "step": 416360 }, { "epoch": 35.92270531400966, "grad_norm": 0.30414238572120667, "learning_rate": 0.001, "loss": 1.8296, "step": 416416 }, { "epoch": 35.927536231884055, "grad_norm": 1.4931467771530151, "learning_rate": 0.001, "loss": 1.8267, "step": 416472 }, { "epoch": 35.932367149758456, "grad_norm": 1.6522365808486938, "learning_rate": 0.001, "loss": 1.834, "step": 416528 }, { "epoch": 35.93719806763285, "grad_norm": 2.1042068004608154, "learning_rate": 0.001, "loss": 1.8537, "step": 416584 }, { "epoch": 35.94202898550725, "grad_norm": 2.761247396469116, "learning_rate": 0.001, "loss": 1.8499, "step": 416640 }, { "epoch": 35.94685990338164, "grad_norm": 0.8252613544464111, "learning_rate": 0.001, "loss": 1.85, "step": 416696 }, { "epoch": 35.95169082125604, "grad_norm": 0.9316199421882629, "learning_rate": 0.001, "loss": 1.8398, "step": 416752 }, { "epoch": 35.95652173913044, "grad_norm": 1.2772455215454102, "learning_rate": 0.001, "loss": 1.8522, "step": 416808 }, { "epoch": 35.96135265700483, "grad_norm": 9.804481506347656, "learning_rate": 0.001, "loss": 1.8425, "step": 416864 }, { "epoch": 35.966183574879224, "grad_norm": 0.7426204681396484, "learning_rate": 0.001, "loss": 1.8501, "step": 416920 }, { "epoch": 35.971014492753625, "grad_norm": 3.051438093185425, "learning_rate": 0.001, "loss": 1.8506, "step": 416976 }, { "epoch": 35.97584541062802, "grad_norm": 0.717117190361023, "learning_rate": 0.001, "loss": 1.8541, "step": 417032 }, { "epoch": 35.98067632850242, "grad_norm": 0.6426759362220764, "learning_rate": 0.001, "loss": 1.8578, "step": 417088 }, { "epoch": 35.98550724637681, "grad_norm": 0.5064529180526733, "learning_rate": 0.001, "loss": 1.8434, "step": 417144 }, { "epoch": 35.990338164251206, "grad_norm": 0.2840609550476074, "learning_rate": 0.001, "loss": 1.8316, "step": 417200 }, { "epoch": 35.99516908212561, "grad_norm": 0.4005604386329651, "learning_rate": 0.001, "loss": 1.8351, "step": 417256 }, { "epoch": 36.0, "grad_norm": 0.3010460138320923, "learning_rate": 0.001, "loss": 1.8277, "step": 417312 }, { "epoch": 36.00483091787439, "grad_norm": 0.37541258335113525, "learning_rate": 0.001, "loss": 1.7876, "step": 417368 }, { "epoch": 36.009661835748794, "grad_norm": 1.3438811302185059, "learning_rate": 0.001, "loss": 1.8063, "step": 417424 }, { "epoch": 36.01449275362319, "grad_norm": 0.7876741886138916, "learning_rate": 0.001, "loss": 1.8022, "step": 417480 }, { "epoch": 36.01932367149758, "grad_norm": 0.3538321554660797, "learning_rate": 0.001, "loss": 1.7949, "step": 417536 }, { "epoch": 36.02415458937198, "grad_norm": 61.62012481689453, "learning_rate": 0.001, "loss": 1.7868, "step": 417592 }, { "epoch": 36.028985507246375, "grad_norm": 0.4933997690677643, "learning_rate": 0.001, "loss": 1.7861, "step": 417648 }, { "epoch": 36.033816425120776, "grad_norm": 5.336723804473877, "learning_rate": 0.001, "loss": 1.8, "step": 417704 }, { "epoch": 36.03864734299517, "grad_norm": 14.08191967010498, "learning_rate": 0.001, "loss": 1.8164, "step": 417760 }, { "epoch": 36.04347826086956, "grad_norm": 0.4196595251560211, "learning_rate": 0.001, "loss": 1.8127, "step": 417816 }, { "epoch": 36.04830917874396, "grad_norm": 3.8787102699279785, "learning_rate": 0.001, "loss": 1.8057, "step": 417872 }, { "epoch": 36.05314009661836, "grad_norm": 0.4622533321380615, "learning_rate": 0.001, "loss": 1.8252, "step": 417928 }, { "epoch": 36.05797101449275, "grad_norm": 0.4794529676437378, "learning_rate": 0.001, "loss": 1.8181, "step": 417984 }, { "epoch": 36.06280193236715, "grad_norm": 0.3200092315673828, "learning_rate": 0.001, "loss": 1.8248, "step": 418040 }, { "epoch": 36.067632850241544, "grad_norm": 2.194531202316284, "learning_rate": 0.001, "loss": 1.8301, "step": 418096 }, { "epoch": 36.072463768115945, "grad_norm": 1.1515486240386963, "learning_rate": 0.001, "loss": 1.8244, "step": 418152 }, { "epoch": 36.07729468599034, "grad_norm": 0.5132242441177368, "learning_rate": 0.001, "loss": 1.8189, "step": 418208 }, { "epoch": 36.08212560386473, "grad_norm": 0.650173008441925, "learning_rate": 0.001, "loss": 1.8081, "step": 418264 }, { "epoch": 36.08695652173913, "grad_norm": 0.8986338973045349, "learning_rate": 0.001, "loss": 1.8157, "step": 418320 }, { "epoch": 36.091787439613526, "grad_norm": 3.483461380004883, "learning_rate": 0.001, "loss": 1.8257, "step": 418376 }, { "epoch": 36.09661835748792, "grad_norm": 0.5482420325279236, "learning_rate": 0.001, "loss": 1.8149, "step": 418432 }, { "epoch": 36.10144927536232, "grad_norm": 4.823991775512695, "learning_rate": 0.001, "loss": 1.8213, "step": 418488 }, { "epoch": 36.106280193236714, "grad_norm": 2.0775492191314697, "learning_rate": 0.001, "loss": 1.8186, "step": 418544 }, { "epoch": 36.111111111111114, "grad_norm": 0.9503189921379089, "learning_rate": 0.001, "loss": 1.8131, "step": 418600 }, { "epoch": 36.11594202898551, "grad_norm": 0.6944476366043091, "learning_rate": 0.001, "loss": 1.8154, "step": 418656 }, { "epoch": 36.1207729468599, "grad_norm": 2.9918415546417236, "learning_rate": 0.001, "loss": 1.8081, "step": 418712 }, { "epoch": 36.1256038647343, "grad_norm": 0.4789454936981201, "learning_rate": 0.001, "loss": 1.8208, "step": 418768 }, { "epoch": 36.130434782608695, "grad_norm": 0.8901446461677551, "learning_rate": 0.001, "loss": 1.8209, "step": 418824 }, { "epoch": 36.13526570048309, "grad_norm": 1.2794877290725708, "learning_rate": 0.001, "loss": 1.8099, "step": 418880 }, { "epoch": 36.14009661835749, "grad_norm": 2.1572601795196533, "learning_rate": 0.001, "loss": 1.8145, "step": 418936 }, { "epoch": 36.14492753623188, "grad_norm": 4.109430313110352, "learning_rate": 0.001, "loss": 1.8123, "step": 418992 }, { "epoch": 36.14975845410628, "grad_norm": 0.6066772937774658, "learning_rate": 0.001, "loss": 1.8026, "step": 419048 }, { "epoch": 36.15458937198068, "grad_norm": 0.46949502825737, "learning_rate": 0.001, "loss": 1.8141, "step": 419104 }, { "epoch": 36.15942028985507, "grad_norm": 2.2978317737579346, "learning_rate": 0.001, "loss": 1.8042, "step": 419160 }, { "epoch": 36.16425120772947, "grad_norm": 0.3962641954421997, "learning_rate": 0.001, "loss": 1.8016, "step": 419216 }, { "epoch": 36.169082125603865, "grad_norm": 0.8893486857414246, "learning_rate": 0.001, "loss": 1.8792, "step": 419272 }, { "epoch": 36.17391304347826, "grad_norm": 12.20644474029541, "learning_rate": 0.001, "loss": 1.8596, "step": 419328 }, { "epoch": 36.17874396135266, "grad_norm": 0.47220736742019653, "learning_rate": 0.001, "loss": 1.8402, "step": 419384 }, { "epoch": 36.18357487922705, "grad_norm": 1.4701601266860962, "learning_rate": 0.001, "loss": 1.8121, "step": 419440 }, { "epoch": 36.18840579710145, "grad_norm": 0.8864219784736633, "learning_rate": 0.001, "loss": 1.8011, "step": 419496 }, { "epoch": 36.193236714975846, "grad_norm": 0.9630461931228638, "learning_rate": 0.001, "loss": 1.8003, "step": 419552 }, { "epoch": 36.19806763285024, "grad_norm": 0.4893719255924225, "learning_rate": 0.001, "loss": 1.8002, "step": 419608 }, { "epoch": 36.20289855072464, "grad_norm": 4.209807872772217, "learning_rate": 0.001, "loss": 1.8001, "step": 419664 }, { "epoch": 36.207729468599034, "grad_norm": 1.1285202503204346, "learning_rate": 0.001, "loss": 1.804, "step": 419720 }, { "epoch": 36.21256038647343, "grad_norm": 1.0526671409606934, "learning_rate": 0.001, "loss": 1.7944, "step": 419776 }, { "epoch": 36.21739130434783, "grad_norm": 1.1785130500793457, "learning_rate": 0.001, "loss": 1.8031, "step": 419832 }, { "epoch": 36.22222222222222, "grad_norm": 0.6127132177352905, "learning_rate": 0.001, "loss": 1.803, "step": 419888 }, { "epoch": 36.227053140096615, "grad_norm": 0.37304431200027466, "learning_rate": 0.001, "loss": 1.8224, "step": 419944 }, { "epoch": 36.231884057971016, "grad_norm": 0.5405763387680054, "learning_rate": 0.001, "loss": 1.8096, "step": 420000 }, { "epoch": 36.23671497584541, "grad_norm": 0.36455366015434265, "learning_rate": 0.001, "loss": 1.8087, "step": 420056 }, { "epoch": 36.24154589371981, "grad_norm": 0.4383017420768738, "learning_rate": 0.001, "loss": 1.8012, "step": 420112 }, { "epoch": 36.2463768115942, "grad_norm": 1.4162043333053589, "learning_rate": 0.001, "loss": 1.7993, "step": 420168 }, { "epoch": 36.2512077294686, "grad_norm": 0.556699812412262, "learning_rate": 0.001, "loss": 1.7967, "step": 420224 }, { "epoch": 36.256038647343, "grad_norm": 0.366416335105896, "learning_rate": 0.001, "loss": 1.8016, "step": 420280 }, { "epoch": 36.26086956521739, "grad_norm": 2.401747465133667, "learning_rate": 0.001, "loss": 1.8022, "step": 420336 }, { "epoch": 36.265700483091784, "grad_norm": 0.486751914024353, "learning_rate": 0.001, "loss": 1.809, "step": 420392 }, { "epoch": 36.270531400966185, "grad_norm": 0.4724934995174408, "learning_rate": 0.001, "loss": 1.8289, "step": 420448 }, { "epoch": 36.27536231884058, "grad_norm": 2.7811198234558105, "learning_rate": 0.001, "loss": 1.8298, "step": 420504 }, { "epoch": 36.28019323671498, "grad_norm": 1.90841805934906, "learning_rate": 0.001, "loss": 1.8269, "step": 420560 }, { "epoch": 36.28502415458937, "grad_norm": 0.9812134504318237, "learning_rate": 0.001, "loss": 1.8338, "step": 420616 }, { "epoch": 36.289855072463766, "grad_norm": 0.5965414643287659, "learning_rate": 0.001, "loss": 1.8346, "step": 420672 }, { "epoch": 36.29468599033817, "grad_norm": 0.5134904980659485, "learning_rate": 0.001, "loss": 1.8276, "step": 420728 }, { "epoch": 36.29951690821256, "grad_norm": 3.909770965576172, "learning_rate": 0.001, "loss": 1.8186, "step": 420784 }, { "epoch": 36.30434782608695, "grad_norm": 0.4676063060760498, "learning_rate": 0.001, "loss": 1.8262, "step": 420840 }, { "epoch": 36.309178743961354, "grad_norm": 0.7271302938461304, "learning_rate": 0.001, "loss": 1.8252, "step": 420896 }, { "epoch": 36.31400966183575, "grad_norm": 0.6256305575370789, "learning_rate": 0.001, "loss": 1.8227, "step": 420952 }, { "epoch": 36.31884057971015, "grad_norm": 0.8160049915313721, "learning_rate": 0.001, "loss": 1.8155, "step": 421008 }, { "epoch": 36.32367149758454, "grad_norm": 0.34517383575439453, "learning_rate": 0.001, "loss": 1.8126, "step": 421064 }, { "epoch": 36.328502415458935, "grad_norm": 8.307565689086914, "learning_rate": 0.001, "loss": 1.8276, "step": 421120 }, { "epoch": 36.333333333333336, "grad_norm": 3.8583412170410156, "learning_rate": 0.001, "loss": 1.8129, "step": 421176 }, { "epoch": 36.33816425120773, "grad_norm": 0.5957732796669006, "learning_rate": 0.001, "loss": 1.8189, "step": 421232 }, { "epoch": 36.34299516908212, "grad_norm": 1.0673693418502808, "learning_rate": 0.001, "loss": 1.8175, "step": 421288 }, { "epoch": 36.34782608695652, "grad_norm": 0.28790944814682007, "learning_rate": 0.001, "loss": 1.8061, "step": 421344 }, { "epoch": 36.35265700483092, "grad_norm": 0.797018826007843, "learning_rate": 0.001, "loss": 1.8072, "step": 421400 }, { "epoch": 36.35748792270532, "grad_norm": 0.3776380121707916, "learning_rate": 0.001, "loss": 1.8097, "step": 421456 }, { "epoch": 36.36231884057971, "grad_norm": 1.2925693988800049, "learning_rate": 0.001, "loss": 1.8007, "step": 421512 }, { "epoch": 36.367149758454104, "grad_norm": 0.8501543402671814, "learning_rate": 0.001, "loss": 1.8117, "step": 421568 }, { "epoch": 36.371980676328505, "grad_norm": 0.9536029100418091, "learning_rate": 0.001, "loss": 1.8112, "step": 421624 }, { "epoch": 36.3768115942029, "grad_norm": 0.48492881655693054, "learning_rate": 0.001, "loss": 1.8098, "step": 421680 }, { "epoch": 36.38164251207729, "grad_norm": 0.4595077335834503, "learning_rate": 0.001, "loss": 1.8107, "step": 421736 }, { "epoch": 36.38647342995169, "grad_norm": 0.3828184902667999, "learning_rate": 0.001, "loss": 1.8033, "step": 421792 }, { "epoch": 36.391304347826086, "grad_norm": 1.0317989587783813, "learning_rate": 0.001, "loss": 1.7962, "step": 421848 }, { "epoch": 36.39613526570048, "grad_norm": 0.27856922149658203, "learning_rate": 0.001, "loss": 1.8094, "step": 421904 }, { "epoch": 36.40096618357488, "grad_norm": 0.30614838004112244, "learning_rate": 0.001, "loss": 1.8022, "step": 421960 }, { "epoch": 36.405797101449274, "grad_norm": 0.3831099271774292, "learning_rate": 0.001, "loss": 1.7988, "step": 422016 }, { "epoch": 36.410628019323674, "grad_norm": 0.5046437978744507, "learning_rate": 0.001, "loss": 1.8169, "step": 422072 }, { "epoch": 36.41545893719807, "grad_norm": 3.933488368988037, "learning_rate": 0.001, "loss": 1.8106, "step": 422128 }, { "epoch": 36.42028985507246, "grad_norm": 0.696277916431427, "learning_rate": 0.001, "loss": 1.805, "step": 422184 }, { "epoch": 36.42512077294686, "grad_norm": 0.2524656653404236, "learning_rate": 0.001, "loss": 1.811, "step": 422240 }, { "epoch": 36.429951690821255, "grad_norm": 0.4835737943649292, "learning_rate": 0.001, "loss": 1.8072, "step": 422296 }, { "epoch": 36.43478260869565, "grad_norm": 0.8354586362838745, "learning_rate": 0.001, "loss": 1.8095, "step": 422352 }, { "epoch": 36.43961352657005, "grad_norm": 1.903119444847107, "learning_rate": 0.001, "loss": 1.8116, "step": 422408 }, { "epoch": 36.44444444444444, "grad_norm": 0.8190333247184753, "learning_rate": 0.001, "loss": 1.8181, "step": 422464 }, { "epoch": 36.44927536231884, "grad_norm": 3.3435781002044678, "learning_rate": 0.001, "loss": 1.8369, "step": 422520 }, { "epoch": 36.45410628019324, "grad_norm": 0.7233803272247314, "learning_rate": 0.001, "loss": 1.8728, "step": 422576 }, { "epoch": 36.45893719806763, "grad_norm": 0.6402056217193604, "learning_rate": 0.001, "loss": 1.8759, "step": 422632 }, { "epoch": 36.46376811594203, "grad_norm": 1.071313738822937, "learning_rate": 0.001, "loss": 1.8741, "step": 422688 }, { "epoch": 36.468599033816425, "grad_norm": 1.8663617372512817, "learning_rate": 0.001, "loss": 1.8469, "step": 422744 }, { "epoch": 36.47342995169082, "grad_norm": 0.7930673360824585, "learning_rate": 0.001, "loss": 1.8342, "step": 422800 }, { "epoch": 36.47826086956522, "grad_norm": 1.5684564113616943, "learning_rate": 0.001, "loss": 1.8354, "step": 422856 }, { "epoch": 36.48309178743961, "grad_norm": 0.44366708397865295, "learning_rate": 0.001, "loss": 1.8368, "step": 422912 }, { "epoch": 36.48792270531401, "grad_norm": 0.47196897864341736, "learning_rate": 0.001, "loss": 1.835, "step": 422968 }, { "epoch": 36.492753623188406, "grad_norm": 0.5209424495697021, "learning_rate": 0.001, "loss": 1.8282, "step": 423024 }, { "epoch": 36.4975845410628, "grad_norm": 0.5942379832267761, "learning_rate": 0.001, "loss": 1.8302, "step": 423080 }, { "epoch": 36.5024154589372, "grad_norm": 0.960195004940033, "learning_rate": 0.001, "loss": 1.8233, "step": 423136 }, { "epoch": 36.507246376811594, "grad_norm": 1.280127763748169, "learning_rate": 0.001, "loss": 1.8192, "step": 423192 }, { "epoch": 36.51207729468599, "grad_norm": 0.5658990740776062, "learning_rate": 0.001, "loss": 1.8141, "step": 423248 }, { "epoch": 36.51690821256039, "grad_norm": 0.8378584980964661, "learning_rate": 0.001, "loss": 1.8133, "step": 423304 }, { "epoch": 36.52173913043478, "grad_norm": 1.4449589252471924, "learning_rate": 0.001, "loss": 1.8073, "step": 423360 }, { "epoch": 36.52657004830918, "grad_norm": 0.705717146396637, "learning_rate": 0.001, "loss": 1.8161, "step": 423416 }, { "epoch": 36.531400966183575, "grad_norm": 0.5583307147026062, "learning_rate": 0.001, "loss": 1.8139, "step": 423472 }, { "epoch": 36.53623188405797, "grad_norm": 0.4237203299999237, "learning_rate": 0.001, "loss": 1.8044, "step": 423528 }, { "epoch": 36.54106280193237, "grad_norm": 0.7137573957443237, "learning_rate": 0.001, "loss": 1.8061, "step": 423584 }, { "epoch": 36.54589371980676, "grad_norm": 2.2076714038848877, "learning_rate": 0.001, "loss": 1.8137, "step": 423640 }, { "epoch": 36.55072463768116, "grad_norm": 0.6838551759719849, "learning_rate": 0.001, "loss": 1.8076, "step": 423696 }, { "epoch": 36.55555555555556, "grad_norm": 1.1682318449020386, "learning_rate": 0.001, "loss": 1.8051, "step": 423752 }, { "epoch": 36.56038647342995, "grad_norm": 0.30587345361709595, "learning_rate": 0.001, "loss": 1.809, "step": 423808 }, { "epoch": 36.56521739130435, "grad_norm": 0.8469343185424805, "learning_rate": 0.001, "loss": 1.8143, "step": 423864 }, { "epoch": 36.570048309178745, "grad_norm": 0.5489840507507324, "learning_rate": 0.001, "loss": 1.8191, "step": 423920 }, { "epoch": 36.57487922705314, "grad_norm": 0.38876935839653015, "learning_rate": 0.001, "loss": 1.81, "step": 423976 }, { "epoch": 36.57971014492754, "grad_norm": 0.3187427222728729, "learning_rate": 0.001, "loss": 1.8089, "step": 424032 }, { "epoch": 36.58454106280193, "grad_norm": 0.3336658775806427, "learning_rate": 0.001, "loss": 1.8055, "step": 424088 }, { "epoch": 36.589371980676326, "grad_norm": 1.0279488563537598, "learning_rate": 0.001, "loss": 1.801, "step": 424144 }, { "epoch": 36.594202898550726, "grad_norm": 0.9203669428825378, "learning_rate": 0.001, "loss": 1.8038, "step": 424200 }, { "epoch": 36.59903381642512, "grad_norm": 4.256526947021484, "learning_rate": 0.001, "loss": 1.7985, "step": 424256 }, { "epoch": 36.60386473429952, "grad_norm": 0.6080761551856995, "learning_rate": 0.001, "loss": 1.8059, "step": 424312 }, { "epoch": 36.608695652173914, "grad_norm": 0.28674986958503723, "learning_rate": 0.001, "loss": 1.8108, "step": 424368 }, { "epoch": 36.61352657004831, "grad_norm": 0.8129708766937256, "learning_rate": 0.001, "loss": 1.7938, "step": 424424 }, { "epoch": 36.61835748792271, "grad_norm": 0.29786789417266846, "learning_rate": 0.001, "loss": 1.8046, "step": 424480 }, { "epoch": 36.6231884057971, "grad_norm": 0.3867568373680115, "learning_rate": 0.001, "loss": 1.7977, "step": 424536 }, { "epoch": 36.628019323671495, "grad_norm": 0.29274699091911316, "learning_rate": 0.001, "loss": 1.7992, "step": 424592 }, { "epoch": 36.632850241545896, "grad_norm": 0.8310989737510681, "learning_rate": 0.001, "loss": 1.7985, "step": 424648 }, { "epoch": 36.63768115942029, "grad_norm": 1.585929274559021, "learning_rate": 0.001, "loss": 1.8078, "step": 424704 }, { "epoch": 36.64251207729468, "grad_norm": 0.7017877101898193, "learning_rate": 0.001, "loss": 1.8077, "step": 424760 }, { "epoch": 36.64734299516908, "grad_norm": 0.3629225492477417, "learning_rate": 0.001, "loss": 1.8095, "step": 424816 }, { "epoch": 36.65217391304348, "grad_norm": 0.2989633083343506, "learning_rate": 0.001, "loss": 1.8084, "step": 424872 }, { "epoch": 36.65700483091788, "grad_norm": 1.017148733139038, "learning_rate": 0.001, "loss": 1.8, "step": 424928 }, { "epoch": 36.66183574879227, "grad_norm": 0.502622663974762, "learning_rate": 0.001, "loss": 1.8083, "step": 424984 }, { "epoch": 36.666666666666664, "grad_norm": 0.35630670189857483, "learning_rate": 0.001, "loss": 1.8099, "step": 425040 }, { "epoch": 36.671497584541065, "grad_norm": 1.0991458892822266, "learning_rate": 0.001, "loss": 1.8005, "step": 425096 }, { "epoch": 36.67632850241546, "grad_norm": 0.6963735222816467, "learning_rate": 0.001, "loss": 1.8053, "step": 425152 }, { "epoch": 36.68115942028985, "grad_norm": 0.2799973487854004, "learning_rate": 0.001, "loss": 1.8112, "step": 425208 }, { "epoch": 36.68599033816425, "grad_norm": 3.464158773422241, "learning_rate": 0.001, "loss": 1.8115, "step": 425264 }, { "epoch": 36.690821256038646, "grad_norm": 0.6238375902175903, "learning_rate": 0.001, "loss": 1.8061, "step": 425320 }, { "epoch": 36.69565217391305, "grad_norm": 0.5292114019393921, "learning_rate": 0.001, "loss": 1.7986, "step": 425376 }, { "epoch": 36.70048309178744, "grad_norm": 1.1414953470230103, "learning_rate": 0.001, "loss": 1.8019, "step": 425432 }, { "epoch": 36.70531400966183, "grad_norm": 1.150097131729126, "learning_rate": 0.001, "loss": 1.7889, "step": 425488 }, { "epoch": 36.710144927536234, "grad_norm": 5.404383182525635, "learning_rate": 0.001, "loss": 1.8005, "step": 425544 }, { "epoch": 36.71497584541063, "grad_norm": 0.7964653968811035, "learning_rate": 0.001, "loss": 1.8069, "step": 425600 }, { "epoch": 36.71980676328502, "grad_norm": 0.2718655467033386, "learning_rate": 0.001, "loss": 1.7989, "step": 425656 }, { "epoch": 36.72463768115942, "grad_norm": 0.38797837495803833, "learning_rate": 0.001, "loss": 1.8122, "step": 425712 }, { "epoch": 36.729468599033815, "grad_norm": 0.6828791499137878, "learning_rate": 0.001, "loss": 1.8065, "step": 425768 }, { "epoch": 36.734299516908216, "grad_norm": 1.314735770225525, "learning_rate": 0.001, "loss": 1.8129, "step": 425824 }, { "epoch": 36.73913043478261, "grad_norm": 0.4683690667152405, "learning_rate": 0.001, "loss": 1.8095, "step": 425880 }, { "epoch": 36.743961352657, "grad_norm": 0.2769481837749481, "learning_rate": 0.001, "loss": 1.8142, "step": 425936 }, { "epoch": 36.7487922705314, "grad_norm": 1.662924885749817, "learning_rate": 0.001, "loss": 1.8028, "step": 425992 }, { "epoch": 36.7536231884058, "grad_norm": 0.5976274013519287, "learning_rate": 0.001, "loss": 1.7935, "step": 426048 }, { "epoch": 36.75845410628019, "grad_norm": 1.263322114944458, "learning_rate": 0.001, "loss": 1.8041, "step": 426104 }, { "epoch": 36.76328502415459, "grad_norm": 0.3656352162361145, "learning_rate": 0.001, "loss": 1.8042, "step": 426160 }, { "epoch": 36.768115942028984, "grad_norm": 0.5080867409706116, "learning_rate": 0.001, "loss": 1.8072, "step": 426216 }, { "epoch": 36.772946859903385, "grad_norm": 0.34757837653160095, "learning_rate": 0.001, "loss": 1.808, "step": 426272 }, { "epoch": 36.77777777777778, "grad_norm": 0.3898159861564636, "learning_rate": 0.001, "loss": 1.8102, "step": 426328 }, { "epoch": 36.78260869565217, "grad_norm": 0.2943791151046753, "learning_rate": 0.001, "loss": 1.8095, "step": 426384 }, { "epoch": 36.78743961352657, "grad_norm": 0.6318961381912231, "learning_rate": 0.001, "loss": 1.816, "step": 426440 }, { "epoch": 36.792270531400966, "grad_norm": 0.43459683656692505, "learning_rate": 0.001, "loss": 1.8065, "step": 426496 }, { "epoch": 36.79710144927536, "grad_norm": 0.2659796476364136, "learning_rate": 0.001, "loss": 1.8201, "step": 426552 }, { "epoch": 36.80193236714976, "grad_norm": 0.8089146614074707, "learning_rate": 0.001, "loss": 1.8235, "step": 426608 }, { "epoch": 36.806763285024154, "grad_norm": 0.2565155029296875, "learning_rate": 0.001, "loss": 1.8061, "step": 426664 }, { "epoch": 36.81159420289855, "grad_norm": 0.35102909803390503, "learning_rate": 0.001, "loss": 1.8084, "step": 426720 }, { "epoch": 36.81642512077295, "grad_norm": 0.5686193108558655, "learning_rate": 0.001, "loss": 1.8143, "step": 426776 }, { "epoch": 36.82125603864734, "grad_norm": 0.5286378860473633, "learning_rate": 0.001, "loss": 1.8079, "step": 426832 }, { "epoch": 36.82608695652174, "grad_norm": 0.6484923958778381, "learning_rate": 0.001, "loss": 1.8037, "step": 426888 }, { "epoch": 36.830917874396135, "grad_norm": 0.5301809310913086, "learning_rate": 0.001, "loss": 1.7958, "step": 426944 }, { "epoch": 36.83574879227053, "grad_norm": 0.38552746176719666, "learning_rate": 0.001, "loss": 1.8119, "step": 427000 }, { "epoch": 36.84057971014493, "grad_norm": 0.7235148549079895, "learning_rate": 0.001, "loss": 1.8102, "step": 427056 }, { "epoch": 36.84541062801932, "grad_norm": 0.6579404473304749, "learning_rate": 0.001, "loss": 1.8123, "step": 427112 }, { "epoch": 36.85024154589372, "grad_norm": 1.8766202926635742, "learning_rate": 0.001, "loss": 1.8374, "step": 427168 }, { "epoch": 36.85507246376812, "grad_norm": 0.3992861211299896, "learning_rate": 0.001, "loss": 1.8374, "step": 427224 }, { "epoch": 36.85990338164251, "grad_norm": 1.6838467121124268, "learning_rate": 0.001, "loss": 1.8244, "step": 427280 }, { "epoch": 36.86473429951691, "grad_norm": 0.29545336961746216, "learning_rate": 0.001, "loss": 1.8143, "step": 427336 }, { "epoch": 36.869565217391305, "grad_norm": 1.104663610458374, "learning_rate": 0.001, "loss": 1.8053, "step": 427392 }, { "epoch": 36.8743961352657, "grad_norm": 0.2821395695209503, "learning_rate": 0.001, "loss": 1.8073, "step": 427448 }, { "epoch": 36.8792270531401, "grad_norm": 0.3605702817440033, "learning_rate": 0.001, "loss": 1.8081, "step": 427504 }, { "epoch": 36.88405797101449, "grad_norm": 0.33109819889068604, "learning_rate": 0.001, "loss": 1.8139, "step": 427560 }, { "epoch": 36.888888888888886, "grad_norm": 0.6582963466644287, "learning_rate": 0.001, "loss": 1.8093, "step": 427616 }, { "epoch": 36.893719806763286, "grad_norm": 0.33953168988227844, "learning_rate": 0.001, "loss": 1.819, "step": 427672 }, { "epoch": 36.89855072463768, "grad_norm": 1.5330380201339722, "learning_rate": 0.001, "loss": 1.8527, "step": 427728 }, { "epoch": 36.90338164251208, "grad_norm": 0.5256544947624207, "learning_rate": 0.001, "loss": 1.8125, "step": 427784 }, { "epoch": 36.908212560386474, "grad_norm": 0.2494855523109436, "learning_rate": 0.001, "loss": 1.8136, "step": 427840 }, { "epoch": 36.91304347826087, "grad_norm": 0.2984012961387634, "learning_rate": 0.001, "loss": 1.8136, "step": 427896 }, { "epoch": 36.91787439613527, "grad_norm": 6.267683029174805, "learning_rate": 0.001, "loss": 1.8028, "step": 427952 }, { "epoch": 36.92270531400966, "grad_norm": 0.5449560880661011, "learning_rate": 0.001, "loss": 1.8178, "step": 428008 }, { "epoch": 36.927536231884055, "grad_norm": 0.2684186100959778, "learning_rate": 0.001, "loss": 1.8222, "step": 428064 }, { "epoch": 36.932367149758456, "grad_norm": 5.631681442260742, "learning_rate": 0.001, "loss": 1.8066, "step": 428120 }, { "epoch": 36.93719806763285, "grad_norm": 0.2734910249710083, "learning_rate": 0.001, "loss": 1.8114, "step": 428176 }, { "epoch": 36.94202898550725, "grad_norm": 0.31353959441185, "learning_rate": 0.001, "loss": 1.7969, "step": 428232 }, { "epoch": 36.94685990338164, "grad_norm": 0.3809851408004761, "learning_rate": 0.001, "loss": 1.807, "step": 428288 }, { "epoch": 36.95169082125604, "grad_norm": 0.2942412197589874, "learning_rate": 0.001, "loss": 1.8062, "step": 428344 }, { "epoch": 36.95652173913044, "grad_norm": 0.42585289478302, "learning_rate": 0.001, "loss": 1.8037, "step": 428400 }, { "epoch": 36.96135265700483, "grad_norm": 0.4983316957950592, "learning_rate": 0.001, "loss": 1.797, "step": 428456 }, { "epoch": 36.966183574879224, "grad_norm": 0.6737427115440369, "learning_rate": 0.001, "loss": 1.8007, "step": 428512 }, { "epoch": 36.971014492753625, "grad_norm": 4.854911804199219, "learning_rate": 0.001, "loss": 1.803, "step": 428568 }, { "epoch": 36.97584541062802, "grad_norm": 1.1266233921051025, "learning_rate": 0.001, "loss": 1.8027, "step": 428624 }, { "epoch": 36.98067632850242, "grad_norm": 0.8535734415054321, "learning_rate": 0.001, "loss": 1.8232, "step": 428680 }, { "epoch": 36.98550724637681, "grad_norm": 3.8211865425109863, "learning_rate": 0.001, "loss": 1.8207, "step": 428736 }, { "epoch": 36.990338164251206, "grad_norm": 0.5237981677055359, "learning_rate": 0.001, "loss": 1.802, "step": 428792 }, { "epoch": 36.99516908212561, "grad_norm": 0.5662748217582703, "learning_rate": 0.001, "loss": 1.8176, "step": 428848 }, { "epoch": 37.0, "grad_norm": 2.2902872562408447, "learning_rate": 0.001, "loss": 1.8188, "step": 428904 }, { "epoch": 37.00483091787439, "grad_norm": 0.8495247960090637, "learning_rate": 0.001, "loss": 1.775, "step": 428960 }, { "epoch": 37.009661835748794, "grad_norm": 0.8331407308578491, "learning_rate": 0.001, "loss": 1.7783, "step": 429016 }, { "epoch": 37.01449275362319, "grad_norm": 0.9239394068717957, "learning_rate": 0.001, "loss": 1.7812, "step": 429072 }, { "epoch": 37.01932367149758, "grad_norm": 0.9420920014381409, "learning_rate": 0.001, "loss": 1.7809, "step": 429128 }, { "epoch": 37.02415458937198, "grad_norm": 1.253441333770752, "learning_rate": 0.001, "loss": 1.7789, "step": 429184 }, { "epoch": 37.028985507246375, "grad_norm": 11.27698802947998, "learning_rate": 0.001, "loss": 1.7901, "step": 429240 }, { "epoch": 37.033816425120776, "grad_norm": 1.17238187789917, "learning_rate": 0.001, "loss": 1.799, "step": 429296 }, { "epoch": 37.03864734299517, "grad_norm": 1.3603156805038452, "learning_rate": 0.001, "loss": 1.8049, "step": 429352 }, { "epoch": 37.04347826086956, "grad_norm": 1.2879185676574707, "learning_rate": 0.001, "loss": 1.8043, "step": 429408 }, { "epoch": 37.04830917874396, "grad_norm": 0.2640153765678406, "learning_rate": 0.001, "loss": 1.7995, "step": 429464 }, { "epoch": 37.05314009661836, "grad_norm": 0.8613932132720947, "learning_rate": 0.001, "loss": 1.783, "step": 429520 }, { "epoch": 37.05797101449275, "grad_norm": 3.0839366912841797, "learning_rate": 0.001, "loss": 1.7828, "step": 429576 }, { "epoch": 37.06280193236715, "grad_norm": 0.9025628566741943, "learning_rate": 0.001, "loss": 1.7917, "step": 429632 }, { "epoch": 37.067632850241544, "grad_norm": 1.1744378805160522, "learning_rate": 0.001, "loss": 1.7927, "step": 429688 }, { "epoch": 37.072463768115945, "grad_norm": 0.666778028011322, "learning_rate": 0.001, "loss": 1.7883, "step": 429744 }, { "epoch": 37.07729468599034, "grad_norm": 1.390675663948059, "learning_rate": 0.001, "loss": 1.8038, "step": 429800 }, { "epoch": 37.08212560386473, "grad_norm": 1.0713013410568237, "learning_rate": 0.001, "loss": 1.7908, "step": 429856 }, { "epoch": 37.08695652173913, "grad_norm": 1.0665810108184814, "learning_rate": 0.001, "loss": 1.7945, "step": 429912 }, { "epoch": 37.091787439613526, "grad_norm": 0.47662216424942017, "learning_rate": 0.001, "loss": 1.7993, "step": 429968 }, { "epoch": 37.09661835748792, "grad_norm": 0.25070974230766296, "learning_rate": 0.001, "loss": 1.7874, "step": 430024 }, { "epoch": 37.10144927536232, "grad_norm": 4.5656633377075195, "learning_rate": 0.001, "loss": 1.7946, "step": 430080 }, { "epoch": 37.106280193236714, "grad_norm": 0.3864899277687073, "learning_rate": 0.001, "loss": 1.8021, "step": 430136 }, { "epoch": 37.111111111111114, "grad_norm": 0.8839205503463745, "learning_rate": 0.001, "loss": 1.8033, "step": 430192 }, { "epoch": 37.11594202898551, "grad_norm": 2.609945774078369, "learning_rate": 0.001, "loss": 1.7942, "step": 430248 }, { "epoch": 37.1207729468599, "grad_norm": 0.31405994296073914, "learning_rate": 0.001, "loss": 1.8031, "step": 430304 }, { "epoch": 37.1256038647343, "grad_norm": 0.5954448580741882, "learning_rate": 0.001, "loss": 1.7905, "step": 430360 }, { "epoch": 37.130434782608695, "grad_norm": 0.6253069043159485, "learning_rate": 0.001, "loss": 1.7963, "step": 430416 }, { "epoch": 37.13526570048309, "grad_norm": 0.607184112071991, "learning_rate": 0.001, "loss": 1.7978, "step": 430472 }, { "epoch": 37.14009661835749, "grad_norm": 1.179418921470642, "learning_rate": 0.001, "loss": 1.7856, "step": 430528 }, { "epoch": 37.14492753623188, "grad_norm": 1.3099792003631592, "learning_rate": 0.001, "loss": 1.7841, "step": 430584 }, { "epoch": 37.14975845410628, "grad_norm": 4.476251602172852, "learning_rate": 0.001, "loss": 1.7907, "step": 430640 }, { "epoch": 37.15458937198068, "grad_norm": 0.4609124958515167, "learning_rate": 0.001, "loss": 1.7949, "step": 430696 }, { "epoch": 37.15942028985507, "grad_norm": 0.329669326543808, "learning_rate": 0.001, "loss": 1.789, "step": 430752 }, { "epoch": 37.16425120772947, "grad_norm": 0.7025365829467773, "learning_rate": 0.001, "loss": 1.7933, "step": 430808 }, { "epoch": 37.169082125603865, "grad_norm": 0.4039738178253174, "learning_rate": 0.001, "loss": 1.8042, "step": 430864 }, { "epoch": 37.17391304347826, "grad_norm": 0.42757540941238403, "learning_rate": 0.001, "loss": 1.8012, "step": 430920 }, { "epoch": 37.17874396135266, "grad_norm": 0.4611194133758545, "learning_rate": 0.001, "loss": 1.8018, "step": 430976 }, { "epoch": 37.18357487922705, "grad_norm": 0.293576180934906, "learning_rate": 0.001, "loss": 1.8003, "step": 431032 }, { "epoch": 37.18840579710145, "grad_norm": 0.27772918343544006, "learning_rate": 0.001, "loss": 1.8137, "step": 431088 }, { "epoch": 37.193236714975846, "grad_norm": 0.37366431951522827, "learning_rate": 0.001, "loss": 1.8152, "step": 431144 }, { "epoch": 37.19806763285024, "grad_norm": 0.3329792618751526, "learning_rate": 0.001, "loss": 1.8063, "step": 431200 }, { "epoch": 37.20289855072464, "grad_norm": 0.41718485951423645, "learning_rate": 0.001, "loss": 1.7985, "step": 431256 }, { "epoch": 37.207729468599034, "grad_norm": 0.40818530321121216, "learning_rate": 0.001, "loss": 1.7845, "step": 431312 }, { "epoch": 37.21256038647343, "grad_norm": 2.585310697555542, "learning_rate": 0.001, "loss": 1.7871, "step": 431368 }, { "epoch": 37.21739130434783, "grad_norm": 0.973480761051178, "learning_rate": 0.001, "loss": 1.7868, "step": 431424 }, { "epoch": 37.22222222222222, "grad_norm": 0.24061378836631775, "learning_rate": 0.001, "loss": 1.7902, "step": 431480 }, { "epoch": 37.227053140096615, "grad_norm": 0.8591878414154053, "learning_rate": 0.001, "loss": 1.7931, "step": 431536 }, { "epoch": 37.231884057971016, "grad_norm": 0.6007813215255737, "learning_rate": 0.001, "loss": 1.798, "step": 431592 }, { "epoch": 37.23671497584541, "grad_norm": 0.4660511314868927, "learning_rate": 0.001, "loss": 1.7982, "step": 431648 }, { "epoch": 37.24154589371981, "grad_norm": 0.4099147319793701, "learning_rate": 0.001, "loss": 1.8043, "step": 431704 }, { "epoch": 37.2463768115942, "grad_norm": 0.3252065181732178, "learning_rate": 0.001, "loss": 1.802, "step": 431760 }, { "epoch": 37.2512077294686, "grad_norm": 0.38114872574806213, "learning_rate": 0.001, "loss": 1.8052, "step": 431816 }, { "epoch": 37.256038647343, "grad_norm": 1.8023308515548706, "learning_rate": 0.001, "loss": 1.8165, "step": 431872 }, { "epoch": 37.26086956521739, "grad_norm": 0.3416430950164795, "learning_rate": 0.001, "loss": 1.8023, "step": 431928 }, { "epoch": 37.265700483091784, "grad_norm": 0.28233957290649414, "learning_rate": 0.001, "loss": 1.8145, "step": 431984 }, { "epoch": 37.270531400966185, "grad_norm": 2.651761293411255, "learning_rate": 0.001, "loss": 1.7969, "step": 432040 }, { "epoch": 37.27536231884058, "grad_norm": 1.3144161701202393, "learning_rate": 0.001, "loss": 1.8034, "step": 432096 }, { "epoch": 37.28019323671498, "grad_norm": 0.48560410737991333, "learning_rate": 0.001, "loss": 1.7999, "step": 432152 }, { "epoch": 37.28502415458937, "grad_norm": 2.9764418601989746, "learning_rate": 0.001, "loss": 1.7947, "step": 432208 }, { "epoch": 37.289855072463766, "grad_norm": 0.43643227219581604, "learning_rate": 0.001, "loss": 1.8198, "step": 432264 }, { "epoch": 37.29468599033817, "grad_norm": 0.27631834149360657, "learning_rate": 0.001, "loss": 1.8048, "step": 432320 }, { "epoch": 37.29951690821256, "grad_norm": 2.0431299209594727, "learning_rate": 0.001, "loss": 1.8011, "step": 432376 }, { "epoch": 37.30434782608695, "grad_norm": 0.9546424746513367, "learning_rate": 0.001, "loss": 1.7984, "step": 432432 }, { "epoch": 37.309178743961354, "grad_norm": 0.5638821721076965, "learning_rate": 0.001, "loss": 1.8115, "step": 432488 }, { "epoch": 37.31400966183575, "grad_norm": 0.2640678584575653, "learning_rate": 0.001, "loss": 1.8161, "step": 432544 }, { "epoch": 37.31884057971015, "grad_norm": 0.3586031198501587, "learning_rate": 0.001, "loss": 1.8118, "step": 432600 }, { "epoch": 37.32367149758454, "grad_norm": 1.2178837060928345, "learning_rate": 0.001, "loss": 1.8083, "step": 432656 }, { "epoch": 37.328502415458935, "grad_norm": 0.4629884660243988, "learning_rate": 0.001, "loss": 1.7981, "step": 432712 }, { "epoch": 37.333333333333336, "grad_norm": 0.9612137079238892, "learning_rate": 0.001, "loss": 1.8076, "step": 432768 }, { "epoch": 37.33816425120773, "grad_norm": 1.936285138130188, "learning_rate": 0.001, "loss": 1.804, "step": 432824 }, { "epoch": 37.34299516908212, "grad_norm": 0.35145169496536255, "learning_rate": 0.001, "loss": 1.7977, "step": 432880 }, { "epoch": 37.34782608695652, "grad_norm": 0.29318633675575256, "learning_rate": 0.001, "loss": 1.8123, "step": 432936 }, { "epoch": 37.35265700483092, "grad_norm": 5.442259788513184, "learning_rate": 0.001, "loss": 1.8193, "step": 432992 }, { "epoch": 37.35748792270532, "grad_norm": 0.3427492082118988, "learning_rate": 0.001, "loss": 1.8012, "step": 433048 }, { "epoch": 37.36231884057971, "grad_norm": 0.6948018670082092, "learning_rate": 0.001, "loss": 1.7986, "step": 433104 }, { "epoch": 37.367149758454104, "grad_norm": 3.041978120803833, "learning_rate": 0.001, "loss": 1.8059, "step": 433160 }, { "epoch": 37.371980676328505, "grad_norm": 0.5605401396751404, "learning_rate": 0.001, "loss": 1.8107, "step": 433216 }, { "epoch": 37.3768115942029, "grad_norm": 0.474217027425766, "learning_rate": 0.001, "loss": 1.804, "step": 433272 }, { "epoch": 37.38164251207729, "grad_norm": 0.9622037410736084, "learning_rate": 0.001, "loss": 1.7973, "step": 433328 }, { "epoch": 37.38647342995169, "grad_norm": 0.6046218276023865, "learning_rate": 0.001, "loss": 1.7996, "step": 433384 }, { "epoch": 37.391304347826086, "grad_norm": 0.431861013174057, "learning_rate": 0.001, "loss": 1.8021, "step": 433440 }, { "epoch": 37.39613526570048, "grad_norm": 0.5137316584587097, "learning_rate": 0.001, "loss": 1.7906, "step": 433496 }, { "epoch": 37.40096618357488, "grad_norm": 0.7693114280700684, "learning_rate": 0.001, "loss": 1.7961, "step": 433552 }, { "epoch": 37.405797101449274, "grad_norm": 3.450913906097412, "learning_rate": 0.001, "loss": 1.7956, "step": 433608 }, { "epoch": 37.410628019323674, "grad_norm": 0.5693427324295044, "learning_rate": 0.001, "loss": 1.7947, "step": 433664 }, { "epoch": 37.41545893719807, "grad_norm": 0.4693599045276642, "learning_rate": 0.001, "loss": 1.7928, "step": 433720 }, { "epoch": 37.42028985507246, "grad_norm": 0.5289928913116455, "learning_rate": 0.001, "loss": 1.7972, "step": 433776 }, { "epoch": 37.42512077294686, "grad_norm": 0.7103660702705383, "learning_rate": 0.001, "loss": 1.7978, "step": 433832 }, { "epoch": 37.429951690821255, "grad_norm": 0.40108683705329895, "learning_rate": 0.001, "loss": 1.8, "step": 433888 }, { "epoch": 37.43478260869565, "grad_norm": 0.39831897616386414, "learning_rate": 0.001, "loss": 1.8046, "step": 433944 }, { "epoch": 37.43961352657005, "grad_norm": 0.31081414222717285, "learning_rate": 0.001, "loss": 1.809, "step": 434000 }, { "epoch": 37.44444444444444, "grad_norm": 0.352687805891037, "learning_rate": 0.001, "loss": 1.8034, "step": 434056 }, { "epoch": 37.44927536231884, "grad_norm": 0.2835441827774048, "learning_rate": 0.001, "loss": 1.7932, "step": 434112 }, { "epoch": 37.45410628019324, "grad_norm": 0.41584137082099915, "learning_rate": 0.001, "loss": 1.7963, "step": 434168 }, { "epoch": 37.45893719806763, "grad_norm": 0.6223008632659912, "learning_rate": 0.001, "loss": 1.7895, "step": 434224 }, { "epoch": 37.46376811594203, "grad_norm": 0.583615779876709, "learning_rate": 0.001, "loss": 1.7862, "step": 434280 }, { "epoch": 37.468599033816425, "grad_norm": 0.6217259168624878, "learning_rate": 0.001, "loss": 1.785, "step": 434336 }, { "epoch": 37.47342995169082, "grad_norm": 0.5850480794906616, "learning_rate": 0.001, "loss": 1.8086, "step": 434392 }, { "epoch": 37.47826086956522, "grad_norm": 0.6462048888206482, "learning_rate": 0.001, "loss": 1.8152, "step": 434448 }, { "epoch": 37.48309178743961, "grad_norm": 0.29878631234169006, "learning_rate": 0.001, "loss": 1.8147, "step": 434504 }, { "epoch": 37.48792270531401, "grad_norm": 0.536068856716156, "learning_rate": 0.001, "loss": 1.7956, "step": 434560 }, { "epoch": 37.492753623188406, "grad_norm": 4.235079288482666, "learning_rate": 0.001, "loss": 1.7907, "step": 434616 }, { "epoch": 37.4975845410628, "grad_norm": 3.0241127014160156, "learning_rate": 0.001, "loss": 1.803, "step": 434672 }, { "epoch": 37.5024154589372, "grad_norm": 0.44283756613731384, "learning_rate": 0.001, "loss": 1.8089, "step": 434728 }, { "epoch": 37.507246376811594, "grad_norm": 1.614266276359558, "learning_rate": 0.001, "loss": 1.8133, "step": 434784 }, { "epoch": 37.51207729468599, "grad_norm": 1.0691124200820923, "learning_rate": 0.001, "loss": 1.8129, "step": 434840 }, { "epoch": 37.51690821256039, "grad_norm": 0.4427920877933502, "learning_rate": 0.001, "loss": 1.8094, "step": 434896 }, { "epoch": 37.52173913043478, "grad_norm": 0.7578710317611694, "learning_rate": 0.001, "loss": 1.8117, "step": 434952 }, { "epoch": 37.52657004830918, "grad_norm": 0.7158226370811462, "learning_rate": 0.001, "loss": 1.8096, "step": 435008 }, { "epoch": 37.531400966183575, "grad_norm": 0.2901918590068817, "learning_rate": 0.001, "loss": 1.8226, "step": 435064 }, { "epoch": 37.53623188405797, "grad_norm": 2.127894163131714, "learning_rate": 0.001, "loss": 1.8172, "step": 435120 }, { "epoch": 37.54106280193237, "grad_norm": 0.6813138723373413, "learning_rate": 0.001, "loss": 1.8341, "step": 435176 }, { "epoch": 37.54589371980676, "grad_norm": 5.986727714538574, "learning_rate": 0.001, "loss": 1.8368, "step": 435232 }, { "epoch": 37.55072463768116, "grad_norm": 2.1489498615264893, "learning_rate": 0.001, "loss": 1.8319, "step": 435288 }, { "epoch": 37.55555555555556, "grad_norm": 0.4706360995769501, "learning_rate": 0.001, "loss": 1.8235, "step": 435344 }, { "epoch": 37.56038647342995, "grad_norm": 0.3429426848888397, "learning_rate": 0.001, "loss": 1.8246, "step": 435400 }, { "epoch": 37.56521739130435, "grad_norm": 0.8397558927536011, "learning_rate": 0.001, "loss": 1.8211, "step": 435456 }, { "epoch": 37.570048309178745, "grad_norm": 0.3322698771953583, "learning_rate": 0.001, "loss": 1.8294, "step": 435512 }, { "epoch": 37.57487922705314, "grad_norm": 0.5323711633682251, "learning_rate": 0.001, "loss": 1.8247, "step": 435568 }, { "epoch": 37.57971014492754, "grad_norm": 3.6785521507263184, "learning_rate": 0.001, "loss": 1.8132, "step": 435624 }, { "epoch": 37.58454106280193, "grad_norm": 0.3592097759246826, "learning_rate": 0.001, "loss": 1.8153, "step": 435680 }, { "epoch": 37.589371980676326, "grad_norm": 0.3200095295906067, "learning_rate": 0.001, "loss": 1.8093, "step": 435736 }, { "epoch": 37.594202898550726, "grad_norm": 0.38976943492889404, "learning_rate": 0.001, "loss": 1.821, "step": 435792 }, { "epoch": 37.59903381642512, "grad_norm": 1.1410948038101196, "learning_rate": 0.001, "loss": 1.8096, "step": 435848 }, { "epoch": 37.60386473429952, "grad_norm": 0.6095148324966431, "learning_rate": 0.001, "loss": 1.8093, "step": 435904 }, { "epoch": 37.608695652173914, "grad_norm": 18.66442108154297, "learning_rate": 0.001, "loss": 1.8018, "step": 435960 }, { "epoch": 37.61352657004831, "grad_norm": 0.5409610867500305, "learning_rate": 0.001, "loss": 1.8067, "step": 436016 }, { "epoch": 37.61835748792271, "grad_norm": 1.8446381092071533, "learning_rate": 0.001, "loss": 1.8061, "step": 436072 }, { "epoch": 37.6231884057971, "grad_norm": 0.6559701561927795, "learning_rate": 0.001, "loss": 1.8105, "step": 436128 }, { "epoch": 37.628019323671495, "grad_norm": 9.215534210205078, "learning_rate": 0.001, "loss": 1.8133, "step": 436184 }, { "epoch": 37.632850241545896, "grad_norm": 1.4116722345352173, "learning_rate": 0.001, "loss": 1.8015, "step": 436240 }, { "epoch": 37.63768115942029, "grad_norm": 0.5056512951850891, "learning_rate": 0.001, "loss": 1.7962, "step": 436296 }, { "epoch": 37.64251207729468, "grad_norm": 4.850395202636719, "learning_rate": 0.001, "loss": 1.8011, "step": 436352 }, { "epoch": 37.64734299516908, "grad_norm": 0.42203807830810547, "learning_rate": 0.001, "loss": 1.7961, "step": 436408 }, { "epoch": 37.65217391304348, "grad_norm": 1.1552599668502808, "learning_rate": 0.001, "loss": 1.8017, "step": 436464 }, { "epoch": 37.65700483091788, "grad_norm": 3.7905099391937256, "learning_rate": 0.001, "loss": 1.8094, "step": 436520 }, { "epoch": 37.66183574879227, "grad_norm": 2.45000958442688, "learning_rate": 0.001, "loss": 1.8208, "step": 436576 }, { "epoch": 37.666666666666664, "grad_norm": 6.391603946685791, "learning_rate": 0.001, "loss": 1.8252, "step": 436632 }, { "epoch": 37.671497584541065, "grad_norm": 2.3429369926452637, "learning_rate": 0.001, "loss": 1.8248, "step": 436688 }, { "epoch": 37.67632850241546, "grad_norm": 0.877004086971283, "learning_rate": 0.001, "loss": 1.8201, "step": 436744 }, { "epoch": 37.68115942028985, "grad_norm": 1.5893133878707886, "learning_rate": 0.001, "loss": 1.8314, "step": 436800 }, { "epoch": 37.68599033816425, "grad_norm": 0.9538088440895081, "learning_rate": 0.001, "loss": 1.8352, "step": 436856 }, { "epoch": 37.690821256038646, "grad_norm": 2.330740451812744, "learning_rate": 0.001, "loss": 1.8218, "step": 436912 }, { "epoch": 37.69565217391305, "grad_norm": 0.978886604309082, "learning_rate": 0.001, "loss": 1.8295, "step": 436968 }, { "epoch": 37.70048309178744, "grad_norm": 0.2669030427932739, "learning_rate": 0.001, "loss": 1.8212, "step": 437024 }, { "epoch": 37.70531400966183, "grad_norm": 0.3462893068790436, "learning_rate": 0.001, "loss": 1.8221, "step": 437080 }, { "epoch": 37.710144927536234, "grad_norm": 3.944455623626709, "learning_rate": 0.001, "loss": 1.8215, "step": 437136 }, { "epoch": 37.71497584541063, "grad_norm": 1.0964136123657227, "learning_rate": 0.001, "loss": 1.8406, "step": 437192 }, { "epoch": 37.71980676328502, "grad_norm": 0.624636709690094, "learning_rate": 0.001, "loss": 1.8371, "step": 437248 }, { "epoch": 37.72463768115942, "grad_norm": 0.44444236159324646, "learning_rate": 0.001, "loss": 1.8433, "step": 437304 }, { "epoch": 37.729468599033815, "grad_norm": 1.2471266984939575, "learning_rate": 0.001, "loss": 1.8325, "step": 437360 }, { "epoch": 37.734299516908216, "grad_norm": 0.4111259877681732, "learning_rate": 0.001, "loss": 1.8339, "step": 437416 }, { "epoch": 37.73913043478261, "grad_norm": 0.7006278038024902, "learning_rate": 0.001, "loss": 1.8322, "step": 437472 }, { "epoch": 37.743961352657, "grad_norm": 5.651107311248779, "learning_rate": 0.001, "loss": 1.8262, "step": 437528 }, { "epoch": 37.7487922705314, "grad_norm": 0.7450050115585327, "learning_rate": 0.001, "loss": 1.8208, "step": 437584 }, { "epoch": 37.7536231884058, "grad_norm": 0.4873601794242859, "learning_rate": 0.001, "loss": 1.8117, "step": 437640 }, { "epoch": 37.75845410628019, "grad_norm": 4.328714847564697, "learning_rate": 0.001, "loss": 1.8181, "step": 437696 }, { "epoch": 37.76328502415459, "grad_norm": 3.2995386123657227, "learning_rate": 0.001, "loss": 1.8228, "step": 437752 }, { "epoch": 37.768115942028984, "grad_norm": 0.4106883704662323, "learning_rate": 0.001, "loss": 1.8327, "step": 437808 }, { "epoch": 37.772946859903385, "grad_norm": 4.684328556060791, "learning_rate": 0.001, "loss": 1.8316, "step": 437864 }, { "epoch": 37.77777777777778, "grad_norm": 2.6593568325042725, "learning_rate": 0.001, "loss": 1.8191, "step": 437920 }, { "epoch": 37.78260869565217, "grad_norm": 1.1616255044937134, "learning_rate": 0.001, "loss": 1.8286, "step": 437976 }, { "epoch": 37.78743961352657, "grad_norm": 0.3611791431903839, "learning_rate": 0.001, "loss": 1.8464, "step": 438032 }, { "epoch": 37.792270531400966, "grad_norm": 1.3245964050292969, "learning_rate": 0.001, "loss": 1.8454, "step": 438088 }, { "epoch": 37.79710144927536, "grad_norm": 2.312638998031616, "learning_rate": 0.001, "loss": 1.8241, "step": 438144 }, { "epoch": 37.80193236714976, "grad_norm": 0.4818379282951355, "learning_rate": 0.001, "loss": 1.8168, "step": 438200 }, { "epoch": 37.806763285024154, "grad_norm": 0.840203046798706, "learning_rate": 0.001, "loss": 1.8181, "step": 438256 }, { "epoch": 37.81159420289855, "grad_norm": 1.3683149814605713, "learning_rate": 0.001, "loss": 1.8234, "step": 438312 }, { "epoch": 37.81642512077295, "grad_norm": 0.42114439606666565, "learning_rate": 0.001, "loss": 1.8244, "step": 438368 }, { "epoch": 37.82125603864734, "grad_norm": 2.7872049808502197, "learning_rate": 0.001, "loss": 1.8222, "step": 438424 }, { "epoch": 37.82608695652174, "grad_norm": 0.3368990123271942, "learning_rate": 0.001, "loss": 1.8215, "step": 438480 }, { "epoch": 37.830917874396135, "grad_norm": 2.037951946258545, "learning_rate": 0.001, "loss": 1.8196, "step": 438536 }, { "epoch": 37.83574879227053, "grad_norm": 2.174989700317383, "learning_rate": 0.001, "loss": 1.8128, "step": 438592 }, { "epoch": 37.84057971014493, "grad_norm": 7.981287479400635, "learning_rate": 0.001, "loss": 1.8251, "step": 438648 }, { "epoch": 37.84541062801932, "grad_norm": 0.345838725566864, "learning_rate": 0.001, "loss": 1.8234, "step": 438704 }, { "epoch": 37.85024154589372, "grad_norm": 0.6326851844787598, "learning_rate": 0.001, "loss": 1.8334, "step": 438760 }, { "epoch": 37.85507246376812, "grad_norm": 0.7998091578483582, "learning_rate": 0.001, "loss": 1.8256, "step": 438816 }, { "epoch": 37.85990338164251, "grad_norm": 0.409668505191803, "learning_rate": 0.001, "loss": 1.832, "step": 438872 }, { "epoch": 37.86473429951691, "grad_norm": 0.9348124265670776, "learning_rate": 0.001, "loss": 1.8295, "step": 438928 }, { "epoch": 37.869565217391305, "grad_norm": 1.647357702255249, "learning_rate": 0.001, "loss": 1.8163, "step": 438984 }, { "epoch": 37.8743961352657, "grad_norm": 3.369560718536377, "learning_rate": 0.001, "loss": 1.8163, "step": 439040 }, { "epoch": 37.8792270531401, "grad_norm": 0.2613995373249054, "learning_rate": 0.001, "loss": 1.8299, "step": 439096 }, { "epoch": 37.88405797101449, "grad_norm": 1.1269335746765137, "learning_rate": 0.001, "loss": 1.821, "step": 439152 }, { "epoch": 37.888888888888886, "grad_norm": 1.0456128120422363, "learning_rate": 0.001, "loss": 1.8262, "step": 439208 }, { "epoch": 37.893719806763286, "grad_norm": 0.6296815872192383, "learning_rate": 0.001, "loss": 1.8157, "step": 439264 }, { "epoch": 37.89855072463768, "grad_norm": 0.9080009460449219, "learning_rate": 0.001, "loss": 1.8186, "step": 439320 }, { "epoch": 37.90338164251208, "grad_norm": 2.7807295322418213, "learning_rate": 0.001, "loss": 1.8136, "step": 439376 }, { "epoch": 37.908212560386474, "grad_norm": 0.3830980956554413, "learning_rate": 0.001, "loss": 1.815, "step": 439432 }, { "epoch": 37.91304347826087, "grad_norm": 1.161338210105896, "learning_rate": 0.001, "loss": 1.8177, "step": 439488 }, { "epoch": 37.91787439613527, "grad_norm": 0.2936462163925171, "learning_rate": 0.001, "loss": 1.8078, "step": 439544 }, { "epoch": 37.92270531400966, "grad_norm": 0.42556387186050415, "learning_rate": 0.001, "loss": 1.8162, "step": 439600 }, { "epoch": 37.927536231884055, "grad_norm": 0.28779345750808716, "learning_rate": 0.001, "loss": 1.8254, "step": 439656 }, { "epoch": 37.932367149758456, "grad_norm": 3.3047831058502197, "learning_rate": 0.001, "loss": 1.8157, "step": 439712 }, { "epoch": 37.93719806763285, "grad_norm": 0.4491784870624542, "learning_rate": 0.001, "loss": 1.8183, "step": 439768 }, { "epoch": 37.94202898550725, "grad_norm": 0.6522452235221863, "learning_rate": 0.001, "loss": 1.8112, "step": 439824 }, { "epoch": 37.94685990338164, "grad_norm": 4.115870952606201, "learning_rate": 0.001, "loss": 1.8056, "step": 439880 }, { "epoch": 37.95169082125604, "grad_norm": 0.29169243574142456, "learning_rate": 0.001, "loss": 1.8111, "step": 439936 }, { "epoch": 37.95652173913044, "grad_norm": 0.29916447401046753, "learning_rate": 0.001, "loss": 1.8048, "step": 439992 }, { "epoch": 37.96135265700483, "grad_norm": 0.3145538568496704, "learning_rate": 0.001, "loss": 1.8082, "step": 440048 }, { "epoch": 37.966183574879224, "grad_norm": 1.2632999420166016, "learning_rate": 0.001, "loss": 1.8085, "step": 440104 }, { "epoch": 37.971014492753625, "grad_norm": 0.5762110948562622, "learning_rate": 0.001, "loss": 1.8175, "step": 440160 }, { "epoch": 37.97584541062802, "grad_norm": 0.4545021951198578, "learning_rate": 0.001, "loss": 1.8072, "step": 440216 }, { "epoch": 37.98067632850242, "grad_norm": 0.355076402425766, "learning_rate": 0.001, "loss": 1.8085, "step": 440272 }, { "epoch": 37.98550724637681, "grad_norm": 0.864734947681427, "learning_rate": 0.001, "loss": 1.8002, "step": 440328 }, { "epoch": 37.990338164251206, "grad_norm": 0.3194849193096161, "learning_rate": 0.001, "loss": 1.8037, "step": 440384 }, { "epoch": 37.99516908212561, "grad_norm": 3.473010540008545, "learning_rate": 0.001, "loss": 1.8042, "step": 440440 }, { "epoch": 38.0, "grad_norm": 0.332484632730484, "learning_rate": 0.001, "loss": 1.8108, "step": 440496 }, { "epoch": 38.00483091787439, "grad_norm": 0.2825302183628082, "learning_rate": 0.001, "loss": 1.7689, "step": 440552 }, { "epoch": 38.009661835748794, "grad_norm": 0.2533167004585266, "learning_rate": 0.001, "loss": 1.7675, "step": 440608 }, { "epoch": 38.01449275362319, "grad_norm": 0.35121768712997437, "learning_rate": 0.001, "loss": 1.774, "step": 440664 }, { "epoch": 38.01932367149758, "grad_norm": 0.6180436611175537, "learning_rate": 0.001, "loss": 1.7751, "step": 440720 }, { "epoch": 38.02415458937198, "grad_norm": 1.3262364864349365, "learning_rate": 0.001, "loss": 1.7715, "step": 440776 }, { "epoch": 38.028985507246375, "grad_norm": 0.6194058656692505, "learning_rate": 0.001, "loss": 1.7787, "step": 440832 }, { "epoch": 38.033816425120776, "grad_norm": 1.1052429676055908, "learning_rate": 0.001, "loss": 1.7802, "step": 440888 }, { "epoch": 38.03864734299517, "grad_norm": 0.3435405492782593, "learning_rate": 0.001, "loss": 1.7802, "step": 440944 }, { "epoch": 38.04347826086956, "grad_norm": 0.6084678769111633, "learning_rate": 0.001, "loss": 1.7722, "step": 441000 }, { "epoch": 38.04830917874396, "grad_norm": 0.2961541414260864, "learning_rate": 0.001, "loss": 1.7763, "step": 441056 }, { "epoch": 38.05314009661836, "grad_norm": 0.6770376563072205, "learning_rate": 0.001, "loss": 1.7762, "step": 441112 }, { "epoch": 38.05797101449275, "grad_norm": 0.408786416053772, "learning_rate": 0.001, "loss": 1.7786, "step": 441168 }, { "epoch": 38.06280193236715, "grad_norm": 6.636822700500488, "learning_rate": 0.001, "loss": 1.7731, "step": 441224 }, { "epoch": 38.067632850241544, "grad_norm": 0.7064849138259888, "learning_rate": 0.001, "loss": 1.7744, "step": 441280 }, { "epoch": 38.072463768115945, "grad_norm": 0.28918537497520447, "learning_rate": 0.001, "loss": 1.7723, "step": 441336 }, { "epoch": 38.07729468599034, "grad_norm": 2.697930335998535, "learning_rate": 0.001, "loss": 1.7797, "step": 441392 }, { "epoch": 38.08212560386473, "grad_norm": 1.2852022647857666, "learning_rate": 0.001, "loss": 1.8785, "step": 441448 }, { "epoch": 38.08695652173913, "grad_norm": 0.3710173964500427, "learning_rate": 0.001, "loss": 1.792, "step": 441504 }, { "epoch": 38.091787439613526, "grad_norm": 1.2837814092636108, "learning_rate": 0.001, "loss": 1.8071, "step": 441560 }, { "epoch": 38.09661835748792, "grad_norm": 0.7191460132598877, "learning_rate": 0.001, "loss": 1.8064, "step": 441616 }, { "epoch": 38.10144927536232, "grad_norm": 0.28805872797966003, "learning_rate": 0.001, "loss": 1.7816, "step": 441672 }, { "epoch": 38.106280193236714, "grad_norm": 0.2820066809654236, "learning_rate": 0.001, "loss": 1.7803, "step": 441728 }, { "epoch": 38.111111111111114, "grad_norm": 0.4016064703464508, "learning_rate": 0.001, "loss": 1.7923, "step": 441784 }, { "epoch": 38.11594202898551, "grad_norm": 0.364282488822937, "learning_rate": 0.001, "loss": 1.7808, "step": 441840 }, { "epoch": 38.1207729468599, "grad_norm": 0.4857824742794037, "learning_rate": 0.001, "loss": 1.7788, "step": 441896 }, { "epoch": 38.1256038647343, "grad_norm": 1.052676796913147, "learning_rate": 0.001, "loss": 1.7754, "step": 441952 }, { "epoch": 38.130434782608695, "grad_norm": 0.30089664459228516, "learning_rate": 0.001, "loss": 1.7835, "step": 442008 }, { "epoch": 38.13526570048309, "grad_norm": 0.8481897711753845, "learning_rate": 0.001, "loss": 1.7737, "step": 442064 }, { "epoch": 38.14009661835749, "grad_norm": 0.33267903327941895, "learning_rate": 0.001, "loss": 1.7757, "step": 442120 }, { "epoch": 38.14492753623188, "grad_norm": 0.990314245223999, "learning_rate": 0.001, "loss": 1.7883, "step": 442176 }, { "epoch": 38.14975845410628, "grad_norm": 5.142291069030762, "learning_rate": 0.001, "loss": 1.785, "step": 442232 }, { "epoch": 38.15458937198068, "grad_norm": 0.24889129400253296, "learning_rate": 0.001, "loss": 1.778, "step": 442288 }, { "epoch": 38.15942028985507, "grad_norm": 0.291190505027771, "learning_rate": 0.001, "loss": 1.7728, "step": 442344 }, { "epoch": 38.16425120772947, "grad_norm": 0.2937721014022827, "learning_rate": 0.001, "loss": 1.7762, "step": 442400 }, { "epoch": 38.169082125603865, "grad_norm": 1.039170265197754, "learning_rate": 0.001, "loss": 1.785, "step": 442456 }, { "epoch": 38.17391304347826, "grad_norm": 0.39599063992500305, "learning_rate": 0.001, "loss": 1.779, "step": 442512 }, { "epoch": 38.17874396135266, "grad_norm": 1.6691745519638062, "learning_rate": 0.001, "loss": 1.7779, "step": 442568 }, { "epoch": 38.18357487922705, "grad_norm": 0.6476718187332153, "learning_rate": 0.001, "loss": 1.8102, "step": 442624 }, { "epoch": 38.18840579710145, "grad_norm": 0.2565389573574066, "learning_rate": 0.001, "loss": 1.7822, "step": 442680 }, { "epoch": 38.193236714975846, "grad_norm": 1.7020325660705566, "learning_rate": 0.001, "loss": 1.7774, "step": 442736 }, { "epoch": 38.19806763285024, "grad_norm": 0.2932084798812866, "learning_rate": 0.001, "loss": 1.7862, "step": 442792 }, { "epoch": 38.20289855072464, "grad_norm": 1.7871392965316772, "learning_rate": 0.001, "loss": 1.7955, "step": 442848 }, { "epoch": 38.207729468599034, "grad_norm": 0.4320087730884552, "learning_rate": 0.001, "loss": 1.7818, "step": 442904 }, { "epoch": 38.21256038647343, "grad_norm": 5.316151142120361, "learning_rate": 0.001, "loss": 1.7836, "step": 442960 }, { "epoch": 38.21739130434783, "grad_norm": 0.3735989034175873, "learning_rate": 0.001, "loss": 1.7914, "step": 443016 }, { "epoch": 38.22222222222222, "grad_norm": 0.3353160619735718, "learning_rate": 0.001, "loss": 1.7765, "step": 443072 }, { "epoch": 38.227053140096615, "grad_norm": 1.1306794881820679, "learning_rate": 0.001, "loss": 1.779, "step": 443128 }, { "epoch": 38.231884057971016, "grad_norm": 0.3718976378440857, "learning_rate": 0.001, "loss": 1.7741, "step": 443184 }, { "epoch": 38.23671497584541, "grad_norm": 0.314972460269928, "learning_rate": 0.001, "loss": 1.7859, "step": 443240 }, { "epoch": 38.24154589371981, "grad_norm": 0.3694324493408203, "learning_rate": 0.001, "loss": 1.7819, "step": 443296 }, { "epoch": 38.2463768115942, "grad_norm": 0.3555459976196289, "learning_rate": 0.001, "loss": 1.7874, "step": 443352 }, { "epoch": 38.2512077294686, "grad_norm": 0.32692813873291016, "learning_rate": 0.001, "loss": 1.776, "step": 443408 }, { "epoch": 38.256038647343, "grad_norm": 0.34501805901527405, "learning_rate": 0.001, "loss": 1.7816, "step": 443464 }, { "epoch": 38.26086956521739, "grad_norm": 0.30052995681762695, "learning_rate": 0.001, "loss": 1.7749, "step": 443520 }, { "epoch": 38.265700483091784, "grad_norm": 0.461503803730011, "learning_rate": 0.001, "loss": 1.7776, "step": 443576 }, { "epoch": 38.270531400966185, "grad_norm": 0.948097825050354, "learning_rate": 0.001, "loss": 1.7719, "step": 443632 }, { "epoch": 38.27536231884058, "grad_norm": 1.2265740633010864, "learning_rate": 0.001, "loss": 1.7774, "step": 443688 }, { "epoch": 38.28019323671498, "grad_norm": 0.27604612708091736, "learning_rate": 0.001, "loss": 1.7754, "step": 443744 }, { "epoch": 38.28502415458937, "grad_norm": 0.3369390070438385, "learning_rate": 0.001, "loss": 1.7773, "step": 443800 }, { "epoch": 38.289855072463766, "grad_norm": 0.5619394183158875, "learning_rate": 0.001, "loss": 1.7741, "step": 443856 }, { "epoch": 38.29468599033817, "grad_norm": 1.4710376262664795, "learning_rate": 0.001, "loss": 1.7719, "step": 443912 }, { "epoch": 38.29951690821256, "grad_norm": 0.9131851196289062, "learning_rate": 0.001, "loss": 1.7867, "step": 443968 }, { "epoch": 38.30434782608695, "grad_norm": 0.29235509037971497, "learning_rate": 0.001, "loss": 1.7812, "step": 444024 }, { "epoch": 38.309178743961354, "grad_norm": 0.7638712525367737, "learning_rate": 0.001, "loss": 1.7784, "step": 444080 }, { "epoch": 38.31400966183575, "grad_norm": 0.7243824601173401, "learning_rate": 0.001, "loss": 1.7809, "step": 444136 }, { "epoch": 38.31884057971015, "grad_norm": 0.5621315240859985, "learning_rate": 0.001, "loss": 1.7798, "step": 444192 }, { "epoch": 38.32367149758454, "grad_norm": 0.5194664001464844, "learning_rate": 0.001, "loss": 1.782, "step": 444248 }, { "epoch": 38.328502415458935, "grad_norm": 0.25789618492126465, "learning_rate": 0.001, "loss": 1.7753, "step": 444304 }, { "epoch": 38.333333333333336, "grad_norm": 0.3064212501049042, "learning_rate": 0.001, "loss": 1.771, "step": 444360 }, { "epoch": 38.33816425120773, "grad_norm": 0.7157604694366455, "learning_rate": 0.001, "loss": 1.7749, "step": 444416 }, { "epoch": 38.34299516908212, "grad_norm": 0.6054872274398804, "learning_rate": 0.001, "loss": 1.7764, "step": 444472 }, { "epoch": 38.34782608695652, "grad_norm": 0.3910652995109558, "learning_rate": 0.001, "loss": 1.7729, "step": 444528 }, { "epoch": 38.35265700483092, "grad_norm": 2.0726563930511475, "learning_rate": 0.001, "loss": 1.765, "step": 444584 }, { "epoch": 38.35748792270532, "grad_norm": 0.8810856938362122, "learning_rate": 0.001, "loss": 1.775, "step": 444640 }, { "epoch": 38.36231884057971, "grad_norm": 0.35898667573928833, "learning_rate": 0.001, "loss": 1.7724, "step": 444696 }, { "epoch": 38.367149758454104, "grad_norm": 0.291034072637558, "learning_rate": 0.001, "loss": 1.7765, "step": 444752 }, { "epoch": 38.371980676328505, "grad_norm": 0.32668593525886536, "learning_rate": 0.001, "loss": 1.7778, "step": 444808 }, { "epoch": 38.3768115942029, "grad_norm": 0.3217102587223053, "learning_rate": 0.001, "loss": 1.7677, "step": 444864 }, { "epoch": 38.38164251207729, "grad_norm": 0.3390410244464874, "learning_rate": 0.001, "loss": 1.7914, "step": 444920 }, { "epoch": 38.38647342995169, "grad_norm": 13.918383598327637, "learning_rate": 0.001, "loss": 1.808, "step": 444976 }, { "epoch": 38.391304347826086, "grad_norm": 0.9293938279151917, "learning_rate": 0.001, "loss": 1.786, "step": 445032 }, { "epoch": 38.39613526570048, "grad_norm": 0.3694336414337158, "learning_rate": 0.001, "loss": 1.7859, "step": 445088 }, { "epoch": 38.40096618357488, "grad_norm": 0.7076948881149292, "learning_rate": 0.001, "loss": 1.7839, "step": 445144 }, { "epoch": 38.405797101449274, "grad_norm": 1.7282414436340332, "learning_rate": 0.001, "loss": 1.8068, "step": 445200 }, { "epoch": 38.410628019323674, "grad_norm": 1.0252416133880615, "learning_rate": 0.001, "loss": 1.8011, "step": 445256 }, { "epoch": 38.41545893719807, "grad_norm": 2.7271792888641357, "learning_rate": 0.001, "loss": 1.797, "step": 445312 }, { "epoch": 38.42028985507246, "grad_norm": 2.166074275970459, "learning_rate": 0.001, "loss": 1.817, "step": 445368 }, { "epoch": 38.42512077294686, "grad_norm": 0.7717810273170471, "learning_rate": 0.001, "loss": 1.819, "step": 445424 }, { "epoch": 38.429951690821255, "grad_norm": 0.3540348708629608, "learning_rate": 0.001, "loss": 1.7968, "step": 445480 }, { "epoch": 38.43478260869565, "grad_norm": 0.3778064250946045, "learning_rate": 0.001, "loss": 1.7916, "step": 445536 }, { "epoch": 38.43961352657005, "grad_norm": 0.38323456048965454, "learning_rate": 0.001, "loss": 1.7911, "step": 445592 }, { "epoch": 38.44444444444444, "grad_norm": 0.45393216609954834, "learning_rate": 0.001, "loss": 1.796, "step": 445648 }, { "epoch": 38.44927536231884, "grad_norm": 0.28253409266471863, "learning_rate": 0.001, "loss": 1.793, "step": 445704 }, { "epoch": 38.45410628019324, "grad_norm": 1.1290203332901, "learning_rate": 0.001, "loss": 1.7912, "step": 445760 }, { "epoch": 38.45893719806763, "grad_norm": 1.317454218864441, "learning_rate": 0.001, "loss": 1.7873, "step": 445816 }, { "epoch": 38.46376811594203, "grad_norm": 0.3048466742038727, "learning_rate": 0.001, "loss": 1.7959, "step": 445872 }, { "epoch": 38.468599033816425, "grad_norm": 0.3637180030345917, "learning_rate": 0.001, "loss": 1.7958, "step": 445928 }, { "epoch": 38.47342995169082, "grad_norm": 0.6282294392585754, "learning_rate": 0.001, "loss": 1.791, "step": 445984 }, { "epoch": 38.47826086956522, "grad_norm": 0.36203038692474365, "learning_rate": 0.001, "loss": 1.7847, "step": 446040 }, { "epoch": 38.48309178743961, "grad_norm": 0.6138783693313599, "learning_rate": 0.001, "loss": 1.7842, "step": 446096 }, { "epoch": 38.48792270531401, "grad_norm": 0.5937867760658264, "learning_rate": 0.001, "loss": 1.785, "step": 446152 }, { "epoch": 38.492753623188406, "grad_norm": 0.2579716145992279, "learning_rate": 0.001, "loss": 1.7968, "step": 446208 }, { "epoch": 38.4975845410628, "grad_norm": 0.27835163474082947, "learning_rate": 0.001, "loss": 1.7888, "step": 446264 }, { "epoch": 38.5024154589372, "grad_norm": 1.1484776735305786, "learning_rate": 0.001, "loss": 1.7935, "step": 446320 }, { "epoch": 38.507246376811594, "grad_norm": 0.6424139738082886, "learning_rate": 0.001, "loss": 1.7849, "step": 446376 }, { "epoch": 38.51207729468599, "grad_norm": 2.0825445652008057, "learning_rate": 0.001, "loss": 1.7953, "step": 446432 }, { "epoch": 38.51690821256039, "grad_norm": 1.0100948810577393, "learning_rate": 0.001, "loss": 1.782, "step": 446488 }, { "epoch": 38.52173913043478, "grad_norm": 5.140964984893799, "learning_rate": 0.001, "loss": 1.7905, "step": 446544 }, { "epoch": 38.52657004830918, "grad_norm": 0.67718505859375, "learning_rate": 0.001, "loss": 1.8007, "step": 446600 }, { "epoch": 38.531400966183575, "grad_norm": 0.3639608919620514, "learning_rate": 0.001, "loss": 1.7993, "step": 446656 }, { "epoch": 38.53623188405797, "grad_norm": 2.3734257221221924, "learning_rate": 0.001, "loss": 1.7981, "step": 446712 }, { "epoch": 38.54106280193237, "grad_norm": 0.9096109867095947, "learning_rate": 0.001, "loss": 1.8082, "step": 446768 }, { "epoch": 38.54589371980676, "grad_norm": 0.24499398469924927, "learning_rate": 0.001, "loss": 1.7974, "step": 446824 }, { "epoch": 38.55072463768116, "grad_norm": 0.6300747990608215, "learning_rate": 0.001, "loss": 1.82, "step": 446880 }, { "epoch": 38.55555555555556, "grad_norm": 1.2933855056762695, "learning_rate": 0.001, "loss": 1.8093, "step": 446936 }, { "epoch": 38.56038647342995, "grad_norm": 0.4451667070388794, "learning_rate": 0.001, "loss": 1.7919, "step": 446992 }, { "epoch": 38.56521739130435, "grad_norm": 0.8109222054481506, "learning_rate": 0.001, "loss": 1.804, "step": 447048 }, { "epoch": 38.570048309178745, "grad_norm": 2.574232339859009, "learning_rate": 0.001, "loss": 1.7835, "step": 447104 }, { "epoch": 38.57487922705314, "grad_norm": 0.6178728342056274, "learning_rate": 0.001, "loss": 1.7902, "step": 447160 }, { "epoch": 38.57971014492754, "grad_norm": 0.3826315999031067, "learning_rate": 0.001, "loss": 1.7842, "step": 447216 }, { "epoch": 38.58454106280193, "grad_norm": 0.554108738899231, "learning_rate": 0.001, "loss": 1.7956, "step": 447272 }, { "epoch": 38.589371980676326, "grad_norm": 0.6082323789596558, "learning_rate": 0.001, "loss": 1.7986, "step": 447328 }, { "epoch": 38.594202898550726, "grad_norm": 0.25101929903030396, "learning_rate": 0.001, "loss": 1.7934, "step": 447384 }, { "epoch": 38.59903381642512, "grad_norm": 0.3121839165687561, "learning_rate": 0.001, "loss": 1.7992, "step": 447440 }, { "epoch": 38.60386473429952, "grad_norm": 1.3409173488616943, "learning_rate": 0.001, "loss": 1.8043, "step": 447496 }, { "epoch": 38.608695652173914, "grad_norm": 1.3905839920043945, "learning_rate": 0.001, "loss": 1.7956, "step": 447552 }, { "epoch": 38.61352657004831, "grad_norm": 0.3227103054523468, "learning_rate": 0.001, "loss": 1.7891, "step": 447608 }, { "epoch": 38.61835748792271, "grad_norm": 3.4038619995117188, "learning_rate": 0.001, "loss": 1.7875, "step": 447664 }, { "epoch": 38.6231884057971, "grad_norm": 0.47102850675582886, "learning_rate": 0.001, "loss": 1.7845, "step": 447720 }, { "epoch": 38.628019323671495, "grad_norm": 0.42438656091690063, "learning_rate": 0.001, "loss": 1.7825, "step": 447776 }, { "epoch": 38.632850241545896, "grad_norm": 0.4077920913696289, "learning_rate": 0.001, "loss": 1.7966, "step": 447832 }, { "epoch": 38.63768115942029, "grad_norm": 0.2559696435928345, "learning_rate": 0.001, "loss": 1.7868, "step": 447888 }, { "epoch": 38.64251207729468, "grad_norm": 0.7494938969612122, "learning_rate": 0.001, "loss": 1.7937, "step": 447944 }, { "epoch": 38.64734299516908, "grad_norm": 1.1823296546936035, "learning_rate": 0.001, "loss": 1.7922, "step": 448000 }, { "epoch": 38.65217391304348, "grad_norm": 0.3893846571445465, "learning_rate": 0.001, "loss": 1.7848, "step": 448056 }, { "epoch": 38.65700483091788, "grad_norm": 0.5964910984039307, "learning_rate": 0.001, "loss": 1.7875, "step": 448112 }, { "epoch": 38.66183574879227, "grad_norm": 6.036163330078125, "learning_rate": 0.001, "loss": 1.79, "step": 448168 }, { "epoch": 38.666666666666664, "grad_norm": 0.3920939564704895, "learning_rate": 0.001, "loss": 1.7881, "step": 448224 }, { "epoch": 38.671497584541065, "grad_norm": 2.3875820636749268, "learning_rate": 0.001, "loss": 1.7931, "step": 448280 }, { "epoch": 38.67632850241546, "grad_norm": 0.54966801404953, "learning_rate": 0.001, "loss": 1.7957, "step": 448336 }, { "epoch": 38.68115942028985, "grad_norm": 0.34609076380729675, "learning_rate": 0.001, "loss": 1.7956, "step": 448392 }, { "epoch": 38.68599033816425, "grad_norm": 0.30398377776145935, "learning_rate": 0.001, "loss": 1.789, "step": 448448 }, { "epoch": 38.690821256038646, "grad_norm": 4.372628211975098, "learning_rate": 0.001, "loss": 1.7921, "step": 448504 }, { "epoch": 38.69565217391305, "grad_norm": 0.35899874567985535, "learning_rate": 0.001, "loss": 1.8016, "step": 448560 }, { "epoch": 38.70048309178744, "grad_norm": 0.6010347008705139, "learning_rate": 0.001, "loss": 1.7974, "step": 448616 }, { "epoch": 38.70531400966183, "grad_norm": 1.5906885862350464, "learning_rate": 0.001, "loss": 1.7986, "step": 448672 }, { "epoch": 38.710144927536234, "grad_norm": 0.32103240489959717, "learning_rate": 0.001, "loss": 1.7982, "step": 448728 }, { "epoch": 38.71497584541063, "grad_norm": 0.5428378582000732, "learning_rate": 0.001, "loss": 1.8044, "step": 448784 }, { "epoch": 38.71980676328502, "grad_norm": 0.8739556670188904, "learning_rate": 0.001, "loss": 1.8074, "step": 448840 }, { "epoch": 38.72463768115942, "grad_norm": 1.605176568031311, "learning_rate": 0.001, "loss": 1.8207, "step": 448896 }, { "epoch": 38.729468599033815, "grad_norm": 0.6749762296676636, "learning_rate": 0.001, "loss": 1.81, "step": 448952 }, { "epoch": 38.734299516908216, "grad_norm": 0.4253573417663574, "learning_rate": 0.001, "loss": 1.8054, "step": 449008 }, { "epoch": 38.73913043478261, "grad_norm": 0.5799744725227356, "learning_rate": 0.001, "loss": 1.8014, "step": 449064 }, { "epoch": 38.743961352657, "grad_norm": 1.4793938398361206, "learning_rate": 0.001, "loss": 1.7994, "step": 449120 }, { "epoch": 38.7487922705314, "grad_norm": 0.6273636221885681, "learning_rate": 0.001, "loss": 1.8021, "step": 449176 }, { "epoch": 38.7536231884058, "grad_norm": 1.5249005556106567, "learning_rate": 0.001, "loss": 1.9177, "step": 449232 }, { "epoch": 38.75845410628019, "grad_norm": 3.6143224239349365, "learning_rate": 0.001, "loss": 1.9515, "step": 449288 }, { "epoch": 38.76328502415459, "grad_norm": 0.4091202914714813, "learning_rate": 0.001, "loss": 1.8506, "step": 449344 }, { "epoch": 38.768115942028984, "grad_norm": 3.436753988265991, "learning_rate": 0.001, "loss": 1.8187, "step": 449400 }, { "epoch": 38.772946859903385, "grad_norm": 0.710066020488739, "learning_rate": 0.001, "loss": 1.8209, "step": 449456 }, { "epoch": 38.77777777777778, "grad_norm": 0.4235520362854004, "learning_rate": 0.001, "loss": 1.8059, "step": 449512 }, { "epoch": 38.78260869565217, "grad_norm": 0.5013096332550049, "learning_rate": 0.001, "loss": 1.8003, "step": 449568 }, { "epoch": 38.78743961352657, "grad_norm": 1.1082162857055664, "learning_rate": 0.001, "loss": 1.802, "step": 449624 }, { "epoch": 38.792270531400966, "grad_norm": 0.8056930303573608, "learning_rate": 0.001, "loss": 1.7974, "step": 449680 }, { "epoch": 38.79710144927536, "grad_norm": 0.756944477558136, "learning_rate": 0.001, "loss": 1.8018, "step": 449736 }, { "epoch": 38.80193236714976, "grad_norm": 0.9262315034866333, "learning_rate": 0.001, "loss": 1.7931, "step": 449792 }, { "epoch": 38.806763285024154, "grad_norm": 2.1059131622314453, "learning_rate": 0.001, "loss": 1.7953, "step": 449848 }, { "epoch": 38.81159420289855, "grad_norm": 0.38653042912483215, "learning_rate": 0.001, "loss": 1.8032, "step": 449904 }, { "epoch": 38.81642512077295, "grad_norm": 2.1234281063079834, "learning_rate": 0.001, "loss": 1.7962, "step": 449960 }, { "epoch": 38.82125603864734, "grad_norm": 0.9611732959747314, "learning_rate": 0.001, "loss": 1.8014, "step": 450016 }, { "epoch": 38.82608695652174, "grad_norm": 0.8607473373413086, "learning_rate": 0.001, "loss": 1.801, "step": 450072 }, { "epoch": 38.830917874396135, "grad_norm": 0.47827741503715515, "learning_rate": 0.001, "loss": 1.8044, "step": 450128 }, { "epoch": 38.83574879227053, "grad_norm": 0.3833787143230438, "learning_rate": 0.001, "loss": 1.8078, "step": 450184 }, { "epoch": 38.84057971014493, "grad_norm": 0.42741018533706665, "learning_rate": 0.001, "loss": 1.8107, "step": 450240 }, { "epoch": 38.84541062801932, "grad_norm": 0.7493306994438171, "learning_rate": 0.001, "loss": 1.8147, "step": 450296 }, { "epoch": 38.85024154589372, "grad_norm": 2.5808846950531006, "learning_rate": 0.001, "loss": 1.8343, "step": 450352 }, { "epoch": 38.85507246376812, "grad_norm": 8.199564933776855, "learning_rate": 0.001, "loss": 1.8322, "step": 450408 }, { "epoch": 38.85990338164251, "grad_norm": 0.903209388256073, "learning_rate": 0.001, "loss": 1.8278, "step": 450464 }, { "epoch": 38.86473429951691, "grad_norm": 1.4929951429367065, "learning_rate": 0.001, "loss": 1.8297, "step": 450520 }, { "epoch": 38.869565217391305, "grad_norm": 0.43569737672805786, "learning_rate": 0.001, "loss": 1.8065, "step": 450576 }, { "epoch": 38.8743961352657, "grad_norm": 5.928103446960449, "learning_rate": 0.001, "loss": 1.8004, "step": 450632 }, { "epoch": 38.8792270531401, "grad_norm": 2.4554736614227295, "learning_rate": 0.001, "loss": 1.8094, "step": 450688 }, { "epoch": 38.88405797101449, "grad_norm": 0.6193541288375854, "learning_rate": 0.001, "loss": 1.8152, "step": 450744 }, { "epoch": 38.888888888888886, "grad_norm": 0.43015071749687195, "learning_rate": 0.001, "loss": 1.8133, "step": 450800 }, { "epoch": 38.893719806763286, "grad_norm": 0.7441524863243103, "learning_rate": 0.001, "loss": 1.8072, "step": 450856 }, { "epoch": 38.89855072463768, "grad_norm": 0.844596803188324, "learning_rate": 0.001, "loss": 1.8017, "step": 450912 }, { "epoch": 38.90338164251208, "grad_norm": 2.394152879714966, "learning_rate": 0.001, "loss": 1.8003, "step": 450968 }, { "epoch": 38.908212560386474, "grad_norm": 0.4558674991130829, "learning_rate": 0.001, "loss": 1.8006, "step": 451024 }, { "epoch": 38.91304347826087, "grad_norm": 0.5908657312393188, "learning_rate": 0.001, "loss": 1.7947, "step": 451080 }, { "epoch": 38.91787439613527, "grad_norm": 0.33254092931747437, "learning_rate": 0.001, "loss": 1.8083, "step": 451136 }, { "epoch": 38.92270531400966, "grad_norm": 0.29405105113983154, "learning_rate": 0.001, "loss": 1.8203, "step": 451192 }, { "epoch": 38.927536231884055, "grad_norm": 0.26439738273620605, "learning_rate": 0.001, "loss": 1.8114, "step": 451248 }, { "epoch": 38.932367149758456, "grad_norm": 0.5527209043502808, "learning_rate": 0.001, "loss": 1.8205, "step": 451304 }, { "epoch": 38.93719806763285, "grad_norm": 7.17139196395874, "learning_rate": 0.001, "loss": 1.8339, "step": 451360 }, { "epoch": 38.94202898550725, "grad_norm": 1.848127007484436, "learning_rate": 0.001, "loss": 1.822, "step": 451416 }, { "epoch": 38.94685990338164, "grad_norm": 0.6753201484680176, "learning_rate": 0.001, "loss": 1.8375, "step": 451472 }, { "epoch": 38.95169082125604, "grad_norm": 0.3073612451553345, "learning_rate": 0.001, "loss": 1.8229, "step": 451528 }, { "epoch": 38.95652173913044, "grad_norm": 2.1036770343780518, "learning_rate": 0.001, "loss": 1.8185, "step": 451584 }, { "epoch": 38.96135265700483, "grad_norm": 0.7363268136978149, "learning_rate": 0.001, "loss": 1.821, "step": 451640 }, { "epoch": 38.966183574879224, "grad_norm": 0.38741424679756165, "learning_rate": 0.001, "loss": 1.8229, "step": 451696 }, { "epoch": 38.971014492753625, "grad_norm": 0.4927924573421478, "learning_rate": 0.001, "loss": 1.8089, "step": 451752 }, { "epoch": 38.97584541062802, "grad_norm": 7.602481365203857, "learning_rate": 0.001, "loss": 1.8157, "step": 451808 }, { "epoch": 38.98067632850242, "grad_norm": 1.3888112306594849, "learning_rate": 0.001, "loss": 1.8111, "step": 451864 }, { "epoch": 38.98550724637681, "grad_norm": 0.40749290585517883, "learning_rate": 0.001, "loss": 1.8118, "step": 451920 }, { "epoch": 38.990338164251206, "grad_norm": 0.43355992436408997, "learning_rate": 0.001, "loss": 1.8156, "step": 451976 }, { "epoch": 38.99516908212561, "grad_norm": 1.8002430200576782, "learning_rate": 0.001, "loss": 1.8295, "step": 452032 }, { "epoch": 39.0, "grad_norm": 0.8947835564613342, "learning_rate": 0.001, "loss": 1.8296, "step": 452088 }, { "epoch": 39.00483091787439, "grad_norm": 1.102235198020935, "learning_rate": 0.001, "loss": 1.8046, "step": 452144 }, { "epoch": 39.009661835748794, "grad_norm": 0.9161561727523804, "learning_rate": 0.001, "loss": 1.7958, "step": 452200 }, { "epoch": 39.01449275362319, "grad_norm": 2.39451265335083, "learning_rate": 0.001, "loss": 1.7968, "step": 452256 }, { "epoch": 39.01932367149758, "grad_norm": 0.5123676061630249, "learning_rate": 0.001, "loss": 1.7882, "step": 452312 }, { "epoch": 39.02415458937198, "grad_norm": 0.7145623564720154, "learning_rate": 0.001, "loss": 1.7977, "step": 452368 }, { "epoch": 39.028985507246375, "grad_norm": 4.258519172668457, "learning_rate": 0.001, "loss": 1.7865, "step": 452424 }, { "epoch": 39.033816425120776, "grad_norm": 1.5658979415893555, "learning_rate": 0.001, "loss": 1.7954, "step": 452480 }, { "epoch": 39.03864734299517, "grad_norm": 0.4580766260623932, "learning_rate": 0.001, "loss": 1.7927, "step": 452536 }, { "epoch": 39.04347826086956, "grad_norm": 0.417779803276062, "learning_rate": 0.001, "loss": 1.7843, "step": 452592 }, { "epoch": 39.04830917874396, "grad_norm": 1.5431866645812988, "learning_rate": 0.001, "loss": 1.7782, "step": 452648 }, { "epoch": 39.05314009661836, "grad_norm": 7.4389824867248535, "learning_rate": 0.001, "loss": 1.7849, "step": 452704 }, { "epoch": 39.05797101449275, "grad_norm": 2.431720495223999, "learning_rate": 0.001, "loss": 1.7893, "step": 452760 }, { "epoch": 39.06280193236715, "grad_norm": 1.2876051664352417, "learning_rate": 0.001, "loss": 1.7806, "step": 452816 }, { "epoch": 39.067632850241544, "grad_norm": 0.9265264272689819, "learning_rate": 0.001, "loss": 1.7944, "step": 452872 }, { "epoch": 39.072463768115945, "grad_norm": 1.4943867921829224, "learning_rate": 0.001, "loss": 1.7761, "step": 452928 }, { "epoch": 39.07729468599034, "grad_norm": 0.446413516998291, "learning_rate": 0.001, "loss": 1.7803, "step": 452984 }, { "epoch": 39.08212560386473, "grad_norm": 0.9982115626335144, "learning_rate": 0.001, "loss": 1.7971, "step": 453040 }, { "epoch": 39.08695652173913, "grad_norm": 2.5845043659210205, "learning_rate": 0.001, "loss": 1.7827, "step": 453096 }, { "epoch": 39.091787439613526, "grad_norm": 3.0803334712982178, "learning_rate": 0.001, "loss": 1.7906, "step": 453152 }, { "epoch": 39.09661835748792, "grad_norm": 0.5121610760688782, "learning_rate": 0.001, "loss": 1.7858, "step": 453208 }, { "epoch": 39.10144927536232, "grad_norm": 0.8989846110343933, "learning_rate": 0.001, "loss": 1.787, "step": 453264 }, { "epoch": 39.106280193236714, "grad_norm": 0.5431614518165588, "learning_rate": 0.001, "loss": 1.8048, "step": 453320 }, { "epoch": 39.111111111111114, "grad_norm": 2.233323097229004, "learning_rate": 0.001, "loss": 1.7948, "step": 453376 }, { "epoch": 39.11594202898551, "grad_norm": 1.3849501609802246, "learning_rate": 0.001, "loss": 1.778, "step": 453432 }, { "epoch": 39.1207729468599, "grad_norm": 1.2357500791549683, "learning_rate": 0.001, "loss": 1.7787, "step": 453488 }, { "epoch": 39.1256038647343, "grad_norm": 0.7278436422348022, "learning_rate": 0.001, "loss": 1.7842, "step": 453544 }, { "epoch": 39.130434782608695, "grad_norm": 0.4302615523338318, "learning_rate": 0.001, "loss": 1.7861, "step": 453600 }, { "epoch": 39.13526570048309, "grad_norm": 5.418882369995117, "learning_rate": 0.001, "loss": 1.7796, "step": 453656 }, { "epoch": 39.14009661835749, "grad_norm": 0.3702942430973053, "learning_rate": 0.001, "loss": 1.7869, "step": 453712 }, { "epoch": 39.14492753623188, "grad_norm": 0.675091564655304, "learning_rate": 0.001, "loss": 1.7856, "step": 453768 }, { "epoch": 39.14975845410628, "grad_norm": 0.9066087007522583, "learning_rate": 0.001, "loss": 1.7731, "step": 453824 }, { "epoch": 39.15458937198068, "grad_norm": 0.8583076596260071, "learning_rate": 0.001, "loss": 1.7719, "step": 453880 }, { "epoch": 39.15942028985507, "grad_norm": 0.532241940498352, "learning_rate": 0.001, "loss": 1.7764, "step": 453936 }, { "epoch": 39.16425120772947, "grad_norm": 0.698300302028656, "learning_rate": 0.001, "loss": 1.7805, "step": 453992 }, { "epoch": 39.169082125603865, "grad_norm": 3.055135726928711, "learning_rate": 0.001, "loss": 1.7759, "step": 454048 }, { "epoch": 39.17391304347826, "grad_norm": 0.418081670999527, "learning_rate": 0.001, "loss": 1.7826, "step": 454104 }, { "epoch": 39.17874396135266, "grad_norm": 0.5559875965118408, "learning_rate": 0.001, "loss": 1.7862, "step": 454160 }, { "epoch": 39.18357487922705, "grad_norm": 0.8645333051681519, "learning_rate": 0.001, "loss": 1.7945, "step": 454216 }, { "epoch": 39.18840579710145, "grad_norm": 0.4972476065158844, "learning_rate": 0.001, "loss": 1.7928, "step": 454272 }, { "epoch": 39.193236714975846, "grad_norm": 1.5201612710952759, "learning_rate": 0.001, "loss": 1.7739, "step": 454328 }, { "epoch": 39.19806763285024, "grad_norm": 1.2820297479629517, "learning_rate": 0.001, "loss": 1.7776, "step": 454384 }, { "epoch": 39.20289855072464, "grad_norm": 0.688642680644989, "learning_rate": 0.001, "loss": 1.7769, "step": 454440 }, { "epoch": 39.207729468599034, "grad_norm": 0.5572932958602905, "learning_rate": 0.001, "loss": 1.7661, "step": 454496 }, { "epoch": 39.21256038647343, "grad_norm": 3.2622170448303223, "learning_rate": 0.001, "loss": 1.7725, "step": 454552 }, { "epoch": 39.21739130434783, "grad_norm": 0.5168890953063965, "learning_rate": 0.001, "loss": 1.769, "step": 454608 }, { "epoch": 39.22222222222222, "grad_norm": 0.7060826420783997, "learning_rate": 0.001, "loss": 1.7653, "step": 454664 }, { "epoch": 39.227053140096615, "grad_norm": 0.5047960877418518, "learning_rate": 0.001, "loss": 1.7761, "step": 454720 }, { "epoch": 39.231884057971016, "grad_norm": 10.524388313293457, "learning_rate": 0.001, "loss": 1.7848, "step": 454776 }, { "epoch": 39.23671497584541, "grad_norm": 0.7524645924568176, "learning_rate": 0.001, "loss": 1.795, "step": 454832 }, { "epoch": 39.24154589371981, "grad_norm": 0.500701367855072, "learning_rate": 0.001, "loss": 1.7883, "step": 454888 }, { "epoch": 39.2463768115942, "grad_norm": 4.089268207550049, "learning_rate": 0.001, "loss": 1.7888, "step": 454944 }, { "epoch": 39.2512077294686, "grad_norm": 0.7452492713928223, "learning_rate": 0.001, "loss": 1.801, "step": 455000 }, { "epoch": 39.256038647343, "grad_norm": 0.44097328186035156, "learning_rate": 0.001, "loss": 1.8018, "step": 455056 }, { "epoch": 39.26086956521739, "grad_norm": 0.4278869926929474, "learning_rate": 0.001, "loss": 1.7893, "step": 455112 }, { "epoch": 39.265700483091784, "grad_norm": 0.9720302820205688, "learning_rate": 0.001, "loss": 1.7817, "step": 455168 }, { "epoch": 39.270531400966185, "grad_norm": 0.5431703925132751, "learning_rate": 0.001, "loss": 1.7834, "step": 455224 }, { "epoch": 39.27536231884058, "grad_norm": 0.44795337319374084, "learning_rate": 0.001, "loss": 1.7807, "step": 455280 }, { "epoch": 39.28019323671498, "grad_norm": 0.4862595200538635, "learning_rate": 0.001, "loss": 1.7852, "step": 455336 }, { "epoch": 39.28502415458937, "grad_norm": 0.3776932656764984, "learning_rate": 0.001, "loss": 1.7888, "step": 455392 }, { "epoch": 39.289855072463766, "grad_norm": 0.3562157154083252, "learning_rate": 0.001, "loss": 1.7749, "step": 455448 }, { "epoch": 39.29468599033817, "grad_norm": 0.6022749543190002, "learning_rate": 0.001, "loss": 1.7815, "step": 455504 }, { "epoch": 39.29951690821256, "grad_norm": 0.5138041973114014, "learning_rate": 0.001, "loss": 1.7892, "step": 455560 }, { "epoch": 39.30434782608695, "grad_norm": 0.7644832730293274, "learning_rate": 0.001, "loss": 1.7837, "step": 455616 }, { "epoch": 39.309178743961354, "grad_norm": 1.4737321138381958, "learning_rate": 0.001, "loss": 1.7907, "step": 455672 }, { "epoch": 39.31400966183575, "grad_norm": 0.7813582420349121, "learning_rate": 0.001, "loss": 1.7827, "step": 455728 }, { "epoch": 39.31884057971015, "grad_norm": 0.5834698677062988, "learning_rate": 0.001, "loss": 1.7805, "step": 455784 }, { "epoch": 39.32367149758454, "grad_norm": 0.407444030046463, "learning_rate": 0.001, "loss": 1.7774, "step": 455840 }, { "epoch": 39.328502415458935, "grad_norm": 0.5704084038734436, "learning_rate": 0.001, "loss": 1.7784, "step": 455896 }, { "epoch": 39.333333333333336, "grad_norm": 6.085973262786865, "learning_rate": 0.001, "loss": 1.7754, "step": 455952 }, { "epoch": 39.33816425120773, "grad_norm": 2.780090808868408, "learning_rate": 0.001, "loss": 1.7864, "step": 456008 }, { "epoch": 39.34299516908212, "grad_norm": 1.311748743057251, "learning_rate": 0.001, "loss": 1.793, "step": 456064 }, { "epoch": 39.34782608695652, "grad_norm": 0.4682284891605377, "learning_rate": 0.001, "loss": 1.7823, "step": 456120 }, { "epoch": 39.35265700483092, "grad_norm": 0.34444138407707214, "learning_rate": 0.001, "loss": 1.7957, "step": 456176 }, { "epoch": 39.35748792270532, "grad_norm": 0.4472052752971649, "learning_rate": 0.001, "loss": 1.7975, "step": 456232 }, { "epoch": 39.36231884057971, "grad_norm": 0.700936496257782, "learning_rate": 0.001, "loss": 1.7865, "step": 456288 }, { "epoch": 39.367149758454104, "grad_norm": 0.29438868165016174, "learning_rate": 0.001, "loss": 1.7828, "step": 456344 }, { "epoch": 39.371980676328505, "grad_norm": 0.6125770807266235, "learning_rate": 0.001, "loss": 1.7792, "step": 456400 }, { "epoch": 39.3768115942029, "grad_norm": 0.24931997060775757, "learning_rate": 0.001, "loss": 1.7783, "step": 456456 }, { "epoch": 39.38164251207729, "grad_norm": 0.2730088531970978, "learning_rate": 0.001, "loss": 1.7811, "step": 456512 }, { "epoch": 39.38647342995169, "grad_norm": 0.29912522435188293, "learning_rate": 0.001, "loss": 1.7771, "step": 456568 }, { "epoch": 39.391304347826086, "grad_norm": 0.7951462268829346, "learning_rate": 0.001, "loss": 1.7812, "step": 456624 }, { "epoch": 39.39613526570048, "grad_norm": 2.0140445232391357, "learning_rate": 0.001, "loss": 1.7793, "step": 456680 }, { "epoch": 39.40096618357488, "grad_norm": 1.847533106803894, "learning_rate": 0.001, "loss": 1.7799, "step": 456736 }, { "epoch": 39.405797101449274, "grad_norm": 0.8291811347007751, "learning_rate": 0.001, "loss": 1.7869, "step": 456792 }, { "epoch": 39.410628019323674, "grad_norm": 0.35937872529029846, "learning_rate": 0.001, "loss": 1.7822, "step": 456848 }, { "epoch": 39.41545893719807, "grad_norm": 0.4862651228904724, "learning_rate": 0.001, "loss": 1.7808, "step": 456904 }, { "epoch": 39.42028985507246, "grad_norm": 0.6624985337257385, "learning_rate": 0.001, "loss": 1.777, "step": 456960 }, { "epoch": 39.42512077294686, "grad_norm": 0.4744476079940796, "learning_rate": 0.001, "loss": 1.7846, "step": 457016 }, { "epoch": 39.429951690821255, "grad_norm": 0.6915600895881653, "learning_rate": 0.001, "loss": 1.7869, "step": 457072 }, { "epoch": 39.43478260869565, "grad_norm": 0.9727147221565247, "learning_rate": 0.001, "loss": 1.7867, "step": 457128 }, { "epoch": 39.43961352657005, "grad_norm": 0.9777208566665649, "learning_rate": 0.001, "loss": 1.7813, "step": 457184 }, { "epoch": 39.44444444444444, "grad_norm": 0.8607946038246155, "learning_rate": 0.001, "loss": 1.7859, "step": 457240 }, { "epoch": 39.44927536231884, "grad_norm": 0.3632735013961792, "learning_rate": 0.001, "loss": 1.7874, "step": 457296 }, { "epoch": 39.45410628019324, "grad_norm": 0.4249671399593353, "learning_rate": 0.001, "loss": 1.7867, "step": 457352 }, { "epoch": 39.45893719806763, "grad_norm": 0.4214588701725006, "learning_rate": 0.001, "loss": 1.7755, "step": 457408 }, { "epoch": 39.46376811594203, "grad_norm": 0.26164811849594116, "learning_rate": 0.001, "loss": 1.7777, "step": 457464 }, { "epoch": 39.468599033816425, "grad_norm": 0.29496949911117554, "learning_rate": 0.001, "loss": 1.782, "step": 457520 }, { "epoch": 39.47342995169082, "grad_norm": 0.5179944634437561, "learning_rate": 0.001, "loss": 1.778, "step": 457576 }, { "epoch": 39.47826086956522, "grad_norm": 0.40170150995254517, "learning_rate": 0.001, "loss": 1.7899, "step": 457632 }, { "epoch": 39.48309178743961, "grad_norm": 2.452028274536133, "learning_rate": 0.001, "loss": 1.7875, "step": 457688 }, { "epoch": 39.48792270531401, "grad_norm": 0.6511350274085999, "learning_rate": 0.001, "loss": 1.7934, "step": 457744 }, { "epoch": 39.492753623188406, "grad_norm": 0.6816292405128479, "learning_rate": 0.001, "loss": 1.769, "step": 457800 }, { "epoch": 39.4975845410628, "grad_norm": 0.8402189612388611, "learning_rate": 0.001, "loss": 1.7751, "step": 457856 }, { "epoch": 39.5024154589372, "grad_norm": 0.6438387036323547, "learning_rate": 0.001, "loss": 1.7815, "step": 457912 }, { "epoch": 39.507246376811594, "grad_norm": 0.5256146788597107, "learning_rate": 0.001, "loss": 1.7923, "step": 457968 }, { "epoch": 39.51207729468599, "grad_norm": 0.823054313659668, "learning_rate": 0.001, "loss": 1.7911, "step": 458024 }, { "epoch": 39.51690821256039, "grad_norm": 2.2309350967407227, "learning_rate": 0.001, "loss": 1.8001, "step": 458080 }, { "epoch": 39.52173913043478, "grad_norm": 0.6493335962295532, "learning_rate": 0.001, "loss": 1.8206, "step": 458136 }, { "epoch": 39.52657004830918, "grad_norm": 0.6404735445976257, "learning_rate": 0.001, "loss": 1.8164, "step": 458192 }, { "epoch": 39.531400966183575, "grad_norm": 1.2641183137893677, "learning_rate": 0.001, "loss": 1.8087, "step": 458248 }, { "epoch": 39.53623188405797, "grad_norm": 2.2141754627227783, "learning_rate": 0.001, "loss": 1.8046, "step": 458304 }, { "epoch": 39.54106280193237, "grad_norm": 0.6644294857978821, "learning_rate": 0.001, "loss": 1.7954, "step": 458360 }, { "epoch": 39.54589371980676, "grad_norm": 0.8572977781295776, "learning_rate": 0.001, "loss": 1.7893, "step": 458416 }, { "epoch": 39.55072463768116, "grad_norm": 0.6831353306770325, "learning_rate": 0.001, "loss": 1.7894, "step": 458472 }, { "epoch": 39.55555555555556, "grad_norm": 0.9897552132606506, "learning_rate": 0.001, "loss": 1.8106, "step": 458528 }, { "epoch": 39.56038647342995, "grad_norm": 1.106087327003479, "learning_rate": 0.001, "loss": 1.8136, "step": 458584 }, { "epoch": 39.56521739130435, "grad_norm": 2.1643118858337402, "learning_rate": 0.001, "loss": 1.8137, "step": 458640 }, { "epoch": 39.570048309178745, "grad_norm": 1.3748916387557983, "learning_rate": 0.001, "loss": 1.8167, "step": 458696 }, { "epoch": 39.57487922705314, "grad_norm": 0.73423832654953, "learning_rate": 0.001, "loss": 1.8181, "step": 458752 }, { "epoch": 39.57971014492754, "grad_norm": 0.4277721643447876, "learning_rate": 0.001, "loss": 1.8303, "step": 458808 }, { "epoch": 39.58454106280193, "grad_norm": 1.349006175994873, "learning_rate": 0.001, "loss": 1.8311, "step": 458864 }, { "epoch": 39.589371980676326, "grad_norm": 0.4151976406574249, "learning_rate": 0.001, "loss": 1.8233, "step": 458920 }, { "epoch": 39.594202898550726, "grad_norm": 1.5622711181640625, "learning_rate": 0.001, "loss": 1.8172, "step": 458976 }, { "epoch": 39.59903381642512, "grad_norm": 0.6670666337013245, "learning_rate": 0.001, "loss": 1.8163, "step": 459032 }, { "epoch": 39.60386473429952, "grad_norm": 0.34776684641838074, "learning_rate": 0.001, "loss": 1.8317, "step": 459088 }, { "epoch": 39.608695652173914, "grad_norm": 0.40206941962242126, "learning_rate": 0.001, "loss": 1.822, "step": 459144 }, { "epoch": 39.61352657004831, "grad_norm": 0.3365606963634491, "learning_rate": 0.001, "loss": 1.8275, "step": 459200 }, { "epoch": 39.61835748792271, "grad_norm": 0.35529863834381104, "learning_rate": 0.001, "loss": 1.8277, "step": 459256 }, { "epoch": 39.6231884057971, "grad_norm": 0.3912605941295624, "learning_rate": 0.001, "loss": 1.823, "step": 459312 }, { "epoch": 39.628019323671495, "grad_norm": 0.3111409544944763, "learning_rate": 0.001, "loss": 1.8197, "step": 459368 }, { "epoch": 39.632850241545896, "grad_norm": 0.3650505542755127, "learning_rate": 0.001, "loss": 1.8064, "step": 459424 }, { "epoch": 39.63768115942029, "grad_norm": 2.255704641342163, "learning_rate": 0.001, "loss": 1.8109, "step": 459480 }, { "epoch": 39.64251207729468, "grad_norm": 0.4701070189476013, "learning_rate": 0.001, "loss": 1.7979, "step": 459536 }, { "epoch": 39.64734299516908, "grad_norm": 0.44536930322647095, "learning_rate": 0.001, "loss": 1.7985, "step": 459592 }, { "epoch": 39.65217391304348, "grad_norm": 0.3291632831096649, "learning_rate": 0.001, "loss": 1.8007, "step": 459648 }, { "epoch": 39.65700483091788, "grad_norm": 6.458952903747559, "learning_rate": 0.001, "loss": 1.7984, "step": 459704 }, { "epoch": 39.66183574879227, "grad_norm": 0.22720588743686676, "learning_rate": 0.001, "loss": 1.7937, "step": 459760 }, { "epoch": 39.666666666666664, "grad_norm": 1.5068660974502563, "learning_rate": 0.001, "loss": 1.7856, "step": 459816 }, { "epoch": 39.671497584541065, "grad_norm": 0.455994576215744, "learning_rate": 0.001, "loss": 1.7899, "step": 459872 }, { "epoch": 39.67632850241546, "grad_norm": 0.902661919593811, "learning_rate": 0.001, "loss": 1.8073, "step": 459928 }, { "epoch": 39.68115942028985, "grad_norm": 1.2910200357437134, "learning_rate": 0.001, "loss": 1.797, "step": 459984 }, { "epoch": 39.68599033816425, "grad_norm": 0.8921377062797546, "learning_rate": 0.001, "loss": 1.8023, "step": 460040 }, { "epoch": 39.690821256038646, "grad_norm": 0.42187631130218506, "learning_rate": 0.001, "loss": 1.7963, "step": 460096 }, { "epoch": 39.69565217391305, "grad_norm": 0.6834074854850769, "learning_rate": 0.001, "loss": 1.796, "step": 460152 }, { "epoch": 39.70048309178744, "grad_norm": 1.0055701732635498, "learning_rate": 0.001, "loss": 1.7999, "step": 460208 }, { "epoch": 39.70531400966183, "grad_norm": 0.44658946990966797, "learning_rate": 0.001, "loss": 1.7977, "step": 460264 }, { "epoch": 39.710144927536234, "grad_norm": 1.421464443206787, "learning_rate": 0.001, "loss": 1.8068, "step": 460320 }, { "epoch": 39.71497584541063, "grad_norm": 0.26028600335121155, "learning_rate": 0.001, "loss": 1.8006, "step": 460376 }, { "epoch": 39.71980676328502, "grad_norm": 0.4798416793346405, "learning_rate": 0.001, "loss": 1.7922, "step": 460432 }, { "epoch": 39.72463768115942, "grad_norm": 11.575304985046387, "learning_rate": 0.001, "loss": 1.7938, "step": 460488 }, { "epoch": 39.729468599033815, "grad_norm": 0.4292560815811157, "learning_rate": 0.001, "loss": 1.7969, "step": 460544 }, { "epoch": 39.734299516908216, "grad_norm": 0.5057810544967651, "learning_rate": 0.001, "loss": 1.7958, "step": 460600 }, { "epoch": 39.73913043478261, "grad_norm": 1.2123405933380127, "learning_rate": 0.001, "loss": 1.7925, "step": 460656 }, { "epoch": 39.743961352657, "grad_norm": 0.3548462986946106, "learning_rate": 0.001, "loss": 1.7876, "step": 460712 }, { "epoch": 39.7487922705314, "grad_norm": 0.2897131145000458, "learning_rate": 0.001, "loss": 1.7923, "step": 460768 }, { "epoch": 39.7536231884058, "grad_norm": 0.459045946598053, "learning_rate": 0.001, "loss": 1.7952, "step": 460824 }, { "epoch": 39.75845410628019, "grad_norm": 0.3890233635902405, "learning_rate": 0.001, "loss": 1.79, "step": 460880 }, { "epoch": 39.76328502415459, "grad_norm": 0.4458432197570801, "learning_rate": 0.001, "loss": 1.7921, "step": 460936 }, { "epoch": 39.768115942028984, "grad_norm": 1.3525525331497192, "learning_rate": 0.001, "loss": 1.7921, "step": 460992 }, { "epoch": 39.772946859903385, "grad_norm": 0.3510584533214569, "learning_rate": 0.001, "loss": 1.7936, "step": 461048 }, { "epoch": 39.77777777777778, "grad_norm": 0.3776356279850006, "learning_rate": 0.001, "loss": 1.7868, "step": 461104 }, { "epoch": 39.78260869565217, "grad_norm": 0.34673061966896057, "learning_rate": 0.001, "loss": 1.786, "step": 461160 }, { "epoch": 39.78743961352657, "grad_norm": 0.26899057626724243, "learning_rate": 0.001, "loss": 1.7861, "step": 461216 }, { "epoch": 39.792270531400966, "grad_norm": 0.3279809057712555, "learning_rate": 0.001, "loss": 1.7921, "step": 461272 }, { "epoch": 39.79710144927536, "grad_norm": 0.40119341015815735, "learning_rate": 0.001, "loss": 1.7894, "step": 461328 }, { "epoch": 39.80193236714976, "grad_norm": 0.35732772946357727, "learning_rate": 0.001, "loss": 1.7824, "step": 461384 }, { "epoch": 39.806763285024154, "grad_norm": 0.3697347342967987, "learning_rate": 0.001, "loss": 1.7818, "step": 461440 }, { "epoch": 39.81159420289855, "grad_norm": 0.3744228184223175, "learning_rate": 0.001, "loss": 1.7832, "step": 461496 }, { "epoch": 39.81642512077295, "grad_norm": 0.331001877784729, "learning_rate": 0.001, "loss": 1.7913, "step": 461552 }, { "epoch": 39.82125603864734, "grad_norm": 0.44698405265808105, "learning_rate": 0.001, "loss": 1.7904, "step": 461608 }, { "epoch": 39.82608695652174, "grad_norm": 0.48755109310150146, "learning_rate": 0.001, "loss": 1.7897, "step": 461664 }, { "epoch": 39.830917874396135, "grad_norm": 0.35192379355430603, "learning_rate": 0.001, "loss": 1.7816, "step": 461720 }, { "epoch": 39.83574879227053, "grad_norm": 3.45180082321167, "learning_rate": 0.001, "loss": 1.793, "step": 461776 }, { "epoch": 39.84057971014493, "grad_norm": 3.187631607055664, "learning_rate": 0.001, "loss": 1.8097, "step": 461832 }, { "epoch": 39.84541062801932, "grad_norm": 0.43015211820602417, "learning_rate": 0.001, "loss": 1.8097, "step": 461888 }, { "epoch": 39.85024154589372, "grad_norm": 1.5903823375701904, "learning_rate": 0.001, "loss": 1.8046, "step": 461944 }, { "epoch": 39.85507246376812, "grad_norm": 0.40889662504196167, "learning_rate": 0.001, "loss": 1.799, "step": 462000 }, { "epoch": 39.85990338164251, "grad_norm": 0.60368412733078, "learning_rate": 0.001, "loss": 1.7985, "step": 462056 }, { "epoch": 39.86473429951691, "grad_norm": 1.835952877998352, "learning_rate": 0.001, "loss": 1.8048, "step": 462112 }, { "epoch": 39.869565217391305, "grad_norm": 0.8176502585411072, "learning_rate": 0.001, "loss": 1.8114, "step": 462168 }, { "epoch": 39.8743961352657, "grad_norm": 1.0162155628204346, "learning_rate": 0.001, "loss": 1.8158, "step": 462224 }, { "epoch": 39.8792270531401, "grad_norm": 0.3615024983882904, "learning_rate": 0.001, "loss": 1.8043, "step": 462280 }, { "epoch": 39.88405797101449, "grad_norm": 0.9517819881439209, "learning_rate": 0.001, "loss": 1.8246, "step": 462336 }, { "epoch": 39.888888888888886, "grad_norm": 0.7721591591835022, "learning_rate": 0.001, "loss": 1.8096, "step": 462392 }, { "epoch": 39.893719806763286, "grad_norm": 0.3732524514198303, "learning_rate": 0.001, "loss": 1.8057, "step": 462448 }, { "epoch": 39.89855072463768, "grad_norm": 0.34945225715637207, "learning_rate": 0.001, "loss": 1.809, "step": 462504 }, { "epoch": 39.90338164251208, "grad_norm": 0.3488682508468628, "learning_rate": 0.001, "loss": 1.8053, "step": 462560 }, { "epoch": 39.908212560386474, "grad_norm": 0.501899778842926, "learning_rate": 0.001, "loss": 1.7992, "step": 462616 }, { "epoch": 39.91304347826087, "grad_norm": 0.34453535079956055, "learning_rate": 0.001, "loss": 1.7933, "step": 462672 }, { "epoch": 39.91787439613527, "grad_norm": 0.5333484411239624, "learning_rate": 0.001, "loss": 1.7998, "step": 462728 }, { "epoch": 39.92270531400966, "grad_norm": 1.104191780090332, "learning_rate": 0.001, "loss": 1.8008, "step": 462784 }, { "epoch": 39.927536231884055, "grad_norm": 0.4531182646751404, "learning_rate": 0.001, "loss": 1.7959, "step": 462840 }, { "epoch": 39.932367149758456, "grad_norm": 0.481452614068985, "learning_rate": 0.001, "loss": 1.787, "step": 462896 }, { "epoch": 39.93719806763285, "grad_norm": 4.912936687469482, "learning_rate": 0.001, "loss": 1.7818, "step": 462952 }, { "epoch": 39.94202898550725, "grad_norm": 1.993960976600647, "learning_rate": 0.001, "loss": 1.7933, "step": 463008 }, { "epoch": 39.94685990338164, "grad_norm": 0.8945672512054443, "learning_rate": 0.001, "loss": 1.7847, "step": 463064 }, { "epoch": 39.95169082125604, "grad_norm": 0.610833466053009, "learning_rate": 0.001, "loss": 1.7887, "step": 463120 }, { "epoch": 39.95652173913044, "grad_norm": 0.3845690190792084, "learning_rate": 0.001, "loss": 1.7837, "step": 463176 }, { "epoch": 39.96135265700483, "grad_norm": 0.3148888051509857, "learning_rate": 0.001, "loss": 1.7848, "step": 463232 }, { "epoch": 39.966183574879224, "grad_norm": 0.5330688953399658, "learning_rate": 0.001, "loss": 1.7895, "step": 463288 }, { "epoch": 39.971014492753625, "grad_norm": 0.5150025486946106, "learning_rate": 0.001, "loss": 1.7939, "step": 463344 }, { "epoch": 39.97584541062802, "grad_norm": 0.2669714391231537, "learning_rate": 0.001, "loss": 1.7924, "step": 463400 }, { "epoch": 39.98067632850242, "grad_norm": 0.5796805024147034, "learning_rate": 0.001, "loss": 1.7975, "step": 463456 }, { "epoch": 39.98550724637681, "grad_norm": 0.29214203357696533, "learning_rate": 0.001, "loss": 1.8189, "step": 463512 }, { "epoch": 39.990338164251206, "grad_norm": 0.6958968639373779, "learning_rate": 0.001, "loss": 1.8422, "step": 463568 }, { "epoch": 39.99516908212561, "grad_norm": 0.3184243440628052, "learning_rate": 0.001, "loss": 1.8248, "step": 463624 }, { "epoch": 40.0, "grad_norm": 1.4207526445388794, "learning_rate": 0.001, "loss": 1.812, "step": 463680 }, { "epoch": 40.00483091787439, "grad_norm": 0.6524496674537659, "learning_rate": 0.001, "loss": 1.7646, "step": 463736 }, { "epoch": 40.009661835748794, "grad_norm": 1.4772812128067017, "learning_rate": 0.001, "loss": 1.7513, "step": 463792 }, { "epoch": 40.01449275362319, "grad_norm": 0.7522541880607605, "learning_rate": 0.001, "loss": 1.7561, "step": 463848 }, { "epoch": 40.01932367149758, "grad_norm": 0.5458579659461975, "learning_rate": 0.001, "loss": 1.7706, "step": 463904 }, { "epoch": 40.02415458937198, "grad_norm": 0.2897782325744629, "learning_rate": 0.001, "loss": 1.7693, "step": 463960 }, { "epoch": 40.028985507246375, "grad_norm": 0.28634926676750183, "learning_rate": 0.001, "loss": 1.762, "step": 464016 }, { "epoch": 40.033816425120776, "grad_norm": 0.3169400095939636, "learning_rate": 0.001, "loss": 1.7561, "step": 464072 }, { "epoch": 40.03864734299517, "grad_norm": 0.3103554844856262, "learning_rate": 0.001, "loss": 1.759, "step": 464128 }, { "epoch": 40.04347826086956, "grad_norm": 2.0770516395568848, "learning_rate": 0.001, "loss": 1.7607, "step": 464184 }, { "epoch": 40.04830917874396, "grad_norm": 0.2822380065917969, "learning_rate": 0.001, "loss": 1.7501, "step": 464240 }, { "epoch": 40.05314009661836, "grad_norm": 0.44424065947532654, "learning_rate": 0.001, "loss": 1.7483, "step": 464296 }, { "epoch": 40.05797101449275, "grad_norm": 0.8618985414505005, "learning_rate": 0.001, "loss": 1.7461, "step": 464352 }, { "epoch": 40.06280193236715, "grad_norm": 0.24674537777900696, "learning_rate": 0.001, "loss": 1.7593, "step": 464408 }, { "epoch": 40.067632850241544, "grad_norm": 2.078155040740967, "learning_rate": 0.001, "loss": 1.7589, "step": 464464 }, { "epoch": 40.072463768115945, "grad_norm": 1.1528406143188477, "learning_rate": 0.001, "loss": 1.7637, "step": 464520 }, { "epoch": 40.07729468599034, "grad_norm": 0.2665976881980896, "learning_rate": 0.001, "loss": 1.7581, "step": 464576 }, { "epoch": 40.08212560386473, "grad_norm": 0.31531381607055664, "learning_rate": 0.001, "loss": 1.7512, "step": 464632 }, { "epoch": 40.08695652173913, "grad_norm": 0.5856656432151794, "learning_rate": 0.001, "loss": 1.7607, "step": 464688 }, { "epoch": 40.091787439613526, "grad_norm": 0.6434282660484314, "learning_rate": 0.001, "loss": 1.7638, "step": 464744 }, { "epoch": 40.09661835748792, "grad_norm": 0.2845962643623352, "learning_rate": 0.001, "loss": 1.7502, "step": 464800 }, { "epoch": 40.10144927536232, "grad_norm": 1.4034231901168823, "learning_rate": 0.001, "loss": 1.7581, "step": 464856 }, { "epoch": 40.106280193236714, "grad_norm": 0.9407950639724731, "learning_rate": 0.001, "loss": 1.7573, "step": 464912 }, { "epoch": 40.111111111111114, "grad_norm": 0.3429856598377228, "learning_rate": 0.001, "loss": 1.7568, "step": 464968 }, { "epoch": 40.11594202898551, "grad_norm": 1.6717718839645386, "learning_rate": 0.001, "loss": 1.7726, "step": 465024 }, { "epoch": 40.1207729468599, "grad_norm": 0.45053860545158386, "learning_rate": 0.001, "loss": 1.7653, "step": 465080 }, { "epoch": 40.1256038647343, "grad_norm": 0.4006911516189575, "learning_rate": 0.001, "loss": 1.769, "step": 465136 }, { "epoch": 40.130434782608695, "grad_norm": 2.572669744491577, "learning_rate": 0.001, "loss": 1.7688, "step": 465192 }, { "epoch": 40.13526570048309, "grad_norm": 0.3920019268989563, "learning_rate": 0.001, "loss": 1.7681, "step": 465248 }, { "epoch": 40.14009661835749, "grad_norm": 3.3301730155944824, "learning_rate": 0.001, "loss": 1.7549, "step": 465304 }, { "epoch": 40.14492753623188, "grad_norm": 9.085259437561035, "learning_rate": 0.001, "loss": 1.7686, "step": 465360 }, { "epoch": 40.14975845410628, "grad_norm": 0.5766760110855103, "learning_rate": 0.001, "loss": 1.8207, "step": 465416 }, { "epoch": 40.15458937198068, "grad_norm": 0.7689758539199829, "learning_rate": 0.001, "loss": 1.8316, "step": 465472 }, { "epoch": 40.15942028985507, "grad_norm": 0.3896240293979645, "learning_rate": 0.001, "loss": 1.8178, "step": 465528 }, { "epoch": 40.16425120772947, "grad_norm": 0.6707692742347717, "learning_rate": 0.001, "loss": 1.7952, "step": 465584 }, { "epoch": 40.169082125603865, "grad_norm": 4.146088600158691, "learning_rate": 0.001, "loss": 1.7849, "step": 465640 }, { "epoch": 40.17391304347826, "grad_norm": 1.4844402074813843, "learning_rate": 0.001, "loss": 1.7821, "step": 465696 }, { "epoch": 40.17874396135266, "grad_norm": 0.82992023229599, "learning_rate": 0.001, "loss": 1.7904, "step": 465752 }, { "epoch": 40.18357487922705, "grad_norm": 0.7022607922554016, "learning_rate": 0.001, "loss": 1.7903, "step": 465808 }, { "epoch": 40.18840579710145, "grad_norm": 0.40158897638320923, "learning_rate": 0.001, "loss": 1.7918, "step": 465864 }, { "epoch": 40.193236714975846, "grad_norm": 2.5403244495391846, "learning_rate": 0.001, "loss": 1.7901, "step": 465920 }, { "epoch": 40.19806763285024, "grad_norm": 0.3751315176486969, "learning_rate": 0.001, "loss": 1.7909, "step": 465976 }, { "epoch": 40.20289855072464, "grad_norm": 0.5983942747116089, "learning_rate": 0.001, "loss": 1.7861, "step": 466032 }, { "epoch": 40.207729468599034, "grad_norm": 2.069592237472534, "learning_rate": 0.001, "loss": 1.783, "step": 466088 }, { "epoch": 40.21256038647343, "grad_norm": 2.0024001598358154, "learning_rate": 0.001, "loss": 1.7809, "step": 466144 }, { "epoch": 40.21739130434783, "grad_norm": 0.6388764381408691, "learning_rate": 0.001, "loss": 1.7822, "step": 466200 }, { "epoch": 40.22222222222222, "grad_norm": 1.505611538887024, "learning_rate": 0.001, "loss": 1.7861, "step": 466256 }, { "epoch": 40.227053140096615, "grad_norm": 0.5152513384819031, "learning_rate": 0.001, "loss": 1.7867, "step": 466312 }, { "epoch": 40.231884057971016, "grad_norm": 1.0379621982574463, "learning_rate": 0.001, "loss": 1.7821, "step": 466368 }, { "epoch": 40.23671497584541, "grad_norm": 1.443725824356079, "learning_rate": 0.001, "loss": 1.7804, "step": 466424 }, { "epoch": 40.24154589371981, "grad_norm": 2.7233238220214844, "learning_rate": 0.001, "loss": 1.7757, "step": 466480 }, { "epoch": 40.2463768115942, "grad_norm": 0.9201911091804504, "learning_rate": 0.001, "loss": 1.7792, "step": 466536 }, { "epoch": 40.2512077294686, "grad_norm": 4.018593788146973, "learning_rate": 0.001, "loss": 1.7679, "step": 466592 }, { "epoch": 40.256038647343, "grad_norm": 0.6747997999191284, "learning_rate": 0.001, "loss": 1.7832, "step": 466648 }, { "epoch": 40.26086956521739, "grad_norm": 3.794398546218872, "learning_rate": 0.001, "loss": 1.7907, "step": 466704 }, { "epoch": 40.265700483091784, "grad_norm": 0.8058980107307434, "learning_rate": 0.001, "loss": 1.7781, "step": 466760 }, { "epoch": 40.270531400966185, "grad_norm": 6.252180576324463, "learning_rate": 0.001, "loss": 1.7838, "step": 466816 }, { "epoch": 40.27536231884058, "grad_norm": 0.6024144291877747, "learning_rate": 0.001, "loss": 1.771, "step": 466872 }, { "epoch": 40.28019323671498, "grad_norm": 0.868556559085846, "learning_rate": 0.001, "loss": 1.7791, "step": 466928 }, { "epoch": 40.28502415458937, "grad_norm": 3.6329457759857178, "learning_rate": 0.001, "loss": 1.7833, "step": 466984 }, { "epoch": 40.289855072463766, "grad_norm": 1.9187835454940796, "learning_rate": 0.001, "loss": 1.7735, "step": 467040 }, { "epoch": 40.29468599033817, "grad_norm": 0.840522050857544, "learning_rate": 0.001, "loss": 1.7868, "step": 467096 }, { "epoch": 40.29951690821256, "grad_norm": 9.573920249938965, "learning_rate": 0.001, "loss": 1.7884, "step": 467152 }, { "epoch": 40.30434782608695, "grad_norm": 4.821118354797363, "learning_rate": 0.001, "loss": 1.7894, "step": 467208 }, { "epoch": 40.309178743961354, "grad_norm": 0.6048091650009155, "learning_rate": 0.001, "loss": 1.788, "step": 467264 }, { "epoch": 40.31400966183575, "grad_norm": 3.0706441402435303, "learning_rate": 0.001, "loss": 1.7888, "step": 467320 }, { "epoch": 40.31884057971015, "grad_norm": 0.3010439872741699, "learning_rate": 0.001, "loss": 1.7863, "step": 467376 }, { "epoch": 40.32367149758454, "grad_norm": 0.5936932563781738, "learning_rate": 0.001, "loss": 1.7896, "step": 467432 }, { "epoch": 40.328502415458935, "grad_norm": 0.531542956829071, "learning_rate": 0.001, "loss": 1.7931, "step": 467488 }, { "epoch": 40.333333333333336, "grad_norm": 1.5305919647216797, "learning_rate": 0.001, "loss": 1.7919, "step": 467544 }, { "epoch": 40.33816425120773, "grad_norm": 3.220773220062256, "learning_rate": 0.001, "loss": 1.8072, "step": 467600 }, { "epoch": 40.34299516908212, "grad_norm": 0.48922473192214966, "learning_rate": 0.001, "loss": 1.8089, "step": 467656 }, { "epoch": 40.34782608695652, "grad_norm": 0.4027334749698639, "learning_rate": 0.001, "loss": 1.8008, "step": 467712 }, { "epoch": 40.35265700483092, "grad_norm": 0.6657637357711792, "learning_rate": 0.001, "loss": 1.8009, "step": 467768 }, { "epoch": 40.35748792270532, "grad_norm": 0.4558785557746887, "learning_rate": 0.001, "loss": 1.7945, "step": 467824 }, { "epoch": 40.36231884057971, "grad_norm": 0.42154136300086975, "learning_rate": 0.001, "loss": 1.8001, "step": 467880 }, { "epoch": 40.367149758454104, "grad_norm": 9.063130378723145, "learning_rate": 0.001, "loss": 1.7923, "step": 467936 }, { "epoch": 40.371980676328505, "grad_norm": 0.9229263067245483, "learning_rate": 0.001, "loss": 1.7841, "step": 467992 }, { "epoch": 40.3768115942029, "grad_norm": 2.44097900390625, "learning_rate": 0.001, "loss": 1.7879, "step": 468048 }, { "epoch": 40.38164251207729, "grad_norm": 3.8910253047943115, "learning_rate": 0.001, "loss": 1.789, "step": 468104 }, { "epoch": 40.38647342995169, "grad_norm": 0.7099465727806091, "learning_rate": 0.001, "loss": 1.7959, "step": 468160 }, { "epoch": 40.391304347826086, "grad_norm": 0.48814600706100464, "learning_rate": 0.001, "loss": 1.7959, "step": 468216 }, { "epoch": 40.39613526570048, "grad_norm": 3.358279228210449, "learning_rate": 0.001, "loss": 1.8128, "step": 468272 }, { "epoch": 40.40096618357488, "grad_norm": 0.5397480726242065, "learning_rate": 0.001, "loss": 1.8036, "step": 468328 }, { "epoch": 40.405797101449274, "grad_norm": 1.929792881011963, "learning_rate": 0.001, "loss": 1.8015, "step": 468384 }, { "epoch": 40.410628019323674, "grad_norm": 0.45460107922554016, "learning_rate": 0.001, "loss": 1.8084, "step": 468440 }, { "epoch": 40.41545893719807, "grad_norm": 0.34679287672042847, "learning_rate": 0.001, "loss": 1.7966, "step": 468496 }, { "epoch": 40.42028985507246, "grad_norm": 1.0440071821212769, "learning_rate": 0.001, "loss": 1.7897, "step": 468552 }, { "epoch": 40.42512077294686, "grad_norm": 0.316698282957077, "learning_rate": 0.001, "loss": 1.7985, "step": 468608 }, { "epoch": 40.429951690821255, "grad_norm": 4.706199645996094, "learning_rate": 0.001, "loss": 1.7846, "step": 468664 }, { "epoch": 40.43478260869565, "grad_norm": 0.5142912864685059, "learning_rate": 0.001, "loss": 1.7943, "step": 468720 }, { "epoch": 40.43961352657005, "grad_norm": 0.34487712383270264, "learning_rate": 0.001, "loss": 1.7988, "step": 468776 }, { "epoch": 40.44444444444444, "grad_norm": 3.8424384593963623, "learning_rate": 0.001, "loss": 1.7842, "step": 468832 }, { "epoch": 40.44927536231884, "grad_norm": 0.6596300601959229, "learning_rate": 0.001, "loss": 1.7865, "step": 468888 }, { "epoch": 40.45410628019324, "grad_norm": 0.3248670697212219, "learning_rate": 0.001, "loss": 1.7873, "step": 468944 }, { "epoch": 40.45893719806763, "grad_norm": 2.6557395458221436, "learning_rate": 0.001, "loss": 1.7878, "step": 469000 }, { "epoch": 40.46376811594203, "grad_norm": 0.3085186779499054, "learning_rate": 0.001, "loss": 1.7935, "step": 469056 }, { "epoch": 40.468599033816425, "grad_norm": 0.3560912609100342, "learning_rate": 0.001, "loss": 1.7973, "step": 469112 }, { "epoch": 40.47342995169082, "grad_norm": 5.407197952270508, "learning_rate": 0.001, "loss": 1.797, "step": 469168 }, { "epoch": 40.47826086956522, "grad_norm": 0.768102765083313, "learning_rate": 0.001, "loss": 1.7848, "step": 469224 }, { "epoch": 40.48309178743961, "grad_norm": 0.41351887583732605, "learning_rate": 0.001, "loss": 1.7851, "step": 469280 }, { "epoch": 40.48792270531401, "grad_norm": 1.4266083240509033, "learning_rate": 0.001, "loss": 1.7875, "step": 469336 }, { "epoch": 40.492753623188406, "grad_norm": 0.7370843291282654, "learning_rate": 0.001, "loss": 1.7774, "step": 469392 }, { "epoch": 40.4975845410628, "grad_norm": 1.0718393325805664, "learning_rate": 0.001, "loss": 1.7945, "step": 469448 }, { "epoch": 40.5024154589372, "grad_norm": 0.7308846712112427, "learning_rate": 0.001, "loss": 1.7811, "step": 469504 }, { "epoch": 40.507246376811594, "grad_norm": 0.43245819211006165, "learning_rate": 0.001, "loss": 1.7922, "step": 469560 }, { "epoch": 40.51207729468599, "grad_norm": 0.6871855854988098, "learning_rate": 0.001, "loss": 1.7985, "step": 469616 }, { "epoch": 40.51690821256039, "grad_norm": 3.2350080013275146, "learning_rate": 0.001, "loss": 1.7913, "step": 469672 }, { "epoch": 40.52173913043478, "grad_norm": 0.330098420381546, "learning_rate": 0.001, "loss": 1.7987, "step": 469728 }, { "epoch": 40.52657004830918, "grad_norm": 1.250045657157898, "learning_rate": 0.001, "loss": 1.7906, "step": 469784 }, { "epoch": 40.531400966183575, "grad_norm": 1.5717312097549438, "learning_rate": 0.001, "loss": 1.8026, "step": 469840 }, { "epoch": 40.53623188405797, "grad_norm": 1.2172527313232422, "learning_rate": 0.001, "loss": 1.7787, "step": 469896 }, { "epoch": 40.54106280193237, "grad_norm": 1.6189872026443481, "learning_rate": 0.001, "loss": 1.7867, "step": 469952 }, { "epoch": 40.54589371980676, "grad_norm": 0.734259843826294, "learning_rate": 0.001, "loss": 1.7887, "step": 470008 }, { "epoch": 40.55072463768116, "grad_norm": 0.9472125172615051, "learning_rate": 0.001, "loss": 1.7851, "step": 470064 }, { "epoch": 40.55555555555556, "grad_norm": 1.1033351421356201, "learning_rate": 0.001, "loss": 1.7915, "step": 470120 }, { "epoch": 40.56038647342995, "grad_norm": 0.8355786800384521, "learning_rate": 0.001, "loss": 1.7946, "step": 470176 }, { "epoch": 40.56521739130435, "grad_norm": 12.913107872009277, "learning_rate": 0.001, "loss": 1.7957, "step": 470232 }, { "epoch": 40.570048309178745, "grad_norm": 2.3686106204986572, "learning_rate": 0.001, "loss": 1.7918, "step": 470288 }, { "epoch": 40.57487922705314, "grad_norm": 0.5766381621360779, "learning_rate": 0.001, "loss": 1.8085, "step": 470344 }, { "epoch": 40.57971014492754, "grad_norm": 1.085120439529419, "learning_rate": 0.001, "loss": 1.8036, "step": 470400 }, { "epoch": 40.58454106280193, "grad_norm": 2.063915252685547, "learning_rate": 0.001, "loss": 1.8052, "step": 470456 }, { "epoch": 40.589371980676326, "grad_norm": 0.5404335260391235, "learning_rate": 0.001, "loss": 1.7923, "step": 470512 }, { "epoch": 40.594202898550726, "grad_norm": 3.4098215103149414, "learning_rate": 0.001, "loss": 1.7998, "step": 470568 }, { "epoch": 40.59903381642512, "grad_norm": 2.08225679397583, "learning_rate": 0.001, "loss": 1.804, "step": 470624 }, { "epoch": 40.60386473429952, "grad_norm": 1.1381951570510864, "learning_rate": 0.001, "loss": 1.8026, "step": 470680 }, { "epoch": 40.608695652173914, "grad_norm": 1.9227761030197144, "learning_rate": 0.001, "loss": 1.7948, "step": 470736 }, { "epoch": 40.61352657004831, "grad_norm": 0.3060775399208069, "learning_rate": 0.001, "loss": 1.8048, "step": 470792 }, { "epoch": 40.61835748792271, "grad_norm": 0.3622453212738037, "learning_rate": 0.001, "loss": 1.8031, "step": 470848 }, { "epoch": 40.6231884057971, "grad_norm": 0.7300992608070374, "learning_rate": 0.001, "loss": 1.786, "step": 470904 }, { "epoch": 40.628019323671495, "grad_norm": 2.353120803833008, "learning_rate": 0.001, "loss": 1.7869, "step": 470960 }, { "epoch": 40.632850241545896, "grad_norm": 4.230882167816162, "learning_rate": 0.001, "loss": 1.7915, "step": 471016 }, { "epoch": 40.63768115942029, "grad_norm": 5.836760520935059, "learning_rate": 0.001, "loss": 1.8007, "step": 471072 }, { "epoch": 40.64251207729468, "grad_norm": 1.0078600645065308, "learning_rate": 0.001, "loss": 1.7984, "step": 471128 }, { "epoch": 40.64734299516908, "grad_norm": 1.9425383806228638, "learning_rate": 0.001, "loss": 1.7963, "step": 471184 }, { "epoch": 40.65217391304348, "grad_norm": 2.351778030395508, "learning_rate": 0.001, "loss": 1.7989, "step": 471240 }, { "epoch": 40.65700483091788, "grad_norm": 1.2992092370986938, "learning_rate": 0.001, "loss": 1.8081, "step": 471296 }, { "epoch": 40.66183574879227, "grad_norm": 0.8100602626800537, "learning_rate": 0.001, "loss": 1.7981, "step": 471352 }, { "epoch": 40.666666666666664, "grad_norm": 0.7669732570648193, "learning_rate": 0.001, "loss": 1.7953, "step": 471408 }, { "epoch": 40.671497584541065, "grad_norm": 0.9874126315116882, "learning_rate": 0.001, "loss": 1.7905, "step": 471464 }, { "epoch": 40.67632850241546, "grad_norm": 2.1374104022979736, "learning_rate": 0.001, "loss": 1.7949, "step": 471520 }, { "epoch": 40.68115942028985, "grad_norm": 1.9910579919815063, "learning_rate": 0.001, "loss": 1.7917, "step": 471576 }, { "epoch": 40.68599033816425, "grad_norm": 0.8126847147941589, "learning_rate": 0.001, "loss": 1.7862, "step": 471632 }, { "epoch": 40.690821256038646, "grad_norm": 1.6848032474517822, "learning_rate": 0.001, "loss": 1.7846, "step": 471688 }, { "epoch": 40.69565217391305, "grad_norm": 5.48894739151001, "learning_rate": 0.001, "loss": 1.7922, "step": 471744 }, { "epoch": 40.70048309178744, "grad_norm": 15.806963920593262, "learning_rate": 0.001, "loss": 1.796, "step": 471800 }, { "epoch": 40.70531400966183, "grad_norm": 1.817166805267334, "learning_rate": 0.001, "loss": 1.7947, "step": 471856 }, { "epoch": 40.710144927536234, "grad_norm": 1.236768126487732, "learning_rate": 0.001, "loss": 1.7957, "step": 471912 }, { "epoch": 40.71497584541063, "grad_norm": 0.7347263693809509, "learning_rate": 0.001, "loss": 1.8026, "step": 471968 }, { "epoch": 40.71980676328502, "grad_norm": 0.33844712376594543, "learning_rate": 0.001, "loss": 1.8077, "step": 472024 }, { "epoch": 40.72463768115942, "grad_norm": 0.2523566484451294, "learning_rate": 0.001, "loss": 1.8086, "step": 472080 }, { "epoch": 40.729468599033815, "grad_norm": 0.5487380623817444, "learning_rate": 0.001, "loss": 1.8063, "step": 472136 }, { "epoch": 40.734299516908216, "grad_norm": 0.2891555726528168, "learning_rate": 0.001, "loss": 1.8087, "step": 472192 }, { "epoch": 40.73913043478261, "grad_norm": 1.0195664167404175, "learning_rate": 0.001, "loss": 1.8053, "step": 472248 }, { "epoch": 40.743961352657, "grad_norm": 0.3404592275619507, "learning_rate": 0.001, "loss": 1.8011, "step": 472304 }, { "epoch": 40.7487922705314, "grad_norm": 0.2918546795845032, "learning_rate": 0.001, "loss": 1.7996, "step": 472360 }, { "epoch": 40.7536231884058, "grad_norm": 1.090384602546692, "learning_rate": 0.001, "loss": 1.7853, "step": 472416 }, { "epoch": 40.75845410628019, "grad_norm": 0.5601615905761719, "learning_rate": 0.001, "loss": 1.7972, "step": 472472 }, { "epoch": 40.76328502415459, "grad_norm": 1.242680311203003, "learning_rate": 0.001, "loss": 1.7908, "step": 472528 }, { "epoch": 40.768115942028984, "grad_norm": 5.124416828155518, "learning_rate": 0.001, "loss": 1.7971, "step": 472584 }, { "epoch": 40.772946859903385, "grad_norm": 1.741362452507019, "learning_rate": 0.001, "loss": 1.7985, "step": 472640 }, { "epoch": 40.77777777777778, "grad_norm": 1.7197939157485962, "learning_rate": 0.001, "loss": 1.8017, "step": 472696 }, { "epoch": 40.78260869565217, "grad_norm": 0.6404716372489929, "learning_rate": 0.001, "loss": 1.8076, "step": 472752 }, { "epoch": 40.78743961352657, "grad_norm": 0.95561283826828, "learning_rate": 0.001, "loss": 1.8056, "step": 472808 }, { "epoch": 40.792270531400966, "grad_norm": 2.1630289554595947, "learning_rate": 0.001, "loss": 1.8173, "step": 472864 }, { "epoch": 40.79710144927536, "grad_norm": 0.44083094596862793, "learning_rate": 0.001, "loss": 1.8341, "step": 472920 }, { "epoch": 40.80193236714976, "grad_norm": 5.592893600463867, "learning_rate": 0.001, "loss": 1.8339, "step": 472976 }, { "epoch": 40.806763285024154, "grad_norm": 1.3401254415512085, "learning_rate": 0.001, "loss": 1.8278, "step": 473032 }, { "epoch": 40.81159420289855, "grad_norm": 2.299884080886841, "learning_rate": 0.001, "loss": 1.8097, "step": 473088 }, { "epoch": 40.81642512077295, "grad_norm": 1.4739097356796265, "learning_rate": 0.001, "loss": 1.8035, "step": 473144 }, { "epoch": 40.82125603864734, "grad_norm": 1.2621538639068604, "learning_rate": 0.001, "loss": 1.8056, "step": 473200 }, { "epoch": 40.82608695652174, "grad_norm": 3.414216995239258, "learning_rate": 0.001, "loss": 1.8027, "step": 473256 }, { "epoch": 40.830917874396135, "grad_norm": 0.6508470177650452, "learning_rate": 0.001, "loss": 1.8106, "step": 473312 }, { "epoch": 40.83574879227053, "grad_norm": 0.2761024534702301, "learning_rate": 0.001, "loss": 1.8053, "step": 473368 }, { "epoch": 40.84057971014493, "grad_norm": 0.42463451623916626, "learning_rate": 0.001, "loss": 1.8063, "step": 473424 }, { "epoch": 40.84541062801932, "grad_norm": 0.9954354763031006, "learning_rate": 0.001, "loss": 1.8115, "step": 473480 }, { "epoch": 40.85024154589372, "grad_norm": 0.4295814633369446, "learning_rate": 0.001, "loss": 1.799, "step": 473536 }, { "epoch": 40.85507246376812, "grad_norm": 0.3261200487613678, "learning_rate": 0.001, "loss": 1.802, "step": 473592 }, { "epoch": 40.85990338164251, "grad_norm": 0.72611403465271, "learning_rate": 0.001, "loss": 1.8004, "step": 473648 }, { "epoch": 40.86473429951691, "grad_norm": 0.2879563868045807, "learning_rate": 0.001, "loss": 1.8048, "step": 473704 }, { "epoch": 40.869565217391305, "grad_norm": 0.28102660179138184, "learning_rate": 0.001, "loss": 1.8084, "step": 473760 }, { "epoch": 40.8743961352657, "grad_norm": 0.3959755003452301, "learning_rate": 0.001, "loss": 1.8062, "step": 473816 }, { "epoch": 40.8792270531401, "grad_norm": 0.522480309009552, "learning_rate": 0.001, "loss": 1.7929, "step": 473872 }, { "epoch": 40.88405797101449, "grad_norm": 0.4061656594276428, "learning_rate": 0.001, "loss": 1.8045, "step": 473928 }, { "epoch": 40.888888888888886, "grad_norm": 0.33784377574920654, "learning_rate": 0.001, "loss": 1.8011, "step": 473984 }, { "epoch": 40.893719806763286, "grad_norm": 0.38558530807495117, "learning_rate": 0.001, "loss": 1.7955, "step": 474040 }, { "epoch": 40.89855072463768, "grad_norm": 0.7068955302238464, "learning_rate": 0.001, "loss": 1.8, "step": 474096 }, { "epoch": 40.90338164251208, "grad_norm": 0.34119927883148193, "learning_rate": 0.001, "loss": 1.7966, "step": 474152 }, { "epoch": 40.908212560386474, "grad_norm": 1.0256468057632446, "learning_rate": 0.001, "loss": 1.7939, "step": 474208 }, { "epoch": 40.91304347826087, "grad_norm": 0.42362144589424133, "learning_rate": 0.001, "loss": 1.7926, "step": 474264 }, { "epoch": 40.91787439613527, "grad_norm": 1.646641731262207, "learning_rate": 0.001, "loss": 1.7942, "step": 474320 }, { "epoch": 40.92270531400966, "grad_norm": 0.718420147895813, "learning_rate": 0.001, "loss": 1.7942, "step": 474376 }, { "epoch": 40.927536231884055, "grad_norm": 1.3165122270584106, "learning_rate": 0.001, "loss": 1.7888, "step": 474432 }, { "epoch": 40.932367149758456, "grad_norm": 0.7424231171607971, "learning_rate": 0.001, "loss": 1.7939, "step": 474488 }, { "epoch": 40.93719806763285, "grad_norm": 0.6402720808982849, "learning_rate": 0.001, "loss": 1.7992, "step": 474544 }, { "epoch": 40.94202898550725, "grad_norm": 0.40274515748023987, "learning_rate": 0.001, "loss": 1.7914, "step": 474600 }, { "epoch": 40.94685990338164, "grad_norm": 0.5087569952011108, "learning_rate": 0.001, "loss": 1.8068, "step": 474656 }, { "epoch": 40.95169082125604, "grad_norm": 0.31640368700027466, "learning_rate": 0.001, "loss": 1.8043, "step": 474712 }, { "epoch": 40.95652173913044, "grad_norm": 0.6490268707275391, "learning_rate": 0.001, "loss": 1.8093, "step": 474768 }, { "epoch": 40.96135265700483, "grad_norm": 0.3309587836265564, "learning_rate": 0.001, "loss": 1.7946, "step": 474824 }, { "epoch": 40.966183574879224, "grad_norm": 0.3931916654109955, "learning_rate": 0.001, "loss": 1.7958, "step": 474880 }, { "epoch": 40.971014492753625, "grad_norm": 0.5302411913871765, "learning_rate": 0.001, "loss": 1.7975, "step": 474936 }, { "epoch": 40.97584541062802, "grad_norm": 0.850965678691864, "learning_rate": 0.001, "loss": 1.7986, "step": 474992 }, { "epoch": 40.98067632850242, "grad_norm": 2.373933792114258, "learning_rate": 0.001, "loss": 1.8104, "step": 475048 }, { "epoch": 40.98550724637681, "grad_norm": 2.4777591228485107, "learning_rate": 0.001, "loss": 1.8295, "step": 475104 }, { "epoch": 40.990338164251206, "grad_norm": 0.3572227954864502, "learning_rate": 0.001, "loss": 1.8322, "step": 475160 }, { "epoch": 40.99516908212561, "grad_norm": 0.9296237230300903, "learning_rate": 0.001, "loss": 1.822, "step": 475216 }, { "epoch": 41.0, "grad_norm": 0.8926190137863159, "learning_rate": 0.001, "loss": 1.8112, "step": 475272 }, { "epoch": 41.00483091787439, "grad_norm": 2.0439271926879883, "learning_rate": 0.001, "loss": 1.7837, "step": 475328 }, { "epoch": 41.009661835748794, "grad_norm": 0.3749220073223114, "learning_rate": 0.001, "loss": 1.7718, "step": 475384 }, { "epoch": 41.01449275362319, "grad_norm": 0.5885409712791443, "learning_rate": 0.001, "loss": 1.7551, "step": 475440 }, { "epoch": 41.01932367149758, "grad_norm": 0.5109129548072815, "learning_rate": 0.001, "loss": 1.7761, "step": 475496 }, { "epoch": 41.02415458937198, "grad_norm": 0.4802860617637634, "learning_rate": 0.001, "loss": 1.7783, "step": 475552 }, { "epoch": 41.028985507246375, "grad_norm": 0.4525991678237915, "learning_rate": 0.001, "loss": 1.77, "step": 475608 }, { "epoch": 41.033816425120776, "grad_norm": 5.876457214355469, "learning_rate": 0.001, "loss": 1.776, "step": 475664 }, { "epoch": 41.03864734299517, "grad_norm": 1.0678629875183105, "learning_rate": 0.001, "loss": 1.7699, "step": 475720 }, { "epoch": 41.04347826086956, "grad_norm": 5.873805046081543, "learning_rate": 0.001, "loss": 1.7627, "step": 475776 }, { "epoch": 41.04830917874396, "grad_norm": 7.068751335144043, "learning_rate": 0.001, "loss": 1.7577, "step": 475832 }, { "epoch": 41.05314009661836, "grad_norm": 0.4418167769908905, "learning_rate": 0.001, "loss": 1.7756, "step": 475888 }, { "epoch": 41.05797101449275, "grad_norm": 1.9148973226547241, "learning_rate": 0.001, "loss": 1.7767, "step": 475944 }, { "epoch": 41.06280193236715, "grad_norm": 0.36009693145751953, "learning_rate": 0.001, "loss": 1.7742, "step": 476000 }, { "epoch": 41.067632850241544, "grad_norm": 0.34679704904556274, "learning_rate": 0.001, "loss": 1.7679, "step": 476056 }, { "epoch": 41.072463768115945, "grad_norm": 2.6572606563568115, "learning_rate": 0.001, "loss": 1.7737, "step": 476112 }, { "epoch": 41.07729468599034, "grad_norm": 0.3586326539516449, "learning_rate": 0.001, "loss": 1.7716, "step": 476168 }, { "epoch": 41.08212560386473, "grad_norm": 0.3348504304885864, "learning_rate": 0.001, "loss": 1.7687, "step": 476224 }, { "epoch": 41.08695652173913, "grad_norm": 1.4983302354812622, "learning_rate": 0.001, "loss": 1.769, "step": 476280 }, { "epoch": 41.091787439613526, "grad_norm": 0.6238147020339966, "learning_rate": 0.001, "loss": 1.7625, "step": 476336 }, { "epoch": 41.09661835748792, "grad_norm": 0.43684133887290955, "learning_rate": 0.001, "loss": 1.7535, "step": 476392 }, { "epoch": 41.10144927536232, "grad_norm": 0.6273221373558044, "learning_rate": 0.001, "loss": 1.775, "step": 476448 }, { "epoch": 41.106280193236714, "grad_norm": 0.33252567052841187, "learning_rate": 0.001, "loss": 1.7813, "step": 476504 }, { "epoch": 41.111111111111114, "grad_norm": 1.2029973268508911, "learning_rate": 0.001, "loss": 1.7839, "step": 476560 }, { "epoch": 41.11594202898551, "grad_norm": 0.7935737371444702, "learning_rate": 0.001, "loss": 1.7744, "step": 476616 }, { "epoch": 41.1207729468599, "grad_norm": 0.3420565724372864, "learning_rate": 0.001, "loss": 1.7688, "step": 476672 }, { "epoch": 41.1256038647343, "grad_norm": 0.4086375832557678, "learning_rate": 0.001, "loss": 1.7676, "step": 476728 }, { "epoch": 41.130434782608695, "grad_norm": 0.7315663695335388, "learning_rate": 0.001, "loss": 1.7666, "step": 476784 }, { "epoch": 41.13526570048309, "grad_norm": 1.3200725317001343, "learning_rate": 0.001, "loss": 1.7617, "step": 476840 }, { "epoch": 41.14009661835749, "grad_norm": 0.36857759952545166, "learning_rate": 0.001, "loss": 1.7746, "step": 476896 }, { "epoch": 41.14492753623188, "grad_norm": 2.1257357597351074, "learning_rate": 0.001, "loss": 1.7752, "step": 476952 }, { "epoch": 41.14975845410628, "grad_norm": 18.307849884033203, "learning_rate": 0.001, "loss": 1.768, "step": 477008 }, { "epoch": 41.15458937198068, "grad_norm": 15.964292526245117, "learning_rate": 0.001, "loss": 1.7659, "step": 477064 }, { "epoch": 41.15942028985507, "grad_norm": 1.7357410192489624, "learning_rate": 0.001, "loss": 1.7811, "step": 477120 }, { "epoch": 41.16425120772947, "grad_norm": 2.8916144371032715, "learning_rate": 0.001, "loss": 1.7766, "step": 477176 }, { "epoch": 41.169082125603865, "grad_norm": 9.587381362915039, "learning_rate": 0.001, "loss": 1.7754, "step": 477232 }, { "epoch": 41.17391304347826, "grad_norm": 10.051681518554688, "learning_rate": 0.001, "loss": 1.7862, "step": 477288 }, { "epoch": 41.17874396135266, "grad_norm": 0.5565129518508911, "learning_rate": 0.001, "loss": 1.8049, "step": 477344 }, { "epoch": 41.18357487922705, "grad_norm": 0.4959957003593445, "learning_rate": 0.001, "loss": 1.8187, "step": 477400 }, { "epoch": 41.18840579710145, "grad_norm": 3.313873052597046, "learning_rate": 0.001, "loss": 1.8037, "step": 477456 }, { "epoch": 41.193236714975846, "grad_norm": 0.4376958906650543, "learning_rate": 0.001, "loss": 1.7858, "step": 477512 }, { "epoch": 41.19806763285024, "grad_norm": 0.4345230460166931, "learning_rate": 0.001, "loss": 1.7866, "step": 477568 }, { "epoch": 41.20289855072464, "grad_norm": 2.190399646759033, "learning_rate": 0.001, "loss": 1.7844, "step": 477624 }, { "epoch": 41.207729468599034, "grad_norm": 0.5309112071990967, "learning_rate": 0.001, "loss": 1.782, "step": 477680 }, { "epoch": 41.21256038647343, "grad_norm": 0.5927920937538147, "learning_rate": 0.001, "loss": 1.767, "step": 477736 }, { "epoch": 41.21739130434783, "grad_norm": 0.5471197366714478, "learning_rate": 0.001, "loss": 1.7655, "step": 477792 }, { "epoch": 41.22222222222222, "grad_norm": 0.3584359288215637, "learning_rate": 0.001, "loss": 1.772, "step": 477848 }, { "epoch": 41.227053140096615, "grad_norm": 0.6914898157119751, "learning_rate": 0.001, "loss": 1.7711, "step": 477904 }, { "epoch": 41.231884057971016, "grad_norm": 1.2572054862976074, "learning_rate": 0.001, "loss": 1.7746, "step": 477960 }, { "epoch": 41.23671497584541, "grad_norm": 2.600163221359253, "learning_rate": 0.001, "loss": 1.7854, "step": 478016 }, { "epoch": 41.24154589371981, "grad_norm": 1.1626826524734497, "learning_rate": 0.001, "loss": 1.78, "step": 478072 }, { "epoch": 41.2463768115942, "grad_norm": 0.4736309349536896, "learning_rate": 0.001, "loss": 1.7716, "step": 478128 }, { "epoch": 41.2512077294686, "grad_norm": 0.49782654643058777, "learning_rate": 0.001, "loss": 1.7727, "step": 478184 }, { "epoch": 41.256038647343, "grad_norm": 1.4296596050262451, "learning_rate": 0.001, "loss": 1.7736, "step": 478240 }, { "epoch": 41.26086956521739, "grad_norm": 1.2838565111160278, "learning_rate": 0.001, "loss": 1.7755, "step": 478296 }, { "epoch": 41.265700483091784, "grad_norm": 0.30322641134262085, "learning_rate": 0.001, "loss": 1.7734, "step": 478352 }, { "epoch": 41.270531400966185, "grad_norm": 0.558612585067749, "learning_rate": 0.001, "loss": 1.7792, "step": 478408 }, { "epoch": 41.27536231884058, "grad_norm": 0.991550624370575, "learning_rate": 0.001, "loss": 1.7751, "step": 478464 }, { "epoch": 41.28019323671498, "grad_norm": 3.387787103652954, "learning_rate": 0.001, "loss": 1.7813, "step": 478520 }, { "epoch": 41.28502415458937, "grad_norm": 0.44729992747306824, "learning_rate": 0.001, "loss": 1.7791, "step": 478576 }, { "epoch": 41.289855072463766, "grad_norm": 0.42040523886680603, "learning_rate": 0.001, "loss": 1.773, "step": 478632 }, { "epoch": 41.29468599033817, "grad_norm": 0.5031183362007141, "learning_rate": 0.001, "loss": 1.7819, "step": 478688 }, { "epoch": 41.29951690821256, "grad_norm": 5.760951042175293, "learning_rate": 0.001, "loss": 1.7731, "step": 478744 }, { "epoch": 41.30434782608695, "grad_norm": 0.3674054741859436, "learning_rate": 0.001, "loss": 1.7887, "step": 478800 }, { "epoch": 41.309178743961354, "grad_norm": 0.3358306586742401, "learning_rate": 0.001, "loss": 1.783, "step": 478856 }, { "epoch": 41.31400966183575, "grad_norm": 0.7326270341873169, "learning_rate": 0.001, "loss": 1.7937, "step": 478912 }, { "epoch": 41.31884057971015, "grad_norm": 1.3335152864456177, "learning_rate": 0.001, "loss": 1.7949, "step": 478968 }, { "epoch": 41.32367149758454, "grad_norm": 1.9383679628372192, "learning_rate": 0.001, "loss": 1.8, "step": 479024 }, { "epoch": 41.328502415458935, "grad_norm": 0.5108675956726074, "learning_rate": 0.001, "loss": 1.7908, "step": 479080 }, { "epoch": 41.333333333333336, "grad_norm": 0.6211147904396057, "learning_rate": 0.001, "loss": 1.7871, "step": 479136 }, { "epoch": 41.33816425120773, "grad_norm": 1.3744021654129028, "learning_rate": 0.001, "loss": 1.7883, "step": 479192 }, { "epoch": 41.34299516908212, "grad_norm": 0.4898088872432709, "learning_rate": 0.001, "loss": 1.7749, "step": 479248 }, { "epoch": 41.34782608695652, "grad_norm": 8.637784004211426, "learning_rate": 0.001, "loss": 1.7906, "step": 479304 }, { "epoch": 41.35265700483092, "grad_norm": 0.5045645236968994, "learning_rate": 0.001, "loss": 1.7901, "step": 479360 }, { "epoch": 41.35748792270532, "grad_norm": 2.35829496383667, "learning_rate": 0.001, "loss": 1.7951, "step": 479416 }, { "epoch": 41.36231884057971, "grad_norm": 3.2063772678375244, "learning_rate": 0.001, "loss": 1.7871, "step": 479472 }, { "epoch": 41.367149758454104, "grad_norm": 8.71304702758789, "learning_rate": 0.001, "loss": 1.7794, "step": 479528 }, { "epoch": 41.371980676328505, "grad_norm": 0.27432936429977417, "learning_rate": 0.001, "loss": 1.7908, "step": 479584 }, { "epoch": 41.3768115942029, "grad_norm": 0.48368749022483826, "learning_rate": 0.001, "loss": 1.801, "step": 479640 }, { "epoch": 41.38164251207729, "grad_norm": 1.5829427242279053, "learning_rate": 0.001, "loss": 1.8054, "step": 479696 }, { "epoch": 41.38647342995169, "grad_norm": 1.237141489982605, "learning_rate": 0.001, "loss": 1.7911, "step": 479752 }, { "epoch": 41.391304347826086, "grad_norm": 0.4113626778125763, "learning_rate": 0.001, "loss": 1.7925, "step": 479808 }, { "epoch": 41.39613526570048, "grad_norm": 3.046964168548584, "learning_rate": 0.001, "loss": 1.7911, "step": 479864 }, { "epoch": 41.40096618357488, "grad_norm": 3.221512794494629, "learning_rate": 0.001, "loss": 1.7843, "step": 479920 }, { "epoch": 41.405797101449274, "grad_norm": 0.30573770403862, "learning_rate": 0.001, "loss": 1.7887, "step": 479976 }, { "epoch": 41.410628019323674, "grad_norm": 0.3555962145328522, "learning_rate": 0.001, "loss": 1.7874, "step": 480032 }, { "epoch": 41.41545893719807, "grad_norm": 11.715228080749512, "learning_rate": 0.001, "loss": 1.7906, "step": 480088 }, { "epoch": 41.42028985507246, "grad_norm": 0.35159745812416077, "learning_rate": 0.001, "loss": 1.7796, "step": 480144 }, { "epoch": 41.42512077294686, "grad_norm": 0.8535027503967285, "learning_rate": 0.001, "loss": 1.7848, "step": 480200 }, { "epoch": 41.429951690821255, "grad_norm": 3.7386040687561035, "learning_rate": 0.001, "loss": 1.7768, "step": 480256 }, { "epoch": 41.43478260869565, "grad_norm": 0.301082581281662, "learning_rate": 0.001, "loss": 1.7808, "step": 480312 }, { "epoch": 41.43961352657005, "grad_norm": 0.3548678755760193, "learning_rate": 0.001, "loss": 1.7732, "step": 480368 }, { "epoch": 41.44444444444444, "grad_norm": 1.2450335025787354, "learning_rate": 0.001, "loss": 1.7715, "step": 480424 }, { "epoch": 41.44927536231884, "grad_norm": 0.5334143042564392, "learning_rate": 0.001, "loss": 1.7803, "step": 480480 }, { "epoch": 41.45410628019324, "grad_norm": 1.6901187896728516, "learning_rate": 0.001, "loss": 1.7874, "step": 480536 }, { "epoch": 41.45893719806763, "grad_norm": 0.4750899076461792, "learning_rate": 0.001, "loss": 1.7804, "step": 480592 }, { "epoch": 41.46376811594203, "grad_norm": 0.3533329367637634, "learning_rate": 0.001, "loss": 1.7864, "step": 480648 }, { "epoch": 41.468599033816425, "grad_norm": 8.500853538513184, "learning_rate": 0.001, "loss": 1.787, "step": 480704 }, { "epoch": 41.47342995169082, "grad_norm": 0.7980721592903137, "learning_rate": 0.001, "loss": 1.7814, "step": 480760 }, { "epoch": 41.47826086956522, "grad_norm": 1.0998457670211792, "learning_rate": 0.001, "loss": 1.7812, "step": 480816 }, { "epoch": 41.48309178743961, "grad_norm": 1.9388716220855713, "learning_rate": 0.001, "loss": 1.7766, "step": 480872 }, { "epoch": 41.48792270531401, "grad_norm": 0.5676780343055725, "learning_rate": 0.001, "loss": 1.7859, "step": 480928 }, { "epoch": 41.492753623188406, "grad_norm": 0.3662495017051697, "learning_rate": 0.001, "loss": 1.7915, "step": 480984 }, { "epoch": 41.4975845410628, "grad_norm": 1.3654133081436157, "learning_rate": 0.001, "loss": 1.7959, "step": 481040 }, { "epoch": 41.5024154589372, "grad_norm": 0.5599485039710999, "learning_rate": 0.001, "loss": 1.7949, "step": 481096 }, { "epoch": 41.507246376811594, "grad_norm": 1.0812079906463623, "learning_rate": 0.001, "loss": 1.7809, "step": 481152 }, { "epoch": 41.51207729468599, "grad_norm": 0.378946989774704, "learning_rate": 0.001, "loss": 1.7828, "step": 481208 }, { "epoch": 41.51690821256039, "grad_norm": 0.593230664730072, "learning_rate": 0.001, "loss": 1.7786, "step": 481264 }, { "epoch": 41.52173913043478, "grad_norm": 0.4693809747695923, "learning_rate": 0.001, "loss": 1.7833, "step": 481320 }, { "epoch": 41.52657004830918, "grad_norm": 1.0312747955322266, "learning_rate": 0.001, "loss": 1.7774, "step": 481376 }, { "epoch": 41.531400966183575, "grad_norm": 0.37058448791503906, "learning_rate": 0.001, "loss": 1.7772, "step": 481432 }, { "epoch": 41.53623188405797, "grad_norm": 0.701335608959198, "learning_rate": 0.001, "loss": 1.7823, "step": 481488 }, { "epoch": 41.54106280193237, "grad_norm": 0.3499172031879425, "learning_rate": 0.001, "loss": 1.7807, "step": 481544 }, { "epoch": 41.54589371980676, "grad_norm": 0.6889649033546448, "learning_rate": 0.001, "loss": 1.7807, "step": 481600 }, { "epoch": 41.55072463768116, "grad_norm": 0.3357560634613037, "learning_rate": 0.001, "loss": 1.7731, "step": 481656 }, { "epoch": 41.55555555555556, "grad_norm": 0.32627397775650024, "learning_rate": 0.001, "loss": 1.7773, "step": 481712 }, { "epoch": 41.56038647342995, "grad_norm": 0.6696512699127197, "learning_rate": 0.001, "loss": 1.7664, "step": 481768 }, { "epoch": 41.56521739130435, "grad_norm": 0.6523452401161194, "learning_rate": 0.001, "loss": 1.7703, "step": 481824 }, { "epoch": 41.570048309178745, "grad_norm": 1.4383167028427124, "learning_rate": 0.001, "loss": 1.7717, "step": 481880 }, { "epoch": 41.57487922705314, "grad_norm": 0.39921385049819946, "learning_rate": 0.001, "loss": 1.7794, "step": 481936 }, { "epoch": 41.57971014492754, "grad_norm": 0.3504568636417389, "learning_rate": 0.001, "loss": 1.7689, "step": 481992 }, { "epoch": 41.58454106280193, "grad_norm": 0.26321402192115784, "learning_rate": 0.001, "loss": 1.7827, "step": 482048 }, { "epoch": 41.589371980676326, "grad_norm": 0.5420135855674744, "learning_rate": 0.001, "loss": 1.7702, "step": 482104 }, { "epoch": 41.594202898550726, "grad_norm": 0.41049522161483765, "learning_rate": 0.001, "loss": 1.7694, "step": 482160 }, { "epoch": 41.59903381642512, "grad_norm": 0.2921530306339264, "learning_rate": 0.001, "loss": 1.7693, "step": 482216 }, { "epoch": 41.60386473429952, "grad_norm": 0.2506851851940155, "learning_rate": 0.001, "loss": 1.7731, "step": 482272 }, { "epoch": 41.608695652173914, "grad_norm": 0.40679728984832764, "learning_rate": 0.001, "loss": 1.7669, "step": 482328 }, { "epoch": 41.61352657004831, "grad_norm": 0.3142333924770355, "learning_rate": 0.001, "loss": 1.7677, "step": 482384 }, { "epoch": 41.61835748792271, "grad_norm": 0.3366691768169403, "learning_rate": 0.001, "loss": 1.7666, "step": 482440 }, { "epoch": 41.6231884057971, "grad_norm": 0.34135791659355164, "learning_rate": 0.001, "loss": 1.7767, "step": 482496 }, { "epoch": 41.628019323671495, "grad_norm": 0.29753515124320984, "learning_rate": 0.001, "loss": 1.7613, "step": 482552 }, { "epoch": 41.632850241545896, "grad_norm": 0.3050452768802643, "learning_rate": 0.001, "loss": 1.7724, "step": 482608 }, { "epoch": 41.63768115942029, "grad_norm": 0.2986689507961273, "learning_rate": 0.001, "loss": 1.7736, "step": 482664 }, { "epoch": 41.64251207729468, "grad_norm": 0.46290600299835205, "learning_rate": 0.001, "loss": 1.7748, "step": 482720 }, { "epoch": 41.64734299516908, "grad_norm": 4.14735221862793, "learning_rate": 0.001, "loss": 1.7805, "step": 482776 }, { "epoch": 41.65217391304348, "grad_norm": 0.5980857014656067, "learning_rate": 0.001, "loss": 1.7823, "step": 482832 }, { "epoch": 41.65700483091788, "grad_norm": 0.36213555932044983, "learning_rate": 0.001, "loss": 1.7801, "step": 482888 }, { "epoch": 41.66183574879227, "grad_norm": 0.524092435836792, "learning_rate": 0.001, "loss": 1.776, "step": 482944 }, { "epoch": 41.666666666666664, "grad_norm": 0.2648862898349762, "learning_rate": 0.001, "loss": 1.7864, "step": 483000 }, { "epoch": 41.671497584541065, "grad_norm": 0.717807412147522, "learning_rate": 0.001, "loss": 1.7882, "step": 483056 }, { "epoch": 41.67632850241546, "grad_norm": 1.0099356174468994, "learning_rate": 0.001, "loss": 1.7928, "step": 483112 }, { "epoch": 41.68115942028985, "grad_norm": 0.3962900638580322, "learning_rate": 0.001, "loss": 1.7741, "step": 483168 }, { "epoch": 41.68599033816425, "grad_norm": 1.5139120817184448, "learning_rate": 0.001, "loss": 1.7667, "step": 483224 }, { "epoch": 41.690821256038646, "grad_norm": 0.4782089293003082, "learning_rate": 0.001, "loss": 1.7736, "step": 483280 }, { "epoch": 41.69565217391305, "grad_norm": 1.228147268295288, "learning_rate": 0.001, "loss": 1.7762, "step": 483336 }, { "epoch": 41.70048309178744, "grad_norm": 1.27705717086792, "learning_rate": 0.001, "loss": 1.7924, "step": 483392 }, { "epoch": 41.70531400966183, "grad_norm": 1.0360182523727417, "learning_rate": 0.001, "loss": 1.7851, "step": 483448 }, { "epoch": 41.710144927536234, "grad_norm": 1.6343693733215332, "learning_rate": 0.001, "loss": 1.7678, "step": 483504 }, { "epoch": 41.71497584541063, "grad_norm": 0.5440138578414917, "learning_rate": 0.001, "loss": 1.7774, "step": 483560 }, { "epoch": 41.71980676328502, "grad_norm": 0.8304124474525452, "learning_rate": 0.001, "loss": 1.7786, "step": 483616 }, { "epoch": 41.72463768115942, "grad_norm": 0.8030955195426941, "learning_rate": 0.001, "loss": 1.7849, "step": 483672 }, { "epoch": 41.729468599033815, "grad_norm": 0.3007888197898865, "learning_rate": 0.001, "loss": 1.7787, "step": 483728 }, { "epoch": 41.734299516908216, "grad_norm": 1.1150705814361572, "learning_rate": 0.001, "loss": 1.7738, "step": 483784 }, { "epoch": 41.73913043478261, "grad_norm": 0.33095914125442505, "learning_rate": 0.001, "loss": 1.7714, "step": 483840 }, { "epoch": 41.743961352657, "grad_norm": 0.29412415623664856, "learning_rate": 0.001, "loss": 1.7632, "step": 483896 }, { "epoch": 41.7487922705314, "grad_norm": 0.38712266087532043, "learning_rate": 0.001, "loss": 1.7759, "step": 483952 }, { "epoch": 41.7536231884058, "grad_norm": 1.4370554685592651, "learning_rate": 0.001, "loss": 1.7776, "step": 484008 }, { "epoch": 41.75845410628019, "grad_norm": 3.35516619682312, "learning_rate": 0.001, "loss": 1.7943, "step": 484064 }, { "epoch": 41.76328502415459, "grad_norm": 0.8320044279098511, "learning_rate": 0.001, "loss": 1.7974, "step": 484120 }, { "epoch": 41.768115942028984, "grad_norm": 1.3436111211776733, "learning_rate": 0.001, "loss": 1.8201, "step": 484176 }, { "epoch": 41.772946859903385, "grad_norm": 2.4458961486816406, "learning_rate": 0.001, "loss": 1.8088, "step": 484232 }, { "epoch": 41.77777777777778, "grad_norm": 0.2836563289165497, "learning_rate": 0.001, "loss": 1.8052, "step": 484288 }, { "epoch": 41.78260869565217, "grad_norm": 1.6821556091308594, "learning_rate": 0.001, "loss": 1.8153, "step": 484344 }, { "epoch": 41.78743961352657, "grad_norm": 0.8647648096084595, "learning_rate": 0.001, "loss": 1.8086, "step": 484400 }, { "epoch": 41.792270531400966, "grad_norm": 0.8150994181632996, "learning_rate": 0.001, "loss": 1.8136, "step": 484456 }, { "epoch": 41.79710144927536, "grad_norm": 1.034799575805664, "learning_rate": 0.001, "loss": 1.8083, "step": 484512 }, { "epoch": 41.80193236714976, "grad_norm": 0.2924371361732483, "learning_rate": 0.001, "loss": 1.8069, "step": 484568 }, { "epoch": 41.806763285024154, "grad_norm": 8.87802505493164, "learning_rate": 0.001, "loss": 1.7988, "step": 484624 }, { "epoch": 41.81159420289855, "grad_norm": 5.548062801361084, "learning_rate": 0.001, "loss": 1.8021, "step": 484680 }, { "epoch": 41.81642512077295, "grad_norm": 0.27715036273002625, "learning_rate": 0.001, "loss": 1.7869, "step": 484736 }, { "epoch": 41.82125603864734, "grad_norm": 0.2625804841518402, "learning_rate": 0.001, "loss": 1.7848, "step": 484792 }, { "epoch": 41.82608695652174, "grad_norm": 0.3942045569419861, "learning_rate": 0.001, "loss": 1.7838, "step": 484848 }, { "epoch": 41.830917874396135, "grad_norm": 0.706469714641571, "learning_rate": 0.001, "loss": 1.7836, "step": 484904 }, { "epoch": 41.83574879227053, "grad_norm": 0.3525569438934326, "learning_rate": 0.001, "loss": 1.7905, "step": 484960 }, { "epoch": 41.84057971014493, "grad_norm": 0.344290554523468, "learning_rate": 0.001, "loss": 1.7817, "step": 485016 }, { "epoch": 41.84541062801932, "grad_norm": 2.547391891479492, "learning_rate": 0.001, "loss": 1.7799, "step": 485072 }, { "epoch": 41.85024154589372, "grad_norm": 1.127303957939148, "learning_rate": 0.001, "loss": 1.7835, "step": 485128 }, { "epoch": 41.85507246376812, "grad_norm": 1.1813757419586182, "learning_rate": 0.001, "loss": 1.7794, "step": 485184 }, { "epoch": 41.85990338164251, "grad_norm": 0.2986561954021454, "learning_rate": 0.001, "loss": 1.7872, "step": 485240 }, { "epoch": 41.86473429951691, "grad_norm": 3.1776645183563232, "learning_rate": 0.001, "loss": 1.7814, "step": 485296 }, { "epoch": 41.869565217391305, "grad_norm": 1.0821311473846436, "learning_rate": 0.001, "loss": 1.7782, "step": 485352 }, { "epoch": 41.8743961352657, "grad_norm": 0.8939442038536072, "learning_rate": 0.001, "loss": 1.7841, "step": 485408 }, { "epoch": 41.8792270531401, "grad_norm": 0.6704204678535461, "learning_rate": 0.001, "loss": 1.781, "step": 485464 }, { "epoch": 41.88405797101449, "grad_norm": 1.8376622200012207, "learning_rate": 0.001, "loss": 1.7826, "step": 485520 }, { "epoch": 41.888888888888886, "grad_norm": 0.2999434769153595, "learning_rate": 0.001, "loss": 1.781, "step": 485576 }, { "epoch": 41.893719806763286, "grad_norm": 0.7336422801017761, "learning_rate": 0.001, "loss": 1.7849, "step": 485632 }, { "epoch": 41.89855072463768, "grad_norm": 0.5957835912704468, "learning_rate": 0.001, "loss": 1.7821, "step": 485688 }, { "epoch": 41.90338164251208, "grad_norm": 0.28564947843551636, "learning_rate": 0.001, "loss": 1.7803, "step": 485744 }, { "epoch": 41.908212560386474, "grad_norm": 0.3469614088535309, "learning_rate": 0.001, "loss": 1.7822, "step": 485800 }, { "epoch": 41.91304347826087, "grad_norm": 1.036623239517212, "learning_rate": 0.001, "loss": 1.7729, "step": 485856 }, { "epoch": 41.91787439613527, "grad_norm": 0.45569103956222534, "learning_rate": 0.001, "loss": 1.7845, "step": 485912 }, { "epoch": 41.92270531400966, "grad_norm": 0.7068533897399902, "learning_rate": 0.001, "loss": 1.7887, "step": 485968 }, { "epoch": 41.927536231884055, "grad_norm": 0.471012145280838, "learning_rate": 0.001, "loss": 1.7943, "step": 486024 }, { "epoch": 41.932367149758456, "grad_norm": 1.8304589986801147, "learning_rate": 0.001, "loss": 1.793, "step": 486080 }, { "epoch": 41.93719806763285, "grad_norm": 0.32733026146888733, "learning_rate": 0.001, "loss": 1.7913, "step": 486136 }, { "epoch": 41.94202898550725, "grad_norm": 1.2495442628860474, "learning_rate": 0.001, "loss": 1.7862, "step": 486192 }, { "epoch": 41.94685990338164, "grad_norm": 0.5472974181175232, "learning_rate": 0.001, "loss": 1.791, "step": 486248 }, { "epoch": 41.95169082125604, "grad_norm": 1.3185720443725586, "learning_rate": 0.001, "loss": 1.7924, "step": 486304 }, { "epoch": 41.95652173913044, "grad_norm": 0.37852004170417786, "learning_rate": 0.001, "loss": 1.7843, "step": 486360 }, { "epoch": 41.96135265700483, "grad_norm": 0.3675987720489502, "learning_rate": 0.001, "loss": 1.7808, "step": 486416 }, { "epoch": 41.966183574879224, "grad_norm": 0.2760379910469055, "learning_rate": 0.001, "loss": 1.7836, "step": 486472 }, { "epoch": 41.971014492753625, "grad_norm": 0.29291966557502747, "learning_rate": 0.001, "loss": 1.7819, "step": 486528 }, { "epoch": 41.97584541062802, "grad_norm": 1.435795783996582, "learning_rate": 0.001, "loss": 1.7791, "step": 486584 }, { "epoch": 41.98067632850242, "grad_norm": 0.44536134600639343, "learning_rate": 0.001, "loss": 1.7803, "step": 486640 }, { "epoch": 41.98550724637681, "grad_norm": 0.5928128957748413, "learning_rate": 0.001, "loss": 1.784, "step": 486696 }, { "epoch": 41.990338164251206, "grad_norm": 0.3290199935436249, "learning_rate": 0.001, "loss": 1.7817, "step": 486752 }, { "epoch": 41.99516908212561, "grad_norm": 0.47797903418540955, "learning_rate": 0.001, "loss": 1.7828, "step": 486808 }, { "epoch": 42.0, "grad_norm": 0.3019459843635559, "learning_rate": 0.001, "loss": 1.7869, "step": 486864 }, { "epoch": 42.00483091787439, "grad_norm": 0.3805495798587799, "learning_rate": 0.001, "loss": 1.7583, "step": 486920 }, { "epoch": 42.009661835748794, "grad_norm": 0.5608112215995789, "learning_rate": 0.001, "loss": 1.7433, "step": 486976 }, { "epoch": 42.01449275362319, "grad_norm": 1.8001693487167358, "learning_rate": 0.001, "loss": 1.7575, "step": 487032 }, { "epoch": 42.01932367149758, "grad_norm": 4.909172534942627, "learning_rate": 0.001, "loss": 1.7349, "step": 487088 }, { "epoch": 42.02415458937198, "grad_norm": 0.26809054613113403, "learning_rate": 0.001, "loss": 1.7376, "step": 487144 }, { "epoch": 42.028985507246375, "grad_norm": 0.36053523421287537, "learning_rate": 0.001, "loss": 1.7501, "step": 487200 }, { "epoch": 42.033816425120776, "grad_norm": 4.105959415435791, "learning_rate": 0.001, "loss": 1.745, "step": 487256 }, { "epoch": 42.03864734299517, "grad_norm": 27.10481834411621, "learning_rate": 0.001, "loss": 1.7479, "step": 487312 }, { "epoch": 42.04347826086956, "grad_norm": 0.4710698127746582, "learning_rate": 0.001, "loss": 1.7438, "step": 487368 }, { "epoch": 42.04830917874396, "grad_norm": 0.29662564396858215, "learning_rate": 0.001, "loss": 1.7414, "step": 487424 }, { "epoch": 42.05314009661836, "grad_norm": 0.3034851849079132, "learning_rate": 0.001, "loss": 1.7484, "step": 487480 }, { "epoch": 42.05797101449275, "grad_norm": 0.3025754392147064, "learning_rate": 0.001, "loss": 1.7507, "step": 487536 }, { "epoch": 42.06280193236715, "grad_norm": 6.905580520629883, "learning_rate": 0.001, "loss": 1.7558, "step": 487592 }, { "epoch": 42.067632850241544, "grad_norm": 0.5275615453720093, "learning_rate": 0.001, "loss": 1.7583, "step": 487648 }, { "epoch": 42.072463768115945, "grad_norm": 0.8237336277961731, "learning_rate": 0.001, "loss": 1.7517, "step": 487704 }, { "epoch": 42.07729468599034, "grad_norm": 1.5896657705307007, "learning_rate": 0.001, "loss": 1.7542, "step": 487760 }, { "epoch": 42.08212560386473, "grad_norm": 2.3079230785369873, "learning_rate": 0.001, "loss": 1.7578, "step": 487816 }, { "epoch": 42.08695652173913, "grad_norm": 1.3778282403945923, "learning_rate": 0.001, "loss": 1.7526, "step": 487872 }, { "epoch": 42.091787439613526, "grad_norm": 1.1482212543487549, "learning_rate": 0.001, "loss": 1.7634, "step": 487928 }, { "epoch": 42.09661835748792, "grad_norm": 0.3569440245628357, "learning_rate": 0.001, "loss": 1.7663, "step": 487984 }, { "epoch": 42.10144927536232, "grad_norm": 1.2983609437942505, "learning_rate": 0.001, "loss": 1.7648, "step": 488040 }, { "epoch": 42.106280193236714, "grad_norm": 0.6139097213745117, "learning_rate": 0.001, "loss": 1.7713, "step": 488096 }, { "epoch": 42.111111111111114, "grad_norm": 0.6897456645965576, "learning_rate": 0.001, "loss": 1.7656, "step": 488152 }, { "epoch": 42.11594202898551, "grad_norm": 2.6329097747802734, "learning_rate": 0.001, "loss": 1.7585, "step": 488208 }, { "epoch": 42.1207729468599, "grad_norm": 0.42164111137390137, "learning_rate": 0.001, "loss": 1.76, "step": 488264 }, { "epoch": 42.1256038647343, "grad_norm": 0.35669389367103577, "learning_rate": 0.001, "loss": 1.7652, "step": 488320 }, { "epoch": 42.130434782608695, "grad_norm": 2.0179178714752197, "learning_rate": 0.001, "loss": 1.7666, "step": 488376 }, { "epoch": 42.13526570048309, "grad_norm": 0.3252449333667755, "learning_rate": 0.001, "loss": 1.7585, "step": 488432 }, { "epoch": 42.14009661835749, "grad_norm": 1.5790807008743286, "learning_rate": 0.001, "loss": 1.7561, "step": 488488 }, { "epoch": 42.14492753623188, "grad_norm": 0.7390744686126709, "learning_rate": 0.001, "loss": 1.7521, "step": 488544 }, { "epoch": 42.14975845410628, "grad_norm": 0.3487186133861542, "learning_rate": 0.001, "loss": 1.7467, "step": 488600 }, { "epoch": 42.15458937198068, "grad_norm": 1.0141098499298096, "learning_rate": 0.001, "loss": 1.75, "step": 488656 }, { "epoch": 42.15942028985507, "grad_norm": 0.3917505741119385, "learning_rate": 0.001, "loss": 1.7494, "step": 488712 }, { "epoch": 42.16425120772947, "grad_norm": 0.2992800772190094, "learning_rate": 0.001, "loss": 1.7559, "step": 488768 }, { "epoch": 42.169082125603865, "grad_norm": 1.0765210390090942, "learning_rate": 0.001, "loss": 1.7508, "step": 488824 }, { "epoch": 42.17391304347826, "grad_norm": 0.5060981512069702, "learning_rate": 0.001, "loss": 1.7489, "step": 488880 }, { "epoch": 42.17874396135266, "grad_norm": 0.631303071975708, "learning_rate": 0.001, "loss": 1.7516, "step": 488936 }, { "epoch": 42.18357487922705, "grad_norm": 0.3968592882156372, "learning_rate": 0.001, "loss": 1.7586, "step": 488992 }, { "epoch": 42.18840579710145, "grad_norm": 0.33593663573265076, "learning_rate": 0.001, "loss": 1.7619, "step": 489048 }, { "epoch": 42.193236714975846, "grad_norm": 0.5084505081176758, "learning_rate": 0.001, "loss": 1.7506, "step": 489104 }, { "epoch": 42.19806763285024, "grad_norm": 0.45566776394844055, "learning_rate": 0.001, "loss": 1.7545, "step": 489160 }, { "epoch": 42.20289855072464, "grad_norm": 2.111151695251465, "learning_rate": 0.001, "loss": 1.7601, "step": 489216 }, { "epoch": 42.207729468599034, "grad_norm": 0.41191428899765015, "learning_rate": 0.001, "loss": 1.7607, "step": 489272 }, { "epoch": 42.21256038647343, "grad_norm": 0.3674837648868561, "learning_rate": 0.001, "loss": 1.7754, "step": 489328 }, { "epoch": 42.21739130434783, "grad_norm": 0.3679570257663727, "learning_rate": 0.001, "loss": 1.7612, "step": 489384 }, { "epoch": 42.22222222222222, "grad_norm": 0.6189741492271423, "learning_rate": 0.001, "loss": 1.7579, "step": 489440 }, { "epoch": 42.227053140096615, "grad_norm": 0.27789726853370667, "learning_rate": 0.001, "loss": 1.7549, "step": 489496 }, { "epoch": 42.231884057971016, "grad_norm": 0.3272222876548767, "learning_rate": 0.001, "loss": 1.751, "step": 489552 }, { "epoch": 42.23671497584541, "grad_norm": 0.35177668929100037, "learning_rate": 0.001, "loss": 1.7518, "step": 489608 }, { "epoch": 42.24154589371981, "grad_norm": 2.27254056930542, "learning_rate": 0.001, "loss": 1.7517, "step": 489664 }, { "epoch": 42.2463768115942, "grad_norm": 0.6424328684806824, "learning_rate": 0.001, "loss": 1.7583, "step": 489720 }, { "epoch": 42.2512077294686, "grad_norm": 1.5050510168075562, "learning_rate": 0.001, "loss": 1.7458, "step": 489776 }, { "epoch": 42.256038647343, "grad_norm": 0.36695384979248047, "learning_rate": 0.001, "loss": 1.7551, "step": 489832 }, { "epoch": 42.26086956521739, "grad_norm": 1.2543820142745972, "learning_rate": 0.001, "loss": 1.7681, "step": 489888 }, { "epoch": 42.265700483091784, "grad_norm": 0.2839926481246948, "learning_rate": 0.001, "loss": 1.7607, "step": 489944 }, { "epoch": 42.270531400966185, "grad_norm": 0.4483574628829956, "learning_rate": 0.001, "loss": 1.7514, "step": 490000 }, { "epoch": 42.27536231884058, "grad_norm": 0.5835283398628235, "learning_rate": 0.001, "loss": 1.7521, "step": 490056 }, { "epoch": 42.28019323671498, "grad_norm": 0.6197874546051025, "learning_rate": 0.001, "loss": 1.7656, "step": 490112 }, { "epoch": 42.28502415458937, "grad_norm": 0.8116411566734314, "learning_rate": 0.001, "loss": 1.7603, "step": 490168 }, { "epoch": 42.289855072463766, "grad_norm": 0.3220919966697693, "learning_rate": 0.001, "loss": 1.7544, "step": 490224 }, { "epoch": 42.29468599033817, "grad_norm": 0.5693132877349854, "learning_rate": 0.001, "loss": 1.7622, "step": 490280 }, { "epoch": 42.29951690821256, "grad_norm": 0.4283170998096466, "learning_rate": 0.001, "loss": 1.7628, "step": 490336 }, { "epoch": 42.30434782608695, "grad_norm": 0.6508527398109436, "learning_rate": 0.001, "loss": 1.7551, "step": 490392 }, { "epoch": 42.309178743961354, "grad_norm": 0.32717782258987427, "learning_rate": 0.001, "loss": 1.7518, "step": 490448 }, { "epoch": 42.31400966183575, "grad_norm": 0.3732554614543915, "learning_rate": 0.001, "loss": 1.7517, "step": 490504 }, { "epoch": 42.31884057971015, "grad_norm": 0.7454554438591003, "learning_rate": 0.001, "loss": 1.7557, "step": 490560 }, { "epoch": 42.32367149758454, "grad_norm": 0.7526639103889465, "learning_rate": 0.001, "loss": 1.7503, "step": 490616 }, { "epoch": 42.328502415458935, "grad_norm": 0.4923805296421051, "learning_rate": 0.001, "loss": 1.7578, "step": 490672 }, { "epoch": 42.333333333333336, "grad_norm": 1.1724202632904053, "learning_rate": 0.001, "loss": 1.76, "step": 490728 }, { "epoch": 42.33816425120773, "grad_norm": 0.30553963780403137, "learning_rate": 0.001, "loss": 1.7613, "step": 490784 }, { "epoch": 42.34299516908212, "grad_norm": 0.31243520975112915, "learning_rate": 0.001, "loss": 1.763, "step": 490840 }, { "epoch": 42.34782608695652, "grad_norm": 2.3004539012908936, "learning_rate": 0.001, "loss": 1.7595, "step": 490896 }, { "epoch": 42.35265700483092, "grad_norm": 0.4995482563972473, "learning_rate": 0.001, "loss": 1.8014, "step": 490952 }, { "epoch": 42.35748792270532, "grad_norm": 0.7967793941497803, "learning_rate": 0.001, "loss": 1.8269, "step": 491008 }, { "epoch": 42.36231884057971, "grad_norm": 0.43179959058761597, "learning_rate": 0.001, "loss": 1.7878, "step": 491064 }, { "epoch": 42.367149758454104, "grad_norm": 1.0754050016403198, "learning_rate": 0.001, "loss": 1.7808, "step": 491120 }, { "epoch": 42.371980676328505, "grad_norm": 1.0952892303466797, "learning_rate": 0.001, "loss": 1.7675, "step": 491176 }, { "epoch": 42.3768115942029, "grad_norm": 0.24367645382881165, "learning_rate": 0.001, "loss": 1.7631, "step": 491232 }, { "epoch": 42.38164251207729, "grad_norm": 0.30544641613960266, "learning_rate": 0.001, "loss": 1.7656, "step": 491288 }, { "epoch": 42.38647342995169, "grad_norm": 0.25850972533226013, "learning_rate": 0.001, "loss": 1.7689, "step": 491344 }, { "epoch": 42.391304347826086, "grad_norm": 0.2946685254573822, "learning_rate": 0.001, "loss": 1.7651, "step": 491400 }, { "epoch": 42.39613526570048, "grad_norm": 0.3624315857887268, "learning_rate": 0.001, "loss": 1.7551, "step": 491456 }, { "epoch": 42.40096618357488, "grad_norm": 1.2270286083221436, "learning_rate": 0.001, "loss": 1.7669, "step": 491512 }, { "epoch": 42.405797101449274, "grad_norm": 0.24939844012260437, "learning_rate": 0.001, "loss": 1.7647, "step": 491568 }, { "epoch": 42.410628019323674, "grad_norm": 0.5110289454460144, "learning_rate": 0.001, "loss": 1.7625, "step": 491624 }, { "epoch": 42.41545893719807, "grad_norm": 0.42668417096138, "learning_rate": 0.001, "loss": 1.7664, "step": 491680 }, { "epoch": 42.42028985507246, "grad_norm": 0.6088284254074097, "learning_rate": 0.001, "loss": 1.7644, "step": 491736 }, { "epoch": 42.42512077294686, "grad_norm": 3.4654033184051514, "learning_rate": 0.001, "loss": 1.77, "step": 491792 }, { "epoch": 42.429951690821255, "grad_norm": 0.4033966660499573, "learning_rate": 0.001, "loss": 1.7713, "step": 491848 }, { "epoch": 42.43478260869565, "grad_norm": 0.4056691825389862, "learning_rate": 0.001, "loss": 1.7682, "step": 491904 }, { "epoch": 42.43961352657005, "grad_norm": 0.27506542205810547, "learning_rate": 0.001, "loss": 1.7694, "step": 491960 }, { "epoch": 42.44444444444444, "grad_norm": 0.5485376119613647, "learning_rate": 0.001, "loss": 1.7635, "step": 492016 }, { "epoch": 42.44927536231884, "grad_norm": 0.5162979364395142, "learning_rate": 0.001, "loss": 1.7687, "step": 492072 }, { "epoch": 42.45410628019324, "grad_norm": 0.31549012660980225, "learning_rate": 0.001, "loss": 1.763, "step": 492128 }, { "epoch": 42.45893719806763, "grad_norm": 0.3403273820877075, "learning_rate": 0.001, "loss": 1.7555, "step": 492184 }, { "epoch": 42.46376811594203, "grad_norm": 0.3960123360157013, "learning_rate": 0.001, "loss": 1.7598, "step": 492240 }, { "epoch": 42.468599033816425, "grad_norm": 0.2913028299808502, "learning_rate": 0.001, "loss": 1.7652, "step": 492296 }, { "epoch": 42.47342995169082, "grad_norm": 0.3264979124069214, "learning_rate": 0.001, "loss": 1.7653, "step": 492352 }, { "epoch": 42.47826086956522, "grad_norm": 0.35475075244903564, "learning_rate": 0.001, "loss": 1.7913, "step": 492408 }, { "epoch": 42.48309178743961, "grad_norm": 0.46228447556495667, "learning_rate": 0.001, "loss": 1.7766, "step": 492464 }, { "epoch": 42.48792270531401, "grad_norm": 0.35546115040779114, "learning_rate": 0.001, "loss": 1.7799, "step": 492520 }, { "epoch": 42.492753623188406, "grad_norm": 0.33248370885849, "learning_rate": 0.001, "loss": 1.7857, "step": 492576 }, { "epoch": 42.4975845410628, "grad_norm": 0.29436033964157104, "learning_rate": 0.001, "loss": 1.7819, "step": 492632 }, { "epoch": 42.5024154589372, "grad_norm": 0.39577922224998474, "learning_rate": 0.001, "loss": 1.7682, "step": 492688 }, { "epoch": 42.507246376811594, "grad_norm": 0.3741127550601959, "learning_rate": 0.001, "loss": 1.7617, "step": 492744 }, { "epoch": 42.51207729468599, "grad_norm": 0.2631893754005432, "learning_rate": 0.001, "loss": 1.7746, "step": 492800 }, { "epoch": 42.51690821256039, "grad_norm": 0.7409164905548096, "learning_rate": 0.001, "loss": 1.775, "step": 492856 }, { "epoch": 42.52173913043478, "grad_norm": 1.6313046216964722, "learning_rate": 0.001, "loss": 1.771, "step": 492912 }, { "epoch": 42.52657004830918, "grad_norm": 0.29509875178337097, "learning_rate": 0.001, "loss": 1.7615, "step": 492968 }, { "epoch": 42.531400966183575, "grad_norm": 6.29130744934082, "learning_rate": 0.001, "loss": 1.7597, "step": 493024 }, { "epoch": 42.53623188405797, "grad_norm": 0.30361488461494446, "learning_rate": 0.001, "loss": 1.7788, "step": 493080 }, { "epoch": 42.54106280193237, "grad_norm": 0.31971079111099243, "learning_rate": 0.001, "loss": 1.7663, "step": 493136 }, { "epoch": 42.54589371980676, "grad_norm": 0.3189185857772827, "learning_rate": 0.001, "loss": 1.7554, "step": 493192 }, { "epoch": 42.55072463768116, "grad_norm": 0.30841463804244995, "learning_rate": 0.001, "loss": 1.7618, "step": 493248 }, { "epoch": 42.55555555555556, "grad_norm": 0.5911606550216675, "learning_rate": 0.001, "loss": 1.7607, "step": 493304 }, { "epoch": 42.56038647342995, "grad_norm": 0.3256855309009552, "learning_rate": 0.001, "loss": 1.7506, "step": 493360 }, { "epoch": 42.56521739130435, "grad_norm": 0.2937866151332855, "learning_rate": 0.001, "loss": 1.7543, "step": 493416 }, { "epoch": 42.570048309178745, "grad_norm": 0.3603109121322632, "learning_rate": 0.001, "loss": 1.7635, "step": 493472 }, { "epoch": 42.57487922705314, "grad_norm": 1.8178479671478271, "learning_rate": 0.001, "loss": 1.7596, "step": 493528 }, { "epoch": 42.57971014492754, "grad_norm": 0.5783098340034485, "learning_rate": 0.001, "loss": 1.7738, "step": 493584 }, { "epoch": 42.58454106280193, "grad_norm": 0.681128740310669, "learning_rate": 0.001, "loss": 1.7759, "step": 493640 }, { "epoch": 42.589371980676326, "grad_norm": 0.5738411545753479, "learning_rate": 0.001, "loss": 1.7688, "step": 493696 }, { "epoch": 42.594202898550726, "grad_norm": 0.37642380595207214, "learning_rate": 0.001, "loss": 1.7704, "step": 493752 }, { "epoch": 42.59903381642512, "grad_norm": 2.3003830909729004, "learning_rate": 0.001, "loss": 1.7691, "step": 493808 }, { "epoch": 42.60386473429952, "grad_norm": 37.6695556640625, "learning_rate": 0.001, "loss": 1.76, "step": 493864 }, { "epoch": 42.608695652173914, "grad_norm": 0.34026166796684265, "learning_rate": 0.001, "loss": 1.7575, "step": 493920 }, { "epoch": 42.61352657004831, "grad_norm": 0.6241693496704102, "learning_rate": 0.001, "loss": 1.7769, "step": 493976 }, { "epoch": 42.61835748792271, "grad_norm": 0.4699699282646179, "learning_rate": 0.001, "loss": 1.7838, "step": 494032 }, { "epoch": 42.6231884057971, "grad_norm": 0.31558921933174133, "learning_rate": 0.001, "loss": 1.773, "step": 494088 }, { "epoch": 42.628019323671495, "grad_norm": 0.48866599798202515, "learning_rate": 0.001, "loss": 1.7829, "step": 494144 }, { "epoch": 42.632850241545896, "grad_norm": 0.9284820556640625, "learning_rate": 0.001, "loss": 1.7708, "step": 494200 }, { "epoch": 42.63768115942029, "grad_norm": 0.5138643383979797, "learning_rate": 0.001, "loss": 1.7748, "step": 494256 }, { "epoch": 42.64251207729468, "grad_norm": 0.4089737832546234, "learning_rate": 0.001, "loss": 1.7702, "step": 494312 }, { "epoch": 42.64734299516908, "grad_norm": 0.42057591676712036, "learning_rate": 0.001, "loss": 1.761, "step": 494368 }, { "epoch": 42.65217391304348, "grad_norm": 0.30342257022857666, "learning_rate": 0.001, "loss": 1.7619, "step": 494424 }, { "epoch": 42.65700483091788, "grad_norm": 1.0620832443237305, "learning_rate": 0.001, "loss": 1.7678, "step": 494480 }, { "epoch": 42.66183574879227, "grad_norm": 1.1656184196472168, "learning_rate": 0.001, "loss": 1.7744, "step": 494536 }, { "epoch": 42.666666666666664, "grad_norm": 0.3710213899612427, "learning_rate": 0.001, "loss": 1.7694, "step": 494592 }, { "epoch": 42.671497584541065, "grad_norm": 0.39259687066078186, "learning_rate": 0.001, "loss": 1.7762, "step": 494648 }, { "epoch": 42.67632850241546, "grad_norm": 0.3502708375453949, "learning_rate": 0.001, "loss": 1.771, "step": 494704 }, { "epoch": 42.68115942028985, "grad_norm": 0.31302696466445923, "learning_rate": 0.001, "loss": 1.7718, "step": 494760 }, { "epoch": 42.68599033816425, "grad_norm": 0.41087082028388977, "learning_rate": 0.001, "loss": 1.7651, "step": 494816 }, { "epoch": 42.690821256038646, "grad_norm": 3.3076224327087402, "learning_rate": 0.001, "loss": 1.7522, "step": 494872 }, { "epoch": 42.69565217391305, "grad_norm": 0.2786913812160492, "learning_rate": 0.001, "loss": 1.7608, "step": 494928 }, { "epoch": 42.70048309178744, "grad_norm": 0.44205424189567566, "learning_rate": 0.001, "loss": 1.7609, "step": 494984 }, { "epoch": 42.70531400966183, "grad_norm": 0.3690367639064789, "learning_rate": 0.001, "loss": 1.7572, "step": 495040 }, { "epoch": 42.710144927536234, "grad_norm": 0.3526320457458496, "learning_rate": 0.001, "loss": 1.7631, "step": 495096 }, { "epoch": 42.71497584541063, "grad_norm": 0.4674495756626129, "learning_rate": 0.001, "loss": 1.7639, "step": 495152 }, { "epoch": 42.71980676328502, "grad_norm": 0.2787375748157501, "learning_rate": 0.001, "loss": 1.7717, "step": 495208 }, { "epoch": 42.72463768115942, "grad_norm": 0.25620928406715393, "learning_rate": 0.001, "loss": 1.7757, "step": 495264 }, { "epoch": 42.729468599033815, "grad_norm": 0.5904477834701538, "learning_rate": 0.001, "loss": 1.7667, "step": 495320 }, { "epoch": 42.734299516908216, "grad_norm": 1.9867202043533325, "learning_rate": 0.001, "loss": 1.767, "step": 495376 }, { "epoch": 42.73913043478261, "grad_norm": 0.4272610545158386, "learning_rate": 0.001, "loss": 1.7527, "step": 495432 }, { "epoch": 42.743961352657, "grad_norm": 0.7886016368865967, "learning_rate": 0.001, "loss": 1.7645, "step": 495488 }, { "epoch": 42.7487922705314, "grad_norm": 0.46949565410614014, "learning_rate": 0.001, "loss": 1.7592, "step": 495544 }, { "epoch": 42.7536231884058, "grad_norm": 1.0974304676055908, "learning_rate": 0.001, "loss": 1.7581, "step": 495600 }, { "epoch": 42.75845410628019, "grad_norm": 0.5070813298225403, "learning_rate": 0.001, "loss": 1.7595, "step": 495656 }, { "epoch": 42.76328502415459, "grad_norm": 0.34071779251098633, "learning_rate": 0.001, "loss": 1.7563, "step": 495712 }, { "epoch": 42.768115942028984, "grad_norm": 1.6125874519348145, "learning_rate": 0.001, "loss": 1.7635, "step": 495768 }, { "epoch": 42.772946859903385, "grad_norm": 1.3556158542633057, "learning_rate": 0.001, "loss": 1.7604, "step": 495824 }, { "epoch": 42.77777777777778, "grad_norm": 0.773227870464325, "learning_rate": 0.001, "loss": 1.757, "step": 495880 }, { "epoch": 42.78260869565217, "grad_norm": 0.42122316360473633, "learning_rate": 0.001, "loss": 1.7469, "step": 495936 }, { "epoch": 42.78743961352657, "grad_norm": 0.28449690341949463, "learning_rate": 0.001, "loss": 1.7535, "step": 495992 }, { "epoch": 42.792270531400966, "grad_norm": 0.4180215001106262, "learning_rate": 0.001, "loss": 1.7568, "step": 496048 }, { "epoch": 42.79710144927536, "grad_norm": 0.22506479918956757, "learning_rate": 0.001, "loss": 1.7621, "step": 496104 }, { "epoch": 42.80193236714976, "grad_norm": 0.3406946063041687, "learning_rate": 0.001, "loss": 1.7561, "step": 496160 }, { "epoch": 42.806763285024154, "grad_norm": 0.3163577616214752, "learning_rate": 0.001, "loss": 1.7526, "step": 496216 }, { "epoch": 42.81159420289855, "grad_norm": 0.24708381295204163, "learning_rate": 0.001, "loss": 1.7611, "step": 496272 }, { "epoch": 42.81642512077295, "grad_norm": 0.3926644027233124, "learning_rate": 0.001, "loss": 1.7748, "step": 496328 }, { "epoch": 42.82125603864734, "grad_norm": 0.2978352904319763, "learning_rate": 0.001, "loss": 1.7778, "step": 496384 }, { "epoch": 42.82608695652174, "grad_norm": 0.5509081482887268, "learning_rate": 0.001, "loss": 1.7854, "step": 496440 }, { "epoch": 42.830917874396135, "grad_norm": 0.32073792815208435, "learning_rate": 0.001, "loss": 1.7783, "step": 496496 }, { "epoch": 42.83574879227053, "grad_norm": 0.6077530980110168, "learning_rate": 0.001, "loss": 1.7698, "step": 496552 }, { "epoch": 42.84057971014493, "grad_norm": 0.3482252359390259, "learning_rate": 0.001, "loss": 1.7672, "step": 496608 }, { "epoch": 42.84541062801932, "grad_norm": 0.3995283246040344, "learning_rate": 0.001, "loss": 1.7694, "step": 496664 }, { "epoch": 42.85024154589372, "grad_norm": 0.30362609028816223, "learning_rate": 0.001, "loss": 1.7697, "step": 496720 }, { "epoch": 42.85507246376812, "grad_norm": 0.3405693769454956, "learning_rate": 0.001, "loss": 1.7648, "step": 496776 }, { "epoch": 42.85990338164251, "grad_norm": 1.4146783351898193, "learning_rate": 0.001, "loss": 1.758, "step": 496832 }, { "epoch": 42.86473429951691, "grad_norm": 0.3263242840766907, "learning_rate": 0.001, "loss": 1.7615, "step": 496888 }, { "epoch": 42.869565217391305, "grad_norm": 0.47114306688308716, "learning_rate": 0.001, "loss": 1.7605, "step": 496944 }, { "epoch": 42.8743961352657, "grad_norm": 0.2934527099132538, "learning_rate": 0.001, "loss": 1.7614, "step": 497000 }, { "epoch": 42.8792270531401, "grad_norm": 0.3143116533756256, "learning_rate": 0.001, "loss": 1.761, "step": 497056 }, { "epoch": 42.88405797101449, "grad_norm": 0.38275882601737976, "learning_rate": 0.001, "loss": 1.762, "step": 497112 }, { "epoch": 42.888888888888886, "grad_norm": 0.3109045624732971, "learning_rate": 0.001, "loss": 1.7622, "step": 497168 }, { "epoch": 42.893719806763286, "grad_norm": 0.31653881072998047, "learning_rate": 0.001, "loss": 1.7707, "step": 497224 }, { "epoch": 42.89855072463768, "grad_norm": 0.3431593179702759, "learning_rate": 0.001, "loss": 1.7721, "step": 497280 }, { "epoch": 42.90338164251208, "grad_norm": 0.2628556787967682, "learning_rate": 0.001, "loss": 1.7801, "step": 497336 }, { "epoch": 42.908212560386474, "grad_norm": 0.3346251845359802, "learning_rate": 0.001, "loss": 1.7737, "step": 497392 }, { "epoch": 42.91304347826087, "grad_norm": 0.5012972950935364, "learning_rate": 0.001, "loss": 1.7738, "step": 497448 }, { "epoch": 42.91787439613527, "grad_norm": 0.4818948805332184, "learning_rate": 0.001, "loss": 1.7789, "step": 497504 }, { "epoch": 42.92270531400966, "grad_norm": 0.3438078761100769, "learning_rate": 0.001, "loss": 1.7626, "step": 497560 }, { "epoch": 42.927536231884055, "grad_norm": 1.327869176864624, "learning_rate": 0.001, "loss": 1.7551, "step": 497616 }, { "epoch": 42.932367149758456, "grad_norm": 0.584452748298645, "learning_rate": 0.001, "loss": 1.7626, "step": 497672 }, { "epoch": 42.93719806763285, "grad_norm": 0.2980891764163971, "learning_rate": 0.001, "loss": 1.7588, "step": 497728 }, { "epoch": 42.94202898550725, "grad_norm": 0.4084571897983551, "learning_rate": 0.001, "loss": 1.755, "step": 497784 }, { "epoch": 42.94685990338164, "grad_norm": 0.293361097574234, "learning_rate": 0.001, "loss": 1.755, "step": 497840 }, { "epoch": 42.95169082125604, "grad_norm": 0.3471390902996063, "learning_rate": 0.001, "loss": 1.7522, "step": 497896 }, { "epoch": 42.95652173913044, "grad_norm": 0.35901501774787903, "learning_rate": 0.001, "loss": 1.7536, "step": 497952 }, { "epoch": 42.96135265700483, "grad_norm": 1.2451173067092896, "learning_rate": 0.001, "loss": 1.7534, "step": 498008 }, { "epoch": 42.966183574879224, "grad_norm": 0.2934431731700897, "learning_rate": 0.001, "loss": 1.7604, "step": 498064 }, { "epoch": 42.971014492753625, "grad_norm": 0.23306246101856232, "learning_rate": 0.001, "loss": 1.7505, "step": 498120 }, { "epoch": 42.97584541062802, "grad_norm": 1.0261784791946411, "learning_rate": 0.001, "loss": 1.754, "step": 498176 }, { "epoch": 42.98067632850242, "grad_norm": 0.3823038339614868, "learning_rate": 0.001, "loss": 1.7532, "step": 498232 }, { "epoch": 42.98550724637681, "grad_norm": 0.962932288646698, "learning_rate": 0.001, "loss": 1.7563, "step": 498288 }, { "epoch": 42.990338164251206, "grad_norm": 0.35090169310569763, "learning_rate": 0.001, "loss": 1.7565, "step": 498344 }, { "epoch": 42.99516908212561, "grad_norm": 3.1163649559020996, "learning_rate": 0.001, "loss": 1.7479, "step": 498400 }, { "epoch": 43.0, "grad_norm": 0.31754398345947266, "learning_rate": 0.001, "loss": 1.752, "step": 498456 }, { "epoch": 43.00483091787439, "grad_norm": 0.2880837917327881, "learning_rate": 0.001, "loss": 1.7211, "step": 498512 }, { "epoch": 43.009661835748794, "grad_norm": 0.5898643732070923, "learning_rate": 0.001, "loss": 1.7208, "step": 498568 }, { "epoch": 43.01449275362319, "grad_norm": 0.9040492177009583, "learning_rate": 0.001, "loss": 1.7222, "step": 498624 }, { "epoch": 43.01932367149758, "grad_norm": 0.4281351864337921, "learning_rate": 0.001, "loss": 1.739, "step": 498680 }, { "epoch": 43.02415458937198, "grad_norm": 0.28973937034606934, "learning_rate": 0.001, "loss": 1.7452, "step": 498736 }, { "epoch": 43.028985507246375, "grad_norm": 0.2905622720718384, "learning_rate": 0.001, "loss": 1.7337, "step": 498792 }, { "epoch": 43.033816425120776, "grad_norm": 0.33053648471832275, "learning_rate": 0.001, "loss": 1.7274, "step": 498848 }, { "epoch": 43.03864734299517, "grad_norm": 0.2611054480075836, "learning_rate": 0.001, "loss": 1.7294, "step": 498904 }, { "epoch": 43.04347826086956, "grad_norm": 1.366268277168274, "learning_rate": 0.001, "loss": 1.7441, "step": 498960 }, { "epoch": 43.04830917874396, "grad_norm": 0.7922677993774414, "learning_rate": 0.001, "loss": 1.7388, "step": 499016 }, { "epoch": 43.05314009661836, "grad_norm": 0.3109741806983948, "learning_rate": 0.001, "loss": 1.732, "step": 499072 }, { "epoch": 43.05797101449275, "grad_norm": 0.45245328545570374, "learning_rate": 0.001, "loss": 1.7349, "step": 499128 }, { "epoch": 43.06280193236715, "grad_norm": 0.3570323884487152, "learning_rate": 0.001, "loss": 1.7293, "step": 499184 }, { "epoch": 43.067632850241544, "grad_norm": 0.2881699502468109, "learning_rate": 0.001, "loss": 1.7342, "step": 499240 }, { "epoch": 43.072463768115945, "grad_norm": 0.3332690894603729, "learning_rate": 0.001, "loss": 1.7223, "step": 499296 }, { "epoch": 43.07729468599034, "grad_norm": 0.5095784068107605, "learning_rate": 0.001, "loss": 1.728, "step": 499352 }, { "epoch": 43.08212560386473, "grad_norm": 0.6457642316818237, "learning_rate": 0.001, "loss": 1.7331, "step": 499408 }, { "epoch": 43.08695652173913, "grad_norm": 0.292636901140213, "learning_rate": 0.001, "loss": 1.7244, "step": 499464 }, { "epoch": 43.091787439613526, "grad_norm": 0.3947647213935852, "learning_rate": 0.001, "loss": 1.7334, "step": 499520 }, { "epoch": 43.09661835748792, "grad_norm": 0.29116585850715637, "learning_rate": 0.001, "loss": 1.7353, "step": 499576 }, { "epoch": 43.10144927536232, "grad_norm": 0.32003694772720337, "learning_rate": 0.001, "loss": 1.7247, "step": 499632 }, { "epoch": 43.106280193236714, "grad_norm": 0.2887139618396759, "learning_rate": 0.001, "loss": 1.7301, "step": 499688 }, { "epoch": 43.111111111111114, "grad_norm": 0.29200631380081177, "learning_rate": 0.001, "loss": 1.7285, "step": 499744 }, { "epoch": 43.11594202898551, "grad_norm": 0.26603424549102783, "learning_rate": 0.001, "loss": 1.7262, "step": 499800 }, { "epoch": 43.1207729468599, "grad_norm": 0.29278939962387085, "learning_rate": 0.001, "loss": 1.7259, "step": 499856 }, { "epoch": 43.1256038647343, "grad_norm": 0.41091659665107727, "learning_rate": 0.001, "loss": 1.7271, "step": 499912 }, { "epoch": 43.130434782608695, "grad_norm": 0.35819709300994873, "learning_rate": 0.001, "loss": 1.7307, "step": 499968 }, { "epoch": 43.13526570048309, "grad_norm": 0.2634340226650238, "learning_rate": 0.001, "loss": 1.7294, "step": 500024 }, { "epoch": 43.14009661835749, "grad_norm": 0.5101088881492615, "learning_rate": 0.001, "loss": 1.7346, "step": 500080 }, { "epoch": 43.14492753623188, "grad_norm": 0.3782312273979187, "learning_rate": 0.001, "loss": 1.7212, "step": 500136 }, { "epoch": 43.14975845410628, "grad_norm": 0.3335227370262146, "learning_rate": 0.001, "loss": 1.7193, "step": 500192 }, { "epoch": 43.15458937198068, "grad_norm": 0.2480354607105255, "learning_rate": 0.001, "loss": 1.7236, "step": 500248 }, { "epoch": 43.15942028985507, "grad_norm": 0.24254335463047028, "learning_rate": 0.001, "loss": 1.7284, "step": 500304 }, { "epoch": 43.16425120772947, "grad_norm": 0.25509339570999146, "learning_rate": 0.001, "loss": 1.7248, "step": 500360 }, { "epoch": 43.169082125603865, "grad_norm": 0.3785673677921295, "learning_rate": 0.001, "loss": 1.7209, "step": 500416 }, { "epoch": 43.17391304347826, "grad_norm": 0.2932906150817871, "learning_rate": 0.001, "loss": 1.7252, "step": 500472 }, { "epoch": 43.17874396135266, "grad_norm": 0.268908828496933, "learning_rate": 0.001, "loss": 1.7302, "step": 500528 }, { "epoch": 43.18357487922705, "grad_norm": 0.31887146830558777, "learning_rate": 0.001, "loss": 1.7487, "step": 500584 }, { "epoch": 43.18840579710145, "grad_norm": 0.3154051899909973, "learning_rate": 0.001, "loss": 1.745, "step": 500640 }, { "epoch": 43.193236714975846, "grad_norm": 0.3698274791240692, "learning_rate": 0.001, "loss": 1.7363, "step": 500696 }, { "epoch": 43.19806763285024, "grad_norm": 0.3926672339439392, "learning_rate": 0.001, "loss": 1.7399, "step": 500752 }, { "epoch": 43.20289855072464, "grad_norm": 1.1627651453018188, "learning_rate": 0.001, "loss": 1.7273, "step": 500808 }, { "epoch": 43.207729468599034, "grad_norm": 0.5841051340103149, "learning_rate": 0.001, "loss": 1.7236, "step": 500864 }, { "epoch": 43.21256038647343, "grad_norm": 0.4725930094718933, "learning_rate": 0.001, "loss": 1.7262, "step": 500920 }, { "epoch": 43.21739130434783, "grad_norm": 0.28683337569236755, "learning_rate": 0.001, "loss": 1.7206, "step": 500976 }, { "epoch": 43.22222222222222, "grad_norm": 0.36116835474967957, "learning_rate": 0.001, "loss": 1.7251, "step": 501032 }, { "epoch": 43.227053140096615, "grad_norm": 0.5035881400108337, "learning_rate": 0.001, "loss": 1.7356, "step": 501088 }, { "epoch": 43.231884057971016, "grad_norm": 0.3092077672481537, "learning_rate": 0.001, "loss": 1.7311, "step": 501144 }, { "epoch": 43.23671497584541, "grad_norm": 0.3408401608467102, "learning_rate": 0.001, "loss": 1.7321, "step": 501200 }, { "epoch": 43.24154589371981, "grad_norm": 0.33938276767730713, "learning_rate": 0.001, "loss": 1.7333, "step": 501256 }, { "epoch": 43.2463768115942, "grad_norm": 0.3274970054626465, "learning_rate": 0.001, "loss": 1.7293, "step": 501312 }, { "epoch": 43.2512077294686, "grad_norm": 0.5984912514686584, "learning_rate": 0.001, "loss": 1.7267, "step": 501368 }, { "epoch": 43.256038647343, "grad_norm": 1.0737807750701904, "learning_rate": 0.001, "loss": 1.7401, "step": 501424 }, { "epoch": 43.26086956521739, "grad_norm": 0.310937762260437, "learning_rate": 0.001, "loss": 1.756, "step": 501480 }, { "epoch": 43.265700483091784, "grad_norm": 0.36521288752555847, "learning_rate": 0.001, "loss": 1.7321, "step": 501536 }, { "epoch": 43.270531400966185, "grad_norm": 0.383823037147522, "learning_rate": 0.001, "loss": 1.7292, "step": 501592 }, { "epoch": 43.27536231884058, "grad_norm": 0.27954620122909546, "learning_rate": 0.001, "loss": 1.7334, "step": 501648 }, { "epoch": 43.28019323671498, "grad_norm": 2.603872299194336, "learning_rate": 0.001, "loss": 1.7271, "step": 501704 }, { "epoch": 43.28502415458937, "grad_norm": 1.2268099784851074, "learning_rate": 0.001, "loss": 1.7381, "step": 501760 }, { "epoch": 43.289855072463766, "grad_norm": 0.4057111144065857, "learning_rate": 0.001, "loss": 1.734, "step": 501816 }, { "epoch": 43.29468599033817, "grad_norm": 0.49760401248931885, "learning_rate": 0.001, "loss": 1.7475, "step": 501872 }, { "epoch": 43.29951690821256, "grad_norm": 0.33154579997062683, "learning_rate": 0.001, "loss": 1.7539, "step": 501928 }, { "epoch": 43.30434782608695, "grad_norm": 0.4327848255634308, "learning_rate": 0.001, "loss": 1.7369, "step": 501984 }, { "epoch": 43.309178743961354, "grad_norm": 0.3378254175186157, "learning_rate": 0.001, "loss": 1.7337, "step": 502040 }, { "epoch": 43.31400966183575, "grad_norm": 0.529498279094696, "learning_rate": 0.001, "loss": 1.7321, "step": 502096 }, { "epoch": 43.31884057971015, "grad_norm": 0.35313135385513306, "learning_rate": 0.001, "loss": 1.7383, "step": 502152 }, { "epoch": 43.32367149758454, "grad_norm": 0.44525596499443054, "learning_rate": 0.001, "loss": 1.731, "step": 502208 }, { "epoch": 43.328502415458935, "grad_norm": 0.2622774839401245, "learning_rate": 0.001, "loss": 1.7265, "step": 502264 }, { "epoch": 43.333333333333336, "grad_norm": 0.6243840456008911, "learning_rate": 0.001, "loss": 1.7392, "step": 502320 }, { "epoch": 43.33816425120773, "grad_norm": 0.3983575701713562, "learning_rate": 0.001, "loss": 1.7253, "step": 502376 }, { "epoch": 43.34299516908212, "grad_norm": 0.23178161680698395, "learning_rate": 0.001, "loss": 1.7271, "step": 502432 }, { "epoch": 43.34782608695652, "grad_norm": 2.99115252494812, "learning_rate": 0.001, "loss": 1.7233, "step": 502488 }, { "epoch": 43.35265700483092, "grad_norm": 0.39675578474998474, "learning_rate": 0.001, "loss": 1.7384, "step": 502544 }, { "epoch": 43.35748792270532, "grad_norm": 0.5112400054931641, "learning_rate": 0.001, "loss": 1.7475, "step": 502600 }, { "epoch": 43.36231884057971, "grad_norm": 0.6033613681793213, "learning_rate": 0.001, "loss": 1.7345, "step": 502656 }, { "epoch": 43.367149758454104, "grad_norm": 0.3621089458465576, "learning_rate": 0.001, "loss": 1.7381, "step": 502712 }, { "epoch": 43.371980676328505, "grad_norm": 0.41851142048835754, "learning_rate": 0.001, "loss": 1.7355, "step": 502768 }, { "epoch": 43.3768115942029, "grad_norm": 0.317239910364151, "learning_rate": 0.001, "loss": 1.728, "step": 502824 }, { "epoch": 43.38164251207729, "grad_norm": 0.3395302891731262, "learning_rate": 0.001, "loss": 1.7343, "step": 502880 }, { "epoch": 43.38647342995169, "grad_norm": 0.3218384385108948, "learning_rate": 0.001, "loss": 1.7402, "step": 502936 }, { "epoch": 43.391304347826086, "grad_norm": 0.9764431118965149, "learning_rate": 0.001, "loss": 1.7304, "step": 502992 }, { "epoch": 43.39613526570048, "grad_norm": 0.44307273626327515, "learning_rate": 0.001, "loss": 1.7378, "step": 503048 }, { "epoch": 43.40096618357488, "grad_norm": 0.3319181501865387, "learning_rate": 0.001, "loss": 1.7366, "step": 503104 }, { "epoch": 43.405797101449274, "grad_norm": 0.27848803997039795, "learning_rate": 0.001, "loss": 1.7248, "step": 503160 }, { "epoch": 43.410628019323674, "grad_norm": 0.24711474776268005, "learning_rate": 0.001, "loss": 1.7308, "step": 503216 }, { "epoch": 43.41545893719807, "grad_norm": 0.2719164788722992, "learning_rate": 0.001, "loss": 1.7285, "step": 503272 }, { "epoch": 43.42028985507246, "grad_norm": 0.32736772298812866, "learning_rate": 0.001, "loss": 1.7304, "step": 503328 }, { "epoch": 43.42512077294686, "grad_norm": 0.34980425238609314, "learning_rate": 0.001, "loss": 1.7334, "step": 503384 }, { "epoch": 43.429951690821255, "grad_norm": 0.24120280146598816, "learning_rate": 0.001, "loss": 1.7292, "step": 503440 }, { "epoch": 43.43478260869565, "grad_norm": 0.28396502137184143, "learning_rate": 0.001, "loss": 1.7256, "step": 503496 }, { "epoch": 43.43961352657005, "grad_norm": 0.6430443525314331, "learning_rate": 0.001, "loss": 1.7309, "step": 503552 }, { "epoch": 43.44444444444444, "grad_norm": 0.7724123001098633, "learning_rate": 0.001, "loss": 1.7327, "step": 503608 }, { "epoch": 43.44927536231884, "grad_norm": 0.42654114961624146, "learning_rate": 0.001, "loss": 1.7293, "step": 503664 }, { "epoch": 43.45410628019324, "grad_norm": 0.33623358607292175, "learning_rate": 0.001, "loss": 1.7285, "step": 503720 }, { "epoch": 43.45893719806763, "grad_norm": 0.30150890350341797, "learning_rate": 0.001, "loss": 1.73, "step": 503776 }, { "epoch": 43.46376811594203, "grad_norm": 0.30979353189468384, "learning_rate": 0.001, "loss": 1.7376, "step": 503832 }, { "epoch": 43.468599033816425, "grad_norm": 0.30670541524887085, "learning_rate": 0.001, "loss": 1.739, "step": 503888 }, { "epoch": 43.47342995169082, "grad_norm": 0.3110942840576172, "learning_rate": 0.001, "loss": 1.7273, "step": 503944 }, { "epoch": 43.47826086956522, "grad_norm": 0.33473703265190125, "learning_rate": 0.001, "loss": 1.7337, "step": 504000 }, { "epoch": 43.48309178743961, "grad_norm": 0.35574331879615784, "learning_rate": 0.001, "loss": 1.7342, "step": 504056 }, { "epoch": 43.48792270531401, "grad_norm": 0.9274934530258179, "learning_rate": 0.001, "loss": 1.7278, "step": 504112 }, { "epoch": 43.492753623188406, "grad_norm": 0.37786850333213806, "learning_rate": 0.001, "loss": 1.7601, "step": 504168 }, { "epoch": 43.4975845410628, "grad_norm": 0.472273588180542, "learning_rate": 0.001, "loss": 1.7444, "step": 504224 }, { "epoch": 43.5024154589372, "grad_norm": 0.2943299412727356, "learning_rate": 0.001, "loss": 1.7449, "step": 504280 }, { "epoch": 43.507246376811594, "grad_norm": 0.3513292670249939, "learning_rate": 0.001, "loss": 1.7432, "step": 504336 }, { "epoch": 43.51207729468599, "grad_norm": 0.30205070972442627, "learning_rate": 0.001, "loss": 1.7484, "step": 504392 }, { "epoch": 43.51690821256039, "grad_norm": 0.32721245288848877, "learning_rate": 0.001, "loss": 1.7413, "step": 504448 }, { "epoch": 43.52173913043478, "grad_norm": 0.3585265576839447, "learning_rate": 0.001, "loss": 1.7495, "step": 504504 }, { "epoch": 43.52657004830918, "grad_norm": 0.5081578493118286, "learning_rate": 0.001, "loss": 1.7606, "step": 504560 }, { "epoch": 43.531400966183575, "grad_norm": 2.930466413497925, "learning_rate": 0.001, "loss": 1.7465, "step": 504616 }, { "epoch": 43.53623188405797, "grad_norm": 1.5184738636016846, "learning_rate": 0.001, "loss": 1.7604, "step": 504672 }, { "epoch": 43.54106280193237, "grad_norm": 1.3551055192947388, "learning_rate": 0.001, "loss": 1.7635, "step": 504728 }, { "epoch": 43.54589371980676, "grad_norm": 0.7215574979782104, "learning_rate": 0.001, "loss": 1.7756, "step": 504784 }, { "epoch": 43.55072463768116, "grad_norm": 2.8795104026794434, "learning_rate": 0.001, "loss": 1.7898, "step": 504840 }, { "epoch": 43.55555555555556, "grad_norm": 0.36410486698150635, "learning_rate": 0.001, "loss": 1.7767, "step": 504896 }, { "epoch": 43.56038647342995, "grad_norm": 0.2863364517688751, "learning_rate": 0.001, "loss": 1.7698, "step": 504952 }, { "epoch": 43.56521739130435, "grad_norm": 0.3670561611652374, "learning_rate": 0.001, "loss": 1.7651, "step": 505008 }, { "epoch": 43.570048309178745, "grad_norm": 0.9845800995826721, "learning_rate": 0.001, "loss": 1.778, "step": 505064 }, { "epoch": 43.57487922705314, "grad_norm": 0.8016309142112732, "learning_rate": 0.001, "loss": 1.7631, "step": 505120 }, { "epoch": 43.57971014492754, "grad_norm": 0.36346426606178284, "learning_rate": 0.001, "loss": 1.7706, "step": 505176 }, { "epoch": 43.58454106280193, "grad_norm": 9.896422386169434, "learning_rate": 0.001, "loss": 1.7655, "step": 505232 }, { "epoch": 43.589371980676326, "grad_norm": 0.29240819811820984, "learning_rate": 0.001, "loss": 1.758, "step": 505288 }, { "epoch": 43.594202898550726, "grad_norm": 0.3180306553840637, "learning_rate": 0.001, "loss": 1.7536, "step": 505344 }, { "epoch": 43.59903381642512, "grad_norm": 1.12047278881073, "learning_rate": 0.001, "loss": 1.7631, "step": 505400 }, { "epoch": 43.60386473429952, "grad_norm": 1.241138219833374, "learning_rate": 0.001, "loss": 1.7654, "step": 505456 }, { "epoch": 43.608695652173914, "grad_norm": 3.763753652572632, "learning_rate": 0.001, "loss": 1.771, "step": 505512 }, { "epoch": 43.61352657004831, "grad_norm": 2.2122726440429688, "learning_rate": 0.001, "loss": 1.7725, "step": 505568 }, { "epoch": 43.61835748792271, "grad_norm": 0.995103120803833, "learning_rate": 0.001, "loss": 1.7656, "step": 505624 }, { "epoch": 43.6231884057971, "grad_norm": 0.3988291621208191, "learning_rate": 0.001, "loss": 1.7652, "step": 505680 }, { "epoch": 43.628019323671495, "grad_norm": 0.2836867570877075, "learning_rate": 0.001, "loss": 1.7488, "step": 505736 }, { "epoch": 43.632850241545896, "grad_norm": 0.569118320941925, "learning_rate": 0.001, "loss": 1.743, "step": 505792 }, { "epoch": 43.63768115942029, "grad_norm": 1.7818878889083862, "learning_rate": 0.001, "loss": 1.7451, "step": 505848 }, { "epoch": 43.64251207729468, "grad_norm": 0.5960211753845215, "learning_rate": 0.001, "loss": 1.7419, "step": 505904 }, { "epoch": 43.64734299516908, "grad_norm": 0.41328251361846924, "learning_rate": 0.001, "loss": 1.7382, "step": 505960 }, { "epoch": 43.65217391304348, "grad_norm": 0.46101588010787964, "learning_rate": 0.001, "loss": 1.7412, "step": 506016 }, { "epoch": 43.65700483091788, "grad_norm": 0.41485393047332764, "learning_rate": 0.001, "loss": 1.7466, "step": 506072 }, { "epoch": 43.66183574879227, "grad_norm": 1.0683009624481201, "learning_rate": 0.001, "loss": 1.7432, "step": 506128 }, { "epoch": 43.666666666666664, "grad_norm": 8.871010780334473, "learning_rate": 0.001, "loss": 1.7518, "step": 506184 }, { "epoch": 43.671497584541065, "grad_norm": 0.7039579153060913, "learning_rate": 0.001, "loss": 1.7485, "step": 506240 }, { "epoch": 43.67632850241546, "grad_norm": 0.43488240242004395, "learning_rate": 0.001, "loss": 1.7493, "step": 506296 }, { "epoch": 43.68115942028985, "grad_norm": 1.583502173423767, "learning_rate": 0.001, "loss": 1.7478, "step": 506352 }, { "epoch": 43.68599033816425, "grad_norm": 0.638083815574646, "learning_rate": 0.001, "loss": 1.7513, "step": 506408 }, { "epoch": 43.690821256038646, "grad_norm": 0.44312989711761475, "learning_rate": 0.001, "loss": 1.7684, "step": 506464 }, { "epoch": 43.69565217391305, "grad_norm": 0.34854406118392944, "learning_rate": 0.001, "loss": 1.7516, "step": 506520 }, { "epoch": 43.70048309178744, "grad_norm": 0.308992862701416, "learning_rate": 0.001, "loss": 1.7412, "step": 506576 }, { "epoch": 43.70531400966183, "grad_norm": 0.5950103998184204, "learning_rate": 0.001, "loss": 1.74, "step": 506632 }, { "epoch": 43.710144927536234, "grad_norm": 1.336281418800354, "learning_rate": 0.001, "loss": 1.7497, "step": 506688 }, { "epoch": 43.71497584541063, "grad_norm": 0.42758655548095703, "learning_rate": 0.001, "loss": 1.7453, "step": 506744 }, { "epoch": 43.71980676328502, "grad_norm": 0.4068935811519623, "learning_rate": 0.001, "loss": 1.7451, "step": 506800 }, { "epoch": 43.72463768115942, "grad_norm": 0.3154278099536896, "learning_rate": 0.001, "loss": 1.7499, "step": 506856 }, { "epoch": 43.729468599033815, "grad_norm": 0.40624624490737915, "learning_rate": 0.001, "loss": 1.7506, "step": 506912 }, { "epoch": 43.734299516908216, "grad_norm": 0.6809932589530945, "learning_rate": 0.001, "loss": 1.7481, "step": 506968 }, { "epoch": 43.73913043478261, "grad_norm": 0.4508463740348816, "learning_rate": 0.001, "loss": 1.761, "step": 507024 }, { "epoch": 43.743961352657, "grad_norm": 0.31132858991622925, "learning_rate": 0.001, "loss": 1.7471, "step": 507080 }, { "epoch": 43.7487922705314, "grad_norm": 1.1491883993148804, "learning_rate": 0.001, "loss": 1.7549, "step": 507136 }, { "epoch": 43.7536231884058, "grad_norm": 0.3585839867591858, "learning_rate": 0.001, "loss": 1.7578, "step": 507192 }, { "epoch": 43.75845410628019, "grad_norm": 0.42305389046669006, "learning_rate": 0.001, "loss": 1.7423, "step": 507248 }, { "epoch": 43.76328502415459, "grad_norm": 1.0736076831817627, "learning_rate": 0.001, "loss": 1.7486, "step": 507304 }, { "epoch": 43.768115942028984, "grad_norm": 0.3753129541873932, "learning_rate": 0.001, "loss": 1.7584, "step": 507360 }, { "epoch": 43.772946859903385, "grad_norm": 0.2627319395542145, "learning_rate": 0.001, "loss": 1.753, "step": 507416 }, { "epoch": 43.77777777777778, "grad_norm": 1.1527009010314941, "learning_rate": 0.001, "loss": 1.7548, "step": 507472 }, { "epoch": 43.78260869565217, "grad_norm": 0.5647760033607483, "learning_rate": 0.001, "loss": 1.7575, "step": 507528 }, { "epoch": 43.78743961352657, "grad_norm": 0.6352528929710388, "learning_rate": 0.001, "loss": 1.7585, "step": 507584 }, { "epoch": 43.792270531400966, "grad_norm": 0.26792261004447937, "learning_rate": 0.001, "loss": 1.7571, "step": 507640 }, { "epoch": 43.79710144927536, "grad_norm": 0.31790515780448914, "learning_rate": 0.001, "loss": 1.7679, "step": 507696 }, { "epoch": 43.80193236714976, "grad_norm": 0.42530447244644165, "learning_rate": 0.001, "loss": 1.7716, "step": 507752 }, { "epoch": 43.806763285024154, "grad_norm": 0.31393179297447205, "learning_rate": 0.001, "loss": 1.7727, "step": 507808 }, { "epoch": 43.81159420289855, "grad_norm": 0.5969078540802002, "learning_rate": 0.001, "loss": 1.7688, "step": 507864 }, { "epoch": 43.81642512077295, "grad_norm": 1.5209839344024658, "learning_rate": 0.001, "loss": 1.7803, "step": 507920 }, { "epoch": 43.82125603864734, "grad_norm": 0.4208824038505554, "learning_rate": 0.001, "loss": 1.7738, "step": 507976 }, { "epoch": 43.82608695652174, "grad_norm": 2.8256845474243164, "learning_rate": 0.001, "loss": 1.7665, "step": 508032 }, { "epoch": 43.830917874396135, "grad_norm": 4.412088394165039, "learning_rate": 0.001, "loss": 1.7678, "step": 508088 }, { "epoch": 43.83574879227053, "grad_norm": 0.9325332045555115, "learning_rate": 0.001, "loss": 1.7743, "step": 508144 }, { "epoch": 43.84057971014493, "grad_norm": 1.7778911590576172, "learning_rate": 0.001, "loss": 1.7627, "step": 508200 }, { "epoch": 43.84541062801932, "grad_norm": 1.9286770820617676, "learning_rate": 0.001, "loss": 1.7649, "step": 508256 }, { "epoch": 43.85024154589372, "grad_norm": 2.1250927448272705, "learning_rate": 0.001, "loss": 1.7569, "step": 508312 }, { "epoch": 43.85507246376812, "grad_norm": 14.545105934143066, "learning_rate": 0.001, "loss": 1.7645, "step": 508368 }, { "epoch": 43.85990338164251, "grad_norm": 0.37764468789100647, "learning_rate": 0.001, "loss": 1.7652, "step": 508424 }, { "epoch": 43.86473429951691, "grad_norm": 0.8136737942695618, "learning_rate": 0.001, "loss": 1.7627, "step": 508480 }, { "epoch": 43.869565217391305, "grad_norm": 1.5910199880599976, "learning_rate": 0.001, "loss": 1.7657, "step": 508536 }, { "epoch": 43.8743961352657, "grad_norm": 0.3547941744327545, "learning_rate": 0.001, "loss": 1.7514, "step": 508592 }, { "epoch": 43.8792270531401, "grad_norm": 1.1046407222747803, "learning_rate": 0.001, "loss": 1.7611, "step": 508648 }, { "epoch": 43.88405797101449, "grad_norm": 5.16987943649292, "learning_rate": 0.001, "loss": 1.7563, "step": 508704 }, { "epoch": 43.888888888888886, "grad_norm": 15.713579177856445, "learning_rate": 0.001, "loss": 1.7487, "step": 508760 }, { "epoch": 43.893719806763286, "grad_norm": 0.7006009817123413, "learning_rate": 0.001, "loss": 1.7504, "step": 508816 }, { "epoch": 43.89855072463768, "grad_norm": 0.402317613363266, "learning_rate": 0.001, "loss": 1.7418, "step": 508872 }, { "epoch": 43.90338164251208, "grad_norm": 0.2797320783138275, "learning_rate": 0.001, "loss": 1.749, "step": 508928 }, { "epoch": 43.908212560386474, "grad_norm": 0.4944930374622345, "learning_rate": 0.001, "loss": 1.7483, "step": 508984 }, { "epoch": 43.91304347826087, "grad_norm": 0.4514928460121155, "learning_rate": 0.001, "loss": 1.7455, "step": 509040 }, { "epoch": 43.91787439613527, "grad_norm": 0.3731263279914856, "learning_rate": 0.001, "loss": 1.7565, "step": 509096 }, { "epoch": 43.92270531400966, "grad_norm": 0.7510005831718445, "learning_rate": 0.001, "loss": 1.7534, "step": 509152 }, { "epoch": 43.927536231884055, "grad_norm": 0.3649645745754242, "learning_rate": 0.001, "loss": 1.7515, "step": 509208 }, { "epoch": 43.932367149758456, "grad_norm": 0.34229764342308044, "learning_rate": 0.001, "loss": 1.7657, "step": 509264 }, { "epoch": 43.93719806763285, "grad_norm": 0.25829237699508667, "learning_rate": 0.001, "loss": 1.7599, "step": 509320 }, { "epoch": 43.94202898550725, "grad_norm": 0.5315322875976562, "learning_rate": 0.001, "loss": 1.7615, "step": 509376 }, { "epoch": 43.94685990338164, "grad_norm": 0.7372596263885498, "learning_rate": 0.001, "loss": 1.7638, "step": 509432 }, { "epoch": 43.95169082125604, "grad_norm": 0.819179117679596, "learning_rate": 0.001, "loss": 1.7562, "step": 509488 }, { "epoch": 43.95652173913044, "grad_norm": 0.3208511173725128, "learning_rate": 0.001, "loss": 1.7528, "step": 509544 }, { "epoch": 43.96135265700483, "grad_norm": 2.2648298740386963, "learning_rate": 0.001, "loss": 1.7659, "step": 509600 }, { "epoch": 43.966183574879224, "grad_norm": 0.31545767188072205, "learning_rate": 0.001, "loss": 1.7594, "step": 509656 }, { "epoch": 43.971014492753625, "grad_norm": 1.0272369384765625, "learning_rate": 0.001, "loss": 1.7641, "step": 509712 }, { "epoch": 43.97584541062802, "grad_norm": 0.32061269879341125, "learning_rate": 0.001, "loss": 1.766, "step": 509768 }, { "epoch": 43.98067632850242, "grad_norm": 0.9945535063743591, "learning_rate": 0.001, "loss": 1.7604, "step": 509824 }, { "epoch": 43.98550724637681, "grad_norm": 853.5796508789062, "learning_rate": 0.001, "loss": 1.7667, "step": 509880 }, { "epoch": 43.990338164251206, "grad_norm": 2.772505760192871, "learning_rate": 0.001, "loss": 1.7585, "step": 509936 }, { "epoch": 43.99516908212561, "grad_norm": 4.480376243591309, "learning_rate": 0.001, "loss": 1.7613, "step": 509992 }, { "epoch": 44.0, "grad_norm": 0.3885868191719055, "learning_rate": 0.001, "loss": 1.7493, "step": 510048 }, { "epoch": 44.00483091787439, "grad_norm": 0.9080930352210999, "learning_rate": 0.001, "loss": 1.7168, "step": 510104 }, { "epoch": 44.009661835748794, "grad_norm": 0.5800145864486694, "learning_rate": 0.001, "loss": 1.7208, "step": 510160 }, { "epoch": 44.01449275362319, "grad_norm": 0.3350420594215393, "learning_rate": 0.001, "loss": 1.7119, "step": 510216 }, { "epoch": 44.01932367149758, "grad_norm": 0.3337475657463074, "learning_rate": 0.001, "loss": 1.7181, "step": 510272 }, { "epoch": 44.02415458937198, "grad_norm": 0.6137295365333557, "learning_rate": 0.001, "loss": 1.7253, "step": 510328 }, { "epoch": 44.028985507246375, "grad_norm": 0.3282407522201538, "learning_rate": 0.001, "loss": 1.7147, "step": 510384 }, { "epoch": 44.033816425120776, "grad_norm": 0.3639090657234192, "learning_rate": 0.001, "loss": 1.7151, "step": 510440 }, { "epoch": 44.03864734299517, "grad_norm": 0.32378512620925903, "learning_rate": 0.001, "loss": 1.7266, "step": 510496 }, { "epoch": 44.04347826086956, "grad_norm": 0.375983864068985, "learning_rate": 0.001, "loss": 1.7189, "step": 510552 }, { "epoch": 44.04830917874396, "grad_norm": 0.4370112121105194, "learning_rate": 0.001, "loss": 1.7167, "step": 510608 }, { "epoch": 44.05314009661836, "grad_norm": 0.35594841837882996, "learning_rate": 0.001, "loss": 1.7184, "step": 510664 }, { "epoch": 44.05797101449275, "grad_norm": 0.28139302134513855, "learning_rate": 0.001, "loss": 1.7187, "step": 510720 }, { "epoch": 44.06280193236715, "grad_norm": 0.3106311559677124, "learning_rate": 0.001, "loss": 1.7161, "step": 510776 }, { "epoch": 44.067632850241544, "grad_norm": 0.28595760464668274, "learning_rate": 0.001, "loss": 1.721, "step": 510832 }, { "epoch": 44.072463768115945, "grad_norm": 0.379766047000885, "learning_rate": 0.001, "loss": 1.7166, "step": 510888 }, { "epoch": 44.07729468599034, "grad_norm": 0.8302298188209534, "learning_rate": 0.001, "loss": 1.7152, "step": 510944 }, { "epoch": 44.08212560386473, "grad_norm": 0.36580774188041687, "learning_rate": 0.001, "loss": 1.7216, "step": 511000 }, { "epoch": 44.08695652173913, "grad_norm": 0.2754070460796356, "learning_rate": 0.001, "loss": 1.7111, "step": 511056 }, { "epoch": 44.091787439613526, "grad_norm": 0.4942632019519806, "learning_rate": 0.001, "loss": 1.7138, "step": 511112 }, { "epoch": 44.09661835748792, "grad_norm": 0.264268696308136, "learning_rate": 0.001, "loss": 1.7277, "step": 511168 }, { "epoch": 44.10144927536232, "grad_norm": 0.6325370669364929, "learning_rate": 0.001, "loss": 1.7267, "step": 511224 }, { "epoch": 44.106280193236714, "grad_norm": 0.5119471549987793, "learning_rate": 0.001, "loss": 1.7304, "step": 511280 }, { "epoch": 44.111111111111114, "grad_norm": 0.2735876142978668, "learning_rate": 0.001, "loss": 1.728, "step": 511336 }, { "epoch": 44.11594202898551, "grad_norm": 0.8999773263931274, "learning_rate": 0.001, "loss": 1.7292, "step": 511392 }, { "epoch": 44.1207729468599, "grad_norm": 0.3545585870742798, "learning_rate": 0.001, "loss": 1.7305, "step": 511448 }, { "epoch": 44.1256038647343, "grad_norm": 0.31046995520591736, "learning_rate": 0.001, "loss": 1.7271, "step": 511504 }, { "epoch": 44.130434782608695, "grad_norm": 0.4907430410385132, "learning_rate": 0.001, "loss": 1.7451, "step": 511560 }, { "epoch": 44.13526570048309, "grad_norm": 0.3819011151790619, "learning_rate": 0.001, "loss": 1.7349, "step": 511616 }, { "epoch": 44.14009661835749, "grad_norm": 1.98544442653656, "learning_rate": 0.001, "loss": 1.7248, "step": 511672 }, { "epoch": 44.14492753623188, "grad_norm": 2.1118509769439697, "learning_rate": 0.001, "loss": 1.7293, "step": 511728 }, { "epoch": 44.14975845410628, "grad_norm": 0.3777749836444855, "learning_rate": 0.001, "loss": 1.7266, "step": 511784 }, { "epoch": 44.15458937198068, "grad_norm": 0.42451635003089905, "learning_rate": 0.001, "loss": 1.7119, "step": 511840 }, { "epoch": 44.15942028985507, "grad_norm": 0.38344424962997437, "learning_rate": 0.001, "loss": 1.7263, "step": 511896 }, { "epoch": 44.16425120772947, "grad_norm": 0.37173226475715637, "learning_rate": 0.001, "loss": 1.7276, "step": 511952 }, { "epoch": 44.169082125603865, "grad_norm": 6.116097927093506, "learning_rate": 0.001, "loss": 1.7288, "step": 512008 }, { "epoch": 44.17391304347826, "grad_norm": 0.3155895471572876, "learning_rate": 0.001, "loss": 1.7332, "step": 512064 }, { "epoch": 44.17874396135266, "grad_norm": 0.5041471719741821, "learning_rate": 0.001, "loss": 1.7187, "step": 512120 }, { "epoch": 44.18357487922705, "grad_norm": 0.4779297709465027, "learning_rate": 0.001, "loss": 1.7201, "step": 512176 }, { "epoch": 44.18840579710145, "grad_norm": 0.39231663942337036, "learning_rate": 0.001, "loss": 1.7221, "step": 512232 }, { "epoch": 44.193236714975846, "grad_norm": 1.7418016195297241, "learning_rate": 0.001, "loss": 1.7299, "step": 512288 }, { "epoch": 44.19806763285024, "grad_norm": 0.3186659812927246, "learning_rate": 0.001, "loss": 1.7277, "step": 512344 }, { "epoch": 44.20289855072464, "grad_norm": 0.34087079763412476, "learning_rate": 0.001, "loss": 1.7308, "step": 512400 }, { "epoch": 44.207729468599034, "grad_norm": 1.7852113246917725, "learning_rate": 0.001, "loss": 1.7236, "step": 512456 }, { "epoch": 44.21256038647343, "grad_norm": 9.457098960876465, "learning_rate": 0.001, "loss": 1.7373, "step": 512512 }, { "epoch": 44.21739130434783, "grad_norm": 0.7152717113494873, "learning_rate": 0.001, "loss": 1.7392, "step": 512568 }, { "epoch": 44.22222222222222, "grad_norm": 0.29821234941482544, "learning_rate": 0.001, "loss": 1.728, "step": 512624 }, { "epoch": 44.227053140096615, "grad_norm": 0.6706036925315857, "learning_rate": 0.001, "loss": 1.7298, "step": 512680 }, { "epoch": 44.231884057971016, "grad_norm": 0.308407187461853, "learning_rate": 0.001, "loss": 1.7263, "step": 512736 }, { "epoch": 44.23671497584541, "grad_norm": 6.44401741027832, "learning_rate": 0.001, "loss": 1.7461, "step": 512792 }, { "epoch": 44.24154589371981, "grad_norm": 0.3682785928249359, "learning_rate": 0.001, "loss": 1.7771, "step": 512848 }, { "epoch": 44.2463768115942, "grad_norm": 0.6611726880073547, "learning_rate": 0.001, "loss": 1.7545, "step": 512904 }, { "epoch": 44.2512077294686, "grad_norm": 0.7331146597862244, "learning_rate": 0.001, "loss": 1.7423, "step": 512960 }, { "epoch": 44.256038647343, "grad_norm": 0.356121689081192, "learning_rate": 0.001, "loss": 1.7423, "step": 513016 }, { "epoch": 44.26086956521739, "grad_norm": 0.35807570815086365, "learning_rate": 0.001, "loss": 1.7548, "step": 513072 }, { "epoch": 44.265700483091784, "grad_norm": 0.35216668248176575, "learning_rate": 0.001, "loss": 1.7457, "step": 513128 }, { "epoch": 44.270531400966185, "grad_norm": 0.7017082571983337, "learning_rate": 0.001, "loss": 1.7377, "step": 513184 }, { "epoch": 44.27536231884058, "grad_norm": 0.4306289851665497, "learning_rate": 0.001, "loss": 1.7365, "step": 513240 }, { "epoch": 44.28019323671498, "grad_norm": 1.0133066177368164, "learning_rate": 0.001, "loss": 1.7319, "step": 513296 }, { "epoch": 44.28502415458937, "grad_norm": 0.3101988732814789, "learning_rate": 0.001, "loss": 1.7567, "step": 513352 }, { "epoch": 44.289855072463766, "grad_norm": 0.32137158513069153, "learning_rate": 0.001, "loss": 1.7461, "step": 513408 }, { "epoch": 44.29468599033817, "grad_norm": 1.0342674255371094, "learning_rate": 0.001, "loss": 1.7435, "step": 513464 }, { "epoch": 44.29951690821256, "grad_norm": 0.652679979801178, "learning_rate": 0.001, "loss": 1.7334, "step": 513520 }, { "epoch": 44.30434782608695, "grad_norm": 0.5621470212936401, "learning_rate": 0.001, "loss": 1.7344, "step": 513576 }, { "epoch": 44.309178743961354, "grad_norm": 3.8929972648620605, "learning_rate": 0.001, "loss": 1.7362, "step": 513632 }, { "epoch": 44.31400966183575, "grad_norm": 0.918247640132904, "learning_rate": 0.001, "loss": 1.7383, "step": 513688 }, { "epoch": 44.31884057971015, "grad_norm": 0.5870917439460754, "learning_rate": 0.001, "loss": 1.7398, "step": 513744 }, { "epoch": 44.32367149758454, "grad_norm": 0.6914522647857666, "learning_rate": 0.001, "loss": 1.7278, "step": 513800 }, { "epoch": 44.328502415458935, "grad_norm": 0.30583176016807556, "learning_rate": 0.001, "loss": 1.7381, "step": 513856 }, { "epoch": 44.333333333333336, "grad_norm": 0.3249337077140808, "learning_rate": 0.001, "loss": 1.7323, "step": 513912 }, { "epoch": 44.33816425120773, "grad_norm": 0.34601733088493347, "learning_rate": 0.001, "loss": 1.744, "step": 513968 }, { "epoch": 44.34299516908212, "grad_norm": 0.36104172468185425, "learning_rate": 0.001, "loss": 1.7368, "step": 514024 }, { "epoch": 44.34782608695652, "grad_norm": 0.6452495455741882, "learning_rate": 0.001, "loss": 1.7373, "step": 514080 }, { "epoch": 44.35265700483092, "grad_norm": 0.3669946789741516, "learning_rate": 0.001, "loss": 1.7293, "step": 514136 }, { "epoch": 44.35748792270532, "grad_norm": 0.9303929209709167, "learning_rate": 0.001, "loss": 1.7308, "step": 514192 }, { "epoch": 44.36231884057971, "grad_norm": 0.2876887023448944, "learning_rate": 0.001, "loss": 1.7418, "step": 514248 }, { "epoch": 44.367149758454104, "grad_norm": 0.3033621907234192, "learning_rate": 0.001, "loss": 1.7375, "step": 514304 }, { "epoch": 44.371980676328505, "grad_norm": 0.2653990089893341, "learning_rate": 0.001, "loss": 1.751, "step": 514360 }, { "epoch": 44.3768115942029, "grad_norm": 0.3015618920326233, "learning_rate": 0.001, "loss": 1.754, "step": 514416 }, { "epoch": 44.38164251207729, "grad_norm": 0.30280616879463196, "learning_rate": 0.001, "loss": 1.7389, "step": 514472 }, { "epoch": 44.38647342995169, "grad_norm": 0.9202247858047485, "learning_rate": 0.001, "loss": 1.7388, "step": 514528 }, { "epoch": 44.391304347826086, "grad_norm": 0.3107450008392334, "learning_rate": 0.001, "loss": 1.7401, "step": 514584 }, { "epoch": 44.39613526570048, "grad_norm": 0.2641608417034149, "learning_rate": 0.001, "loss": 1.7425, "step": 514640 }, { "epoch": 44.40096618357488, "grad_norm": 0.7104753255844116, "learning_rate": 0.001, "loss": 1.7507, "step": 514696 }, { "epoch": 44.405797101449274, "grad_norm": 0.37904998660087585, "learning_rate": 0.001, "loss": 1.7589, "step": 514752 }, { "epoch": 44.410628019323674, "grad_norm": 0.7512887120246887, "learning_rate": 0.001, "loss": 1.7716, "step": 514808 }, { "epoch": 44.41545893719807, "grad_norm": 1.52034592628479, "learning_rate": 0.001, "loss": 1.7806, "step": 514864 }, { "epoch": 44.42028985507246, "grad_norm": 0.40961334109306335, "learning_rate": 0.001, "loss": 1.7573, "step": 514920 }, { "epoch": 44.42512077294686, "grad_norm": 0.5606068968772888, "learning_rate": 0.001, "loss": 1.7619, "step": 514976 }, { "epoch": 44.429951690821255, "grad_norm": 0.45476940274238586, "learning_rate": 0.001, "loss": 1.7645, "step": 515032 }, { "epoch": 44.43478260869565, "grad_norm": 2.7815322875976562, "learning_rate": 0.001, "loss": 1.7684, "step": 515088 }, { "epoch": 44.43961352657005, "grad_norm": 1.0639739036560059, "learning_rate": 0.001, "loss": 1.7601, "step": 515144 }, { "epoch": 44.44444444444444, "grad_norm": 0.3797971308231354, "learning_rate": 0.001, "loss": 1.7633, "step": 515200 }, { "epoch": 44.44927536231884, "grad_norm": 6.126946926116943, "learning_rate": 0.001, "loss": 1.7538, "step": 515256 }, { "epoch": 44.45410628019324, "grad_norm": 0.31685301661491394, "learning_rate": 0.001, "loss": 1.7576, "step": 515312 }, { "epoch": 44.45893719806763, "grad_norm": 4.233808517456055, "learning_rate": 0.001, "loss": 1.7644, "step": 515368 }, { "epoch": 44.46376811594203, "grad_norm": 0.5376169085502625, "learning_rate": 0.001, "loss": 1.7546, "step": 515424 }, { "epoch": 44.468599033816425, "grad_norm": 0.28769081830978394, "learning_rate": 0.001, "loss": 1.7579, "step": 515480 }, { "epoch": 44.47342995169082, "grad_norm": 0.4230288863182068, "learning_rate": 0.001, "loss": 1.7613, "step": 515536 }, { "epoch": 44.47826086956522, "grad_norm": 0.5095715522766113, "learning_rate": 0.001, "loss": 1.7499, "step": 515592 }, { "epoch": 44.48309178743961, "grad_norm": 0.33711111545562744, "learning_rate": 0.001, "loss": 1.7434, "step": 515648 }, { "epoch": 44.48792270531401, "grad_norm": 0.6009061932563782, "learning_rate": 0.001, "loss": 1.7487, "step": 515704 }, { "epoch": 44.492753623188406, "grad_norm": 0.4340749979019165, "learning_rate": 0.001, "loss": 1.7596, "step": 515760 }, { "epoch": 44.4975845410628, "grad_norm": 0.3595503568649292, "learning_rate": 0.001, "loss": 1.7888, "step": 515816 }, { "epoch": 44.5024154589372, "grad_norm": 0.3426569104194641, "learning_rate": 0.001, "loss": 1.8193, "step": 515872 }, { "epoch": 44.507246376811594, "grad_norm": 0.9158487319946289, "learning_rate": 0.001, "loss": 1.8223, "step": 515928 }, { "epoch": 44.51207729468599, "grad_norm": 1.7137371301651, "learning_rate": 0.001, "loss": 1.8056, "step": 515984 }, { "epoch": 44.51690821256039, "grad_norm": 0.4418066740036011, "learning_rate": 0.001, "loss": 1.8092, "step": 516040 }, { "epoch": 44.52173913043478, "grad_norm": 3.798206090927124, "learning_rate": 0.001, "loss": 1.7802, "step": 516096 }, { "epoch": 44.52657004830918, "grad_norm": 0.763675332069397, "learning_rate": 0.001, "loss": 1.7725, "step": 516152 }, { "epoch": 44.531400966183575, "grad_norm": 3.9402244091033936, "learning_rate": 0.001, "loss": 1.7802, "step": 516208 }, { "epoch": 44.53623188405797, "grad_norm": 0.39857804775238037, "learning_rate": 0.001, "loss": 1.7745, "step": 516264 }, { "epoch": 44.54106280193237, "grad_norm": 0.6346734762191772, "learning_rate": 0.001, "loss": 1.7699, "step": 516320 }, { "epoch": 44.54589371980676, "grad_norm": 0.8489824533462524, "learning_rate": 0.001, "loss": 1.7647, "step": 516376 }, { "epoch": 44.55072463768116, "grad_norm": 0.8828040361404419, "learning_rate": 0.001, "loss": 1.7653, "step": 516432 }, { "epoch": 44.55555555555556, "grad_norm": 2.1146938800811768, "learning_rate": 0.001, "loss": 1.7618, "step": 516488 }, { "epoch": 44.56038647342995, "grad_norm": 0.9239327907562256, "learning_rate": 0.001, "loss": 1.7711, "step": 516544 }, { "epoch": 44.56521739130435, "grad_norm": 2.416935682296753, "learning_rate": 0.001, "loss": 1.7657, "step": 516600 }, { "epoch": 44.570048309178745, "grad_norm": 0.8220854997634888, "learning_rate": 0.001, "loss": 1.7693, "step": 516656 }, { "epoch": 44.57487922705314, "grad_norm": 26.31968879699707, "learning_rate": 0.001, "loss": 1.7713, "step": 516712 }, { "epoch": 44.57971014492754, "grad_norm": 5.89824104309082, "learning_rate": 0.001, "loss": 1.7684, "step": 516768 }, { "epoch": 44.58454106280193, "grad_norm": 0.3573940098285675, "learning_rate": 0.001, "loss": 1.7659, "step": 516824 }, { "epoch": 44.589371980676326, "grad_norm": 2.2968807220458984, "learning_rate": 0.001, "loss": 1.7575, "step": 516880 }, { "epoch": 44.594202898550726, "grad_norm": 3.0044000148773193, "learning_rate": 0.001, "loss": 1.7513, "step": 516936 }, { "epoch": 44.59903381642512, "grad_norm": 0.48029670119285583, "learning_rate": 0.001, "loss": 1.7525, "step": 516992 }, { "epoch": 44.60386473429952, "grad_norm": 0.7424532175064087, "learning_rate": 0.001, "loss": 1.7566, "step": 517048 }, { "epoch": 44.608695652173914, "grad_norm": 2.5451290607452393, "learning_rate": 0.001, "loss": 1.7442, "step": 517104 }, { "epoch": 44.61352657004831, "grad_norm": 1.275504231452942, "learning_rate": 0.001, "loss": 1.75, "step": 517160 }, { "epoch": 44.61835748792271, "grad_norm": 0.33170944452285767, "learning_rate": 0.001, "loss": 1.7523, "step": 517216 }, { "epoch": 44.6231884057971, "grad_norm": 5.117401123046875, "learning_rate": 0.001, "loss": 1.744, "step": 517272 }, { "epoch": 44.628019323671495, "grad_norm": 0.4236985743045807, "learning_rate": 0.001, "loss": 1.7625, "step": 517328 }, { "epoch": 44.632850241545896, "grad_norm": 0.33790692687034607, "learning_rate": 0.001, "loss": 1.7418, "step": 517384 }, { "epoch": 44.63768115942029, "grad_norm": 0.5492182374000549, "learning_rate": 0.001, "loss": 1.7506, "step": 517440 }, { "epoch": 44.64251207729468, "grad_norm": 0.6100949645042419, "learning_rate": 0.001, "loss": 1.7501, "step": 517496 }, { "epoch": 44.64734299516908, "grad_norm": 0.24246233701705933, "learning_rate": 0.001, "loss": 1.7444, "step": 517552 }, { "epoch": 44.65217391304348, "grad_norm": 0.489010751247406, "learning_rate": 0.001, "loss": 1.7432, "step": 517608 }, { "epoch": 44.65700483091788, "grad_norm": 0.4861413538455963, "learning_rate": 0.001, "loss": 1.7521, "step": 517664 }, { "epoch": 44.66183574879227, "grad_norm": 0.35629215836524963, "learning_rate": 0.001, "loss": 1.745, "step": 517720 }, { "epoch": 44.666666666666664, "grad_norm": 0.35338258743286133, "learning_rate": 0.001, "loss": 1.7398, "step": 517776 }, { "epoch": 44.671497584541065, "grad_norm": 1.5289599895477295, "learning_rate": 0.001, "loss": 1.7491, "step": 517832 }, { "epoch": 44.67632850241546, "grad_norm": 0.3826262950897217, "learning_rate": 0.001, "loss": 1.7456, "step": 517888 }, { "epoch": 44.68115942028985, "grad_norm": 0.37195757031440735, "learning_rate": 0.001, "loss": 1.744, "step": 517944 }, { "epoch": 44.68599033816425, "grad_norm": 0.25821664929389954, "learning_rate": 0.001, "loss": 1.7452, "step": 518000 }, { "epoch": 44.690821256038646, "grad_norm": 0.2607474625110626, "learning_rate": 0.001, "loss": 1.7401, "step": 518056 }, { "epoch": 44.69565217391305, "grad_norm": 6.3363566398620605, "learning_rate": 0.001, "loss": 1.7403, "step": 518112 }, { "epoch": 44.70048309178744, "grad_norm": 1.1255825757980347, "learning_rate": 0.001, "loss": 1.7455, "step": 518168 }, { "epoch": 44.70531400966183, "grad_norm": 0.5363611578941345, "learning_rate": 0.001, "loss": 1.7397, "step": 518224 }, { "epoch": 44.710144927536234, "grad_norm": 0.4870119094848633, "learning_rate": 0.001, "loss": 1.7574, "step": 518280 }, { "epoch": 44.71497584541063, "grad_norm": 0.34560301899909973, "learning_rate": 0.001, "loss": 1.7583, "step": 518336 }, { "epoch": 44.71980676328502, "grad_norm": 0.38073885440826416, "learning_rate": 0.001, "loss": 1.7573, "step": 518392 }, { "epoch": 44.72463768115942, "grad_norm": 0.3179895281791687, "learning_rate": 0.001, "loss": 1.7532, "step": 518448 }, { "epoch": 44.729468599033815, "grad_norm": 0.8279065489768982, "learning_rate": 0.001, "loss": 1.753, "step": 518504 }, { "epoch": 44.734299516908216, "grad_norm": 0.39664438366889954, "learning_rate": 0.001, "loss": 1.7572, "step": 518560 }, { "epoch": 44.73913043478261, "grad_norm": 0.5696873068809509, "learning_rate": 0.001, "loss": 1.7585, "step": 518616 }, { "epoch": 44.743961352657, "grad_norm": 1.2663644552230835, "learning_rate": 0.001, "loss": 1.7634, "step": 518672 }, { "epoch": 44.7487922705314, "grad_norm": 0.39203858375549316, "learning_rate": 0.001, "loss": 1.7512, "step": 518728 }, { "epoch": 44.7536231884058, "grad_norm": 2.1809194087982178, "learning_rate": 0.001, "loss": 1.7532, "step": 518784 }, { "epoch": 44.75845410628019, "grad_norm": 1.8396408557891846, "learning_rate": 0.001, "loss": 1.7507, "step": 518840 }, { "epoch": 44.76328502415459, "grad_norm": 0.4956870973110199, "learning_rate": 0.001, "loss": 1.757, "step": 518896 }, { "epoch": 44.768115942028984, "grad_norm": 4.103661060333252, "learning_rate": 0.001, "loss": 1.7614, "step": 518952 }, { "epoch": 44.772946859903385, "grad_norm": 0.3233181834220886, "learning_rate": 0.001, "loss": 1.7676, "step": 519008 }, { "epoch": 44.77777777777778, "grad_norm": 12.702004432678223, "learning_rate": 0.001, "loss": 1.7642, "step": 519064 }, { "epoch": 44.78260869565217, "grad_norm": 16.55841636657715, "learning_rate": 0.001, "loss": 1.7648, "step": 519120 }, { "epoch": 44.78743961352657, "grad_norm": 5.862295627593994, "learning_rate": 0.001, "loss": 1.7744, "step": 519176 }, { "epoch": 44.792270531400966, "grad_norm": 2.0035932064056396, "learning_rate": 0.001, "loss": 1.7684, "step": 519232 }, { "epoch": 44.79710144927536, "grad_norm": 1.7130136489868164, "learning_rate": 0.001, "loss": 1.7697, "step": 519288 }, { "epoch": 44.80193236714976, "grad_norm": 0.7822756767272949, "learning_rate": 0.001, "loss": 1.7827, "step": 519344 }, { "epoch": 44.806763285024154, "grad_norm": 1.0391079187393188, "learning_rate": 0.001, "loss": 1.7797, "step": 519400 }, { "epoch": 44.81159420289855, "grad_norm": 0.31277626752853394, "learning_rate": 0.001, "loss": 1.7848, "step": 519456 }, { "epoch": 44.81642512077295, "grad_norm": 0.5335419178009033, "learning_rate": 0.001, "loss": 1.7807, "step": 519512 }, { "epoch": 44.82125603864734, "grad_norm": 2.35077166557312, "learning_rate": 0.001, "loss": 1.7775, "step": 519568 }, { "epoch": 44.82608695652174, "grad_norm": 0.5974383354187012, "learning_rate": 0.001, "loss": 1.786, "step": 519624 }, { "epoch": 44.830917874396135, "grad_norm": 0.4347194731235504, "learning_rate": 0.001, "loss": 1.7751, "step": 519680 }, { "epoch": 44.83574879227053, "grad_norm": 0.28671127557754517, "learning_rate": 0.001, "loss": 1.7712, "step": 519736 }, { "epoch": 44.84057971014493, "grad_norm": 0.5309271812438965, "learning_rate": 0.001, "loss": 1.7681, "step": 519792 }, { "epoch": 44.84541062801932, "grad_norm": 1.5757977962493896, "learning_rate": 0.001, "loss": 1.7621, "step": 519848 }, { "epoch": 44.85024154589372, "grad_norm": 0.3404558598995209, "learning_rate": 0.001, "loss": 1.7594, "step": 519904 }, { "epoch": 44.85507246376812, "grad_norm": 0.33870580792427063, "learning_rate": 0.001, "loss": 1.767, "step": 519960 }, { "epoch": 44.85990338164251, "grad_norm": 0.5620613098144531, "learning_rate": 0.001, "loss": 1.7536, "step": 520016 }, { "epoch": 44.86473429951691, "grad_norm": 1.3506306409835815, "learning_rate": 0.001, "loss": 1.7595, "step": 520072 }, { "epoch": 44.869565217391305, "grad_norm": 0.4508839547634125, "learning_rate": 0.001, "loss": 1.754, "step": 520128 }, { "epoch": 44.8743961352657, "grad_norm": 0.3961387276649475, "learning_rate": 0.001, "loss": 1.7793, "step": 520184 }, { "epoch": 44.8792270531401, "grad_norm": 0.31895628571510315, "learning_rate": 0.001, "loss": 1.7584, "step": 520240 }, { "epoch": 44.88405797101449, "grad_norm": 0.4472622275352478, "learning_rate": 0.001, "loss": 1.7637, "step": 520296 }, { "epoch": 44.888888888888886, "grad_norm": 0.31508901715278625, "learning_rate": 0.001, "loss": 1.7714, "step": 520352 }, { "epoch": 44.893719806763286, "grad_norm": 0.680257260799408, "learning_rate": 0.001, "loss": 1.7649, "step": 520408 }, { "epoch": 44.89855072463768, "grad_norm": 0.30827298760414124, "learning_rate": 0.001, "loss": 1.761, "step": 520464 }, { "epoch": 44.90338164251208, "grad_norm": 0.4116891920566559, "learning_rate": 0.001, "loss": 1.7627, "step": 520520 }, { "epoch": 44.908212560386474, "grad_norm": 0.3044597804546356, "learning_rate": 0.001, "loss": 1.7603, "step": 520576 }, { "epoch": 44.91304347826087, "grad_norm": 0.33176735043525696, "learning_rate": 0.001, "loss": 1.7502, "step": 520632 }, { "epoch": 44.91787439613527, "grad_norm": 0.3552037477493286, "learning_rate": 0.001, "loss": 1.7535, "step": 520688 }, { "epoch": 44.92270531400966, "grad_norm": 0.30814239382743835, "learning_rate": 0.001, "loss": 1.75, "step": 520744 }, { "epoch": 44.927536231884055, "grad_norm": 1.234261155128479, "learning_rate": 0.001, "loss": 1.7541, "step": 520800 }, { "epoch": 44.932367149758456, "grad_norm": 1.8364142179489136, "learning_rate": 0.001, "loss": 1.7495, "step": 520856 }, { "epoch": 44.93719806763285, "grad_norm": 4.369121551513672, "learning_rate": 0.001, "loss": 1.7514, "step": 520912 }, { "epoch": 44.94202898550725, "grad_norm": 0.7031179070472717, "learning_rate": 0.001, "loss": 1.7496, "step": 520968 }, { "epoch": 44.94685990338164, "grad_norm": 0.38052552938461304, "learning_rate": 0.001, "loss": 1.7483, "step": 521024 }, { "epoch": 44.95169082125604, "grad_norm": 0.282537579536438, "learning_rate": 0.001, "loss": 1.7444, "step": 521080 }, { "epoch": 44.95652173913044, "grad_norm": 0.5754269957542419, "learning_rate": 0.001, "loss": 1.7569, "step": 521136 }, { "epoch": 44.96135265700483, "grad_norm": 0.3936268985271454, "learning_rate": 0.001, "loss": 1.7664, "step": 521192 }, { "epoch": 44.966183574879224, "grad_norm": 0.3173277676105499, "learning_rate": 0.001, "loss": 1.7585, "step": 521248 }, { "epoch": 44.971014492753625, "grad_norm": 0.45701006054878235, "learning_rate": 0.001, "loss": 1.7602, "step": 521304 }, { "epoch": 44.97584541062802, "grad_norm": 0.35018354654312134, "learning_rate": 0.001, "loss": 1.7655, "step": 521360 }, { "epoch": 44.98067632850242, "grad_norm": 0.2741232216358185, "learning_rate": 0.001, "loss": 1.7586, "step": 521416 }, { "epoch": 44.98550724637681, "grad_norm": 0.39522579312324524, "learning_rate": 0.001, "loss": 1.7532, "step": 521472 }, { "epoch": 44.990338164251206, "grad_norm": 0.9977519512176514, "learning_rate": 0.001, "loss": 1.7565, "step": 521528 }, { "epoch": 44.99516908212561, "grad_norm": 4.741569995880127, "learning_rate": 0.001, "loss": 1.7652, "step": 521584 }, { "epoch": 45.0, "grad_norm": 0.31371384859085083, "learning_rate": 0.001, "loss": 1.7518, "step": 521640 }, { "epoch": 45.00483091787439, "grad_norm": 0.3293420672416687, "learning_rate": 0.001, "loss": 1.7192, "step": 521696 }, { "epoch": 45.009661835748794, "grad_norm": 0.3113054037094116, "learning_rate": 0.001, "loss": 1.7207, "step": 521752 }, { "epoch": 45.01449275362319, "grad_norm": 0.43630892038345337, "learning_rate": 0.001, "loss": 1.7212, "step": 521808 }, { "epoch": 45.01932367149758, "grad_norm": 0.45723673701286316, "learning_rate": 0.001, "loss": 1.7243, "step": 521864 }, { "epoch": 45.02415458937198, "grad_norm": 0.3399609923362732, "learning_rate": 0.001, "loss": 1.728, "step": 521920 }, { "epoch": 45.028985507246375, "grad_norm": 0.3382002115249634, "learning_rate": 0.001, "loss": 1.715, "step": 521976 }, { "epoch": 45.033816425120776, "grad_norm": 0.3635134696960449, "learning_rate": 0.001, "loss": 1.7189, "step": 522032 }, { "epoch": 45.03864734299517, "grad_norm": 0.3185690939426422, "learning_rate": 0.001, "loss": 1.7229, "step": 522088 }, { "epoch": 45.04347826086956, "grad_norm": 0.25990843772888184, "learning_rate": 0.001, "loss": 1.7229, "step": 522144 }, { "epoch": 45.04830917874396, "grad_norm": 0.23278699815273285, "learning_rate": 0.001, "loss": 1.7234, "step": 522200 }, { "epoch": 45.05314009661836, "grad_norm": 0.3390466272830963, "learning_rate": 0.001, "loss": 1.7141, "step": 522256 }, { "epoch": 45.05797101449275, "grad_norm": 0.332475483417511, "learning_rate": 0.001, "loss": 1.7136, "step": 522312 }, { "epoch": 45.06280193236715, "grad_norm": 0.2712809443473816, "learning_rate": 0.001, "loss": 1.7147, "step": 522368 }, { "epoch": 45.067632850241544, "grad_norm": 0.24673783779144287, "learning_rate": 0.001, "loss": 1.715, "step": 522424 }, { "epoch": 45.072463768115945, "grad_norm": 5.1654253005981445, "learning_rate": 0.001, "loss": 1.7151, "step": 522480 }, { "epoch": 45.07729468599034, "grad_norm": 0.3564213514328003, "learning_rate": 0.001, "loss": 1.7228, "step": 522536 }, { "epoch": 45.08212560386473, "grad_norm": 0.2672257721424103, "learning_rate": 0.001, "loss": 1.7215, "step": 522592 }, { "epoch": 45.08695652173913, "grad_norm": 0.35722917318344116, "learning_rate": 0.001, "loss": 1.7197, "step": 522648 }, { "epoch": 45.091787439613526, "grad_norm": 1.3635164499282837, "learning_rate": 0.001, "loss": 1.7282, "step": 522704 }, { "epoch": 45.09661835748792, "grad_norm": 0.39401012659072876, "learning_rate": 0.001, "loss": 1.7375, "step": 522760 }, { "epoch": 45.10144927536232, "grad_norm": 0.26064446568489075, "learning_rate": 0.001, "loss": 1.716, "step": 522816 }, { "epoch": 45.106280193236714, "grad_norm": 0.4009648263454437, "learning_rate": 0.001, "loss": 1.7216, "step": 522872 }, { "epoch": 45.111111111111114, "grad_norm": 0.4340539574623108, "learning_rate": 0.001, "loss": 1.7306, "step": 522928 }, { "epoch": 45.11594202898551, "grad_norm": 0.7048554420471191, "learning_rate": 0.001, "loss": 1.7277, "step": 522984 }, { "epoch": 45.1207729468599, "grad_norm": 0.3423740863800049, "learning_rate": 0.001, "loss": 1.7246, "step": 523040 }, { "epoch": 45.1256038647343, "grad_norm": 0.3318486511707306, "learning_rate": 0.001, "loss": 1.7212, "step": 523096 }, { "epoch": 45.130434782608695, "grad_norm": 0.6265755295753479, "learning_rate": 0.001, "loss": 1.7222, "step": 523152 }, { "epoch": 45.13526570048309, "grad_norm": 1.302554726600647, "learning_rate": 0.001, "loss": 1.7261, "step": 523208 }, { "epoch": 45.14009661835749, "grad_norm": 0.37194523215293884, "learning_rate": 0.001, "loss": 1.7222, "step": 523264 }, { "epoch": 45.14492753623188, "grad_norm": 1.2909315824508667, "learning_rate": 0.001, "loss": 1.7373, "step": 523320 }, { "epoch": 45.14975845410628, "grad_norm": 0.7496675848960876, "learning_rate": 0.001, "loss": 1.7454, "step": 523376 }, { "epoch": 45.15458937198068, "grad_norm": 0.3917866349220276, "learning_rate": 0.001, "loss": 1.7397, "step": 523432 }, { "epoch": 45.15942028985507, "grad_norm": 0.33339712023735046, "learning_rate": 0.001, "loss": 1.7478, "step": 523488 }, { "epoch": 45.16425120772947, "grad_norm": 0.32521870732307434, "learning_rate": 0.001, "loss": 1.7441, "step": 523544 }, { "epoch": 45.169082125603865, "grad_norm": 0.2854542136192322, "learning_rate": 0.001, "loss": 1.7371, "step": 523600 }, { "epoch": 45.17391304347826, "grad_norm": 0.3545491099357605, "learning_rate": 0.001, "loss": 1.7458, "step": 523656 }, { "epoch": 45.17874396135266, "grad_norm": 2.9559524059295654, "learning_rate": 0.001, "loss": 1.7522, "step": 523712 }, { "epoch": 45.18357487922705, "grad_norm": 0.3445627987384796, "learning_rate": 0.001, "loss": 1.7375, "step": 523768 }, { "epoch": 45.18840579710145, "grad_norm": 0.6340352296829224, "learning_rate": 0.001, "loss": 1.7369, "step": 523824 }, { "epoch": 45.193236714975846, "grad_norm": 1.5123846530914307, "learning_rate": 0.001, "loss": 1.7268, "step": 523880 }, { "epoch": 45.19806763285024, "grad_norm": 0.33506837487220764, "learning_rate": 0.001, "loss": 1.73, "step": 523936 }, { "epoch": 45.20289855072464, "grad_norm": 10.253279685974121, "learning_rate": 0.001, "loss": 1.7378, "step": 523992 }, { "epoch": 45.207729468599034, "grad_norm": 0.45385637879371643, "learning_rate": 0.001, "loss": 1.7486, "step": 524048 }, { "epoch": 45.21256038647343, "grad_norm": 5.902604103088379, "learning_rate": 0.001, "loss": 1.7363, "step": 524104 }, { "epoch": 45.21739130434783, "grad_norm": 5.08174991607666, "learning_rate": 0.001, "loss": 1.819, "step": 524160 }, { "epoch": 45.22222222222222, "grad_norm": 0.6695230007171631, "learning_rate": 0.001, "loss": 1.8925, "step": 524216 }, { "epoch": 45.227053140096615, "grad_norm": 0.6429935693740845, "learning_rate": 0.001, "loss": 1.8022, "step": 524272 }, { "epoch": 45.231884057971016, "grad_norm": 0.25663134455680847, "learning_rate": 0.001, "loss": 1.7548, "step": 524328 }, { "epoch": 45.23671497584541, "grad_norm": 0.5481548309326172, "learning_rate": 0.001, "loss": 1.7535, "step": 524384 }, { "epoch": 45.24154589371981, "grad_norm": 0.2849384844303131, "learning_rate": 0.001, "loss": 1.7562, "step": 524440 }, { "epoch": 45.2463768115942, "grad_norm": 0.37499096989631653, "learning_rate": 0.001, "loss": 1.7626, "step": 524496 }, { "epoch": 45.2512077294686, "grad_norm": 2.353937864303589, "learning_rate": 0.001, "loss": 1.7944, "step": 524552 }, { "epoch": 45.256038647343, "grad_norm": 3.2369601726531982, "learning_rate": 0.001, "loss": 1.7825, "step": 524608 }, { "epoch": 45.26086956521739, "grad_norm": 0.302329957485199, "learning_rate": 0.001, "loss": 1.7773, "step": 524664 }, { "epoch": 45.265700483091784, "grad_norm": 0.23675577342510223, "learning_rate": 0.001, "loss": 1.7471, "step": 524720 }, { "epoch": 45.270531400966185, "grad_norm": 0.4364728331565857, "learning_rate": 0.001, "loss": 1.743, "step": 524776 }, { "epoch": 45.27536231884058, "grad_norm": 0.33125922083854675, "learning_rate": 0.001, "loss": 1.743, "step": 524832 }, { "epoch": 45.28019323671498, "grad_norm": 0.3779846727848053, "learning_rate": 0.001, "loss": 1.7355, "step": 524888 }, { "epoch": 45.28502415458937, "grad_norm": 0.40882301330566406, "learning_rate": 0.001, "loss": 1.7306, "step": 524944 }, { "epoch": 45.289855072463766, "grad_norm": 2.2268364429473877, "learning_rate": 0.001, "loss": 1.7389, "step": 525000 }, { "epoch": 45.29468599033817, "grad_norm": 1.0919376611709595, "learning_rate": 0.001, "loss": 1.7353, "step": 525056 }, { "epoch": 45.29951690821256, "grad_norm": 0.5641513466835022, "learning_rate": 0.001, "loss": 1.741, "step": 525112 }, { "epoch": 45.30434782608695, "grad_norm": 14.438705444335938, "learning_rate": 0.001, "loss": 1.7687, "step": 525168 }, { "epoch": 45.309178743961354, "grad_norm": 2.010845899581909, "learning_rate": 0.001, "loss": 1.785, "step": 525224 }, { "epoch": 45.31400966183575, "grad_norm": 0.4663834273815155, "learning_rate": 0.001, "loss": 1.7641, "step": 525280 }, { "epoch": 45.31884057971015, "grad_norm": 7.143008708953857, "learning_rate": 0.001, "loss": 1.7597, "step": 525336 }, { "epoch": 45.32367149758454, "grad_norm": 0.46525809168815613, "learning_rate": 0.001, "loss": 1.8088, "step": 525392 }, { "epoch": 45.328502415458935, "grad_norm": 16.268165588378906, "learning_rate": 0.001, "loss": 1.8063, "step": 525448 }, { "epoch": 45.333333333333336, "grad_norm": 0.48218870162963867, "learning_rate": 0.001, "loss": 1.786, "step": 525504 }, { "epoch": 45.33816425120773, "grad_norm": 0.47329452633857727, "learning_rate": 0.001, "loss": 1.7701, "step": 525560 }, { "epoch": 45.34299516908212, "grad_norm": 0.2616460919380188, "learning_rate": 0.001, "loss": 1.7773, "step": 525616 }, { "epoch": 45.34782608695652, "grad_norm": 1.672616720199585, "learning_rate": 0.001, "loss": 1.7675, "step": 525672 }, { "epoch": 45.35265700483092, "grad_norm": 1.5290451049804688, "learning_rate": 0.001, "loss": 1.7564, "step": 525728 }, { "epoch": 45.35748792270532, "grad_norm": 0.3674416244029999, "learning_rate": 0.001, "loss": 1.7572, "step": 525784 }, { "epoch": 45.36231884057971, "grad_norm": 0.4735299348831177, "learning_rate": 0.001, "loss": 1.7532, "step": 525840 }, { "epoch": 45.367149758454104, "grad_norm": 0.24400125443935394, "learning_rate": 0.001, "loss": 1.7524, "step": 525896 }, { "epoch": 45.371980676328505, "grad_norm": 9.190494537353516, "learning_rate": 0.001, "loss": 1.7554, "step": 525952 }, { "epoch": 45.3768115942029, "grad_norm": 1.2265187501907349, "learning_rate": 0.001, "loss": 1.7475, "step": 526008 }, { "epoch": 45.38164251207729, "grad_norm": 6.495782375335693, "learning_rate": 0.001, "loss": 1.7498, "step": 526064 }, { "epoch": 45.38647342995169, "grad_norm": 0.25140345096588135, "learning_rate": 0.001, "loss": 1.7492, "step": 526120 }, { "epoch": 45.391304347826086, "grad_norm": 0.38593724370002747, "learning_rate": 0.001, "loss": 1.7638, "step": 526176 }, { "epoch": 45.39613526570048, "grad_norm": 0.2932702600955963, "learning_rate": 0.001, "loss": 1.7672, "step": 526232 }, { "epoch": 45.40096618357488, "grad_norm": 1.4268132448196411, "learning_rate": 0.001, "loss": 1.7651, "step": 526288 }, { "epoch": 45.405797101449274, "grad_norm": 0.7168495655059814, "learning_rate": 0.001, "loss": 1.7389, "step": 526344 }, { "epoch": 45.410628019323674, "grad_norm": 0.2927975356578827, "learning_rate": 0.001, "loss": 1.7399, "step": 526400 }, { "epoch": 45.41545893719807, "grad_norm": 2.0651614665985107, "learning_rate": 0.001, "loss": 1.7417, "step": 526456 }, { "epoch": 45.42028985507246, "grad_norm": 1.1292849779129028, "learning_rate": 0.001, "loss": 1.7335, "step": 526512 }, { "epoch": 45.42512077294686, "grad_norm": 0.3622138500213623, "learning_rate": 0.001, "loss": 1.7491, "step": 526568 }, { "epoch": 45.429951690821255, "grad_norm": 0.43543022871017456, "learning_rate": 0.001, "loss": 1.7442, "step": 526624 }, { "epoch": 45.43478260869565, "grad_norm": 0.3022296130657196, "learning_rate": 0.001, "loss": 1.7427, "step": 526680 }, { "epoch": 45.43961352657005, "grad_norm": 0.2741853892803192, "learning_rate": 0.001, "loss": 1.737, "step": 526736 }, { "epoch": 45.44444444444444, "grad_norm": 0.30984988808631897, "learning_rate": 0.001, "loss": 1.7353, "step": 526792 }, { "epoch": 45.44927536231884, "grad_norm": 0.3662015497684479, "learning_rate": 0.001, "loss": 1.7413, "step": 526848 }, { "epoch": 45.45410628019324, "grad_norm": 0.3618333637714386, "learning_rate": 0.001, "loss": 1.7421, "step": 526904 }, { "epoch": 45.45893719806763, "grad_norm": 0.36521196365356445, "learning_rate": 0.001, "loss": 1.7418, "step": 526960 }, { "epoch": 45.46376811594203, "grad_norm": 0.7569295763969421, "learning_rate": 0.001, "loss": 1.7457, "step": 527016 }, { "epoch": 45.468599033816425, "grad_norm": 0.32151591777801514, "learning_rate": 0.001, "loss": 1.742, "step": 527072 }, { "epoch": 45.47342995169082, "grad_norm": 0.4429345726966858, "learning_rate": 0.001, "loss": 1.7354, "step": 527128 }, { "epoch": 45.47826086956522, "grad_norm": 0.26373571157455444, "learning_rate": 0.001, "loss": 1.7371, "step": 527184 }, { "epoch": 45.48309178743961, "grad_norm": 0.3029117286205292, "learning_rate": 0.001, "loss": 1.7371, "step": 527240 }, { "epoch": 45.48792270531401, "grad_norm": 1.0480002164840698, "learning_rate": 0.001, "loss": 1.7337, "step": 527296 }, { "epoch": 45.492753623188406, "grad_norm": 0.30854931473731995, "learning_rate": 0.001, "loss": 1.7365, "step": 527352 }, { "epoch": 45.4975845410628, "grad_norm": 0.3456036448478699, "learning_rate": 0.001, "loss": 1.7438, "step": 527408 }, { "epoch": 45.5024154589372, "grad_norm": 0.30500760674476624, "learning_rate": 0.001, "loss": 1.7403, "step": 527464 }, { "epoch": 45.507246376811594, "grad_norm": 0.26798027753829956, "learning_rate": 0.001, "loss": 1.7391, "step": 527520 }, { "epoch": 45.51207729468599, "grad_norm": 0.3296271860599518, "learning_rate": 0.001, "loss": 1.7553, "step": 527576 }, { "epoch": 45.51690821256039, "grad_norm": 0.3124648332595825, "learning_rate": 0.001, "loss": 1.7399, "step": 527632 }, { "epoch": 45.52173913043478, "grad_norm": 0.36314842104911804, "learning_rate": 0.001, "loss": 1.7394, "step": 527688 }, { "epoch": 45.52657004830918, "grad_norm": 0.7436350584030151, "learning_rate": 0.001, "loss": 1.7322, "step": 527744 }, { "epoch": 45.531400966183575, "grad_norm": 7.111041069030762, "learning_rate": 0.001, "loss": 1.7308, "step": 527800 }, { "epoch": 45.53623188405797, "grad_norm": 0.7580754160881042, "learning_rate": 0.001, "loss": 1.7389, "step": 527856 }, { "epoch": 45.54106280193237, "grad_norm": 4.935938358306885, "learning_rate": 0.001, "loss": 1.7477, "step": 527912 }, { "epoch": 45.54589371980676, "grad_norm": 0.4116394817829132, "learning_rate": 0.001, "loss": 1.7557, "step": 527968 }, { "epoch": 45.55072463768116, "grad_norm": 0.28215518593788147, "learning_rate": 0.001, "loss": 1.7843, "step": 528024 }, { "epoch": 45.55555555555556, "grad_norm": 2.5026497840881348, "learning_rate": 0.001, "loss": 1.7833, "step": 528080 }, { "epoch": 45.56038647342995, "grad_norm": 3.8257017135620117, "learning_rate": 0.001, "loss": 1.8122, "step": 528136 }, { "epoch": 45.56521739130435, "grad_norm": 8.211630821228027, "learning_rate": 0.001, "loss": 1.8626, "step": 528192 }, { "epoch": 45.570048309178745, "grad_norm": 7.574403285980225, "learning_rate": 0.001, "loss": 1.8375, "step": 528248 }, { "epoch": 45.57487922705314, "grad_norm": 3.6380040645599365, "learning_rate": 0.001, "loss": 1.802, "step": 528304 }, { "epoch": 45.57971014492754, "grad_norm": 0.7546054720878601, "learning_rate": 0.001, "loss": 1.7798, "step": 528360 }, { "epoch": 45.58454106280193, "grad_norm": 0.8628254532814026, "learning_rate": 0.001, "loss": 1.7794, "step": 528416 }, { "epoch": 45.589371980676326, "grad_norm": 3.9354124069213867, "learning_rate": 0.001, "loss": 1.7819, "step": 528472 }, { "epoch": 45.594202898550726, "grad_norm": 0.7690363526344299, "learning_rate": 0.001, "loss": 1.7771, "step": 528528 }, { "epoch": 45.59903381642512, "grad_norm": 10.157276153564453, "learning_rate": 0.001, "loss": 1.7866, "step": 528584 }, { "epoch": 45.60386473429952, "grad_norm": 0.6456630825996399, "learning_rate": 0.001, "loss": 1.7698, "step": 528640 }, { "epoch": 45.608695652173914, "grad_norm": 0.3934164345264435, "learning_rate": 0.001, "loss": 1.7736, "step": 528696 }, { "epoch": 45.61352657004831, "grad_norm": 0.3815837502479553, "learning_rate": 0.001, "loss": 1.7799, "step": 528752 }, { "epoch": 45.61835748792271, "grad_norm": 3.295544385910034, "learning_rate": 0.001, "loss": 1.7709, "step": 528808 }, { "epoch": 45.6231884057971, "grad_norm": 1.811916708946228, "learning_rate": 0.001, "loss": 1.7739, "step": 528864 }, { "epoch": 45.628019323671495, "grad_norm": 0.625079333782196, "learning_rate": 0.001, "loss": 1.8083, "step": 528920 }, { "epoch": 45.632850241545896, "grad_norm": 0.39647549390792847, "learning_rate": 0.001, "loss": 1.7836, "step": 528976 }, { "epoch": 45.63768115942029, "grad_norm": 1.930590271949768, "learning_rate": 0.001, "loss": 1.7692, "step": 529032 }, { "epoch": 45.64251207729468, "grad_norm": 0.9271938800811768, "learning_rate": 0.001, "loss": 1.7714, "step": 529088 }, { "epoch": 45.64734299516908, "grad_norm": 0.2554655373096466, "learning_rate": 0.001, "loss": 1.7741, "step": 529144 }, { "epoch": 45.65217391304348, "grad_norm": 0.6618444919586182, "learning_rate": 0.001, "loss": 1.7716, "step": 529200 }, { "epoch": 45.65700483091788, "grad_norm": 1.9374746084213257, "learning_rate": 0.001, "loss": 1.7678, "step": 529256 }, { "epoch": 45.66183574879227, "grad_norm": 0.2606915533542633, "learning_rate": 0.001, "loss": 1.7545, "step": 529312 }, { "epoch": 45.666666666666664, "grad_norm": 0.8805327415466309, "learning_rate": 0.001, "loss": 1.7641, "step": 529368 }, { "epoch": 45.671497584541065, "grad_norm": 3.211629629135132, "learning_rate": 0.001, "loss": 1.7737, "step": 529424 }, { "epoch": 45.67632850241546, "grad_norm": 3.8423213958740234, "learning_rate": 0.001, "loss": 1.7681, "step": 529480 }, { "epoch": 45.68115942028985, "grad_norm": 0.24217258393764496, "learning_rate": 0.001, "loss": 1.7757, "step": 529536 }, { "epoch": 45.68599033816425, "grad_norm": 0.3422798812389374, "learning_rate": 0.001, "loss": 1.7692, "step": 529592 }, { "epoch": 45.690821256038646, "grad_norm": 5.129047393798828, "learning_rate": 0.001, "loss": 1.7711, "step": 529648 }, { "epoch": 45.69565217391305, "grad_norm": 6.970626354217529, "learning_rate": 0.001, "loss": 1.767, "step": 529704 }, { "epoch": 45.70048309178744, "grad_norm": 5.012514591217041, "learning_rate": 0.001, "loss": 1.7791, "step": 529760 }, { "epoch": 45.70531400966183, "grad_norm": 2.616478681564331, "learning_rate": 0.001, "loss": 1.769, "step": 529816 }, { "epoch": 45.710144927536234, "grad_norm": 2.122823715209961, "learning_rate": 0.001, "loss": 1.7714, "step": 529872 }, { "epoch": 45.71497584541063, "grad_norm": 0.5691376328468323, "learning_rate": 0.001, "loss": 1.7876, "step": 529928 }, { "epoch": 45.71980676328502, "grad_norm": 4.813388824462891, "learning_rate": 0.001, "loss": 1.7888, "step": 529984 }, { "epoch": 45.72463768115942, "grad_norm": 0.5019924640655518, "learning_rate": 0.001, "loss": 1.7923, "step": 530040 }, { "epoch": 45.729468599033815, "grad_norm": 0.3909951448440552, "learning_rate": 0.001, "loss": 1.7851, "step": 530096 }, { "epoch": 45.734299516908216, "grad_norm": 9.063587188720703, "learning_rate": 0.001, "loss": 1.7759, "step": 530152 }, { "epoch": 45.73913043478261, "grad_norm": 0.250916063785553, "learning_rate": 0.001, "loss": 1.7784, "step": 530208 }, { "epoch": 45.743961352657, "grad_norm": 1.456973671913147, "learning_rate": 0.001, "loss": 1.7824, "step": 530264 }, { "epoch": 45.7487922705314, "grad_norm": 0.32225966453552246, "learning_rate": 0.001, "loss": 1.7797, "step": 530320 }, { "epoch": 45.7536231884058, "grad_norm": 1.5743563175201416, "learning_rate": 0.001, "loss": 1.7767, "step": 530376 }, { "epoch": 45.75845410628019, "grad_norm": 2.4193952083587646, "learning_rate": 0.001, "loss": 1.7769, "step": 530432 }, { "epoch": 45.76328502415459, "grad_norm": 5.2086920738220215, "learning_rate": 0.001, "loss": 1.7673, "step": 530488 }, { "epoch": 45.768115942028984, "grad_norm": 0.6869384050369263, "learning_rate": 0.001, "loss": 1.773, "step": 530544 }, { "epoch": 45.772946859903385, "grad_norm": 0.5429085493087769, "learning_rate": 0.001, "loss": 1.7759, "step": 530600 }, { "epoch": 45.77777777777778, "grad_norm": 0.2612639367580414, "learning_rate": 0.001, "loss": 1.7753, "step": 530656 }, { "epoch": 45.78260869565217, "grad_norm": 1.1171563863754272, "learning_rate": 0.001, "loss": 1.7655, "step": 530712 }, { "epoch": 45.78743961352657, "grad_norm": 1.7402960062026978, "learning_rate": 0.001, "loss": 1.7672, "step": 530768 }, { "epoch": 45.792270531400966, "grad_norm": 0.26652708649635315, "learning_rate": 0.001, "loss": 1.7707, "step": 530824 }, { "epoch": 45.79710144927536, "grad_norm": 0.4806079566478729, "learning_rate": 0.001, "loss": 1.7691, "step": 530880 }, { "epoch": 45.80193236714976, "grad_norm": 0.5488345623016357, "learning_rate": 0.001, "loss": 1.7636, "step": 530936 }, { "epoch": 45.806763285024154, "grad_norm": 0.3176897466182709, "learning_rate": 0.001, "loss": 1.761, "step": 530992 }, { "epoch": 45.81159420289855, "grad_norm": 0.32820042967796326, "learning_rate": 0.001, "loss": 1.7589, "step": 531048 }, { "epoch": 45.81642512077295, "grad_norm": 0.3281714618206024, "learning_rate": 0.001, "loss": 1.7723, "step": 531104 }, { "epoch": 45.82125603864734, "grad_norm": 0.31178396940231323, "learning_rate": 0.001, "loss": 1.7651, "step": 531160 }, { "epoch": 45.82608695652174, "grad_norm": 0.8093405961990356, "learning_rate": 0.001, "loss": 1.7573, "step": 531216 }, { "epoch": 45.830917874396135, "grad_norm": 0.5577892661094666, "learning_rate": 0.001, "loss": 1.7603, "step": 531272 }, { "epoch": 45.83574879227053, "grad_norm": 0.5364841222763062, "learning_rate": 0.001, "loss": 1.7479, "step": 531328 }, { "epoch": 45.84057971014493, "grad_norm": 1.711414098739624, "learning_rate": 0.001, "loss": 1.7619, "step": 531384 }, { "epoch": 45.84541062801932, "grad_norm": 0.2967108488082886, "learning_rate": 0.001, "loss": 1.7564, "step": 531440 }, { "epoch": 45.85024154589372, "grad_norm": 0.27044159173965454, "learning_rate": 0.001, "loss": 1.763, "step": 531496 }, { "epoch": 45.85507246376812, "grad_norm": 0.4724573791027069, "learning_rate": 0.001, "loss": 1.7609, "step": 531552 }, { "epoch": 45.85990338164251, "grad_norm": 0.2533262073993683, "learning_rate": 0.001, "loss": 1.771, "step": 531608 }, { "epoch": 45.86473429951691, "grad_norm": 0.3292151987552643, "learning_rate": 0.001, "loss": 1.7606, "step": 531664 }, { "epoch": 45.869565217391305, "grad_norm": 1.7710400819778442, "learning_rate": 0.001, "loss": 1.7592, "step": 531720 }, { "epoch": 45.8743961352657, "grad_norm": 0.3514461815357208, "learning_rate": 0.001, "loss": 1.7612, "step": 531776 }, { "epoch": 45.8792270531401, "grad_norm": 2.956660747528076, "learning_rate": 0.001, "loss": 1.7516, "step": 531832 }, { "epoch": 45.88405797101449, "grad_norm": 2.4839417934417725, "learning_rate": 0.001, "loss": 1.7552, "step": 531888 }, { "epoch": 45.888888888888886, "grad_norm": 0.3897237181663513, "learning_rate": 0.001, "loss": 1.7531, "step": 531944 }, { "epoch": 45.893719806763286, "grad_norm": 0.35215574502944946, "learning_rate": 0.001, "loss": 1.7659, "step": 532000 }, { "epoch": 45.89855072463768, "grad_norm": 1.2172863483428955, "learning_rate": 0.001, "loss": 1.764, "step": 532056 }, { "epoch": 45.90338164251208, "grad_norm": 0.3998480439186096, "learning_rate": 0.001, "loss": 1.7611, "step": 532112 }, { "epoch": 45.908212560386474, "grad_norm": 1.906186580657959, "learning_rate": 0.001, "loss": 1.7497, "step": 532168 }, { "epoch": 45.91304347826087, "grad_norm": 0.7061434388160706, "learning_rate": 0.001, "loss": 1.7451, "step": 532224 }, { "epoch": 45.91787439613527, "grad_norm": 3.7805590629577637, "learning_rate": 0.001, "loss": 1.7538, "step": 532280 }, { "epoch": 45.92270531400966, "grad_norm": 0.27729344367980957, "learning_rate": 0.001, "loss": 1.7497, "step": 532336 }, { "epoch": 45.927536231884055, "grad_norm": 1.9163427352905273, "learning_rate": 0.001, "loss": 1.7518, "step": 532392 }, { "epoch": 45.932367149758456, "grad_norm": 0.3306645154953003, "learning_rate": 0.001, "loss": 1.7557, "step": 532448 }, { "epoch": 45.93719806763285, "grad_norm": 0.38886746764183044, "learning_rate": 0.001, "loss": 1.7504, "step": 532504 }, { "epoch": 45.94202898550725, "grad_norm": 0.27934107184410095, "learning_rate": 0.001, "loss": 1.745, "step": 532560 }, { "epoch": 45.94685990338164, "grad_norm": 0.2990480661392212, "learning_rate": 0.001, "loss": 1.7437, "step": 532616 }, { "epoch": 45.95169082125604, "grad_norm": 0.3589654862880707, "learning_rate": 0.001, "loss": 1.744, "step": 532672 }, { "epoch": 45.95652173913044, "grad_norm": 0.3306331932544708, "learning_rate": 0.001, "loss": 1.7476, "step": 532728 }, { "epoch": 45.96135265700483, "grad_norm": 0.6184709072113037, "learning_rate": 0.001, "loss": 1.7417, "step": 532784 }, { "epoch": 45.966183574879224, "grad_norm": 0.4644886553287506, "learning_rate": 0.001, "loss": 1.7504, "step": 532840 }, { "epoch": 45.971014492753625, "grad_norm": 0.6293536424636841, "learning_rate": 0.001, "loss": 1.7417, "step": 532896 }, { "epoch": 45.97584541062802, "grad_norm": 1.7369558811187744, "learning_rate": 0.001, "loss": 1.7451, "step": 532952 }, { "epoch": 45.98067632850242, "grad_norm": 2.3743200302124023, "learning_rate": 0.001, "loss": 1.759, "step": 533008 }, { "epoch": 45.98550724637681, "grad_norm": 6.140804767608643, "learning_rate": 0.001, "loss": 1.7624, "step": 533064 }, { "epoch": 45.990338164251206, "grad_norm": 0.9559650421142578, "learning_rate": 0.001, "loss": 1.7587, "step": 533120 }, { "epoch": 45.99516908212561, "grad_norm": 0.6048048734664917, "learning_rate": 0.001, "loss": 1.7609, "step": 533176 }, { "epoch": 46.0, "grad_norm": 3.2695298194885254, "learning_rate": 0.001, "loss": 1.7532, "step": 533232 }, { "epoch": 46.00483091787439, "grad_norm": 0.417667955160141, "learning_rate": 0.001, "loss": 1.7176, "step": 533288 }, { "epoch": 46.009661835748794, "grad_norm": 0.32400092482566833, "learning_rate": 0.001, "loss": 1.7262, "step": 533344 }, { "epoch": 46.01449275362319, "grad_norm": 0.33220767974853516, "learning_rate": 0.001, "loss": 1.7115, "step": 533400 }, { "epoch": 46.01932367149758, "grad_norm": 0.6974681615829468, "learning_rate": 0.001, "loss": 1.7153, "step": 533456 }, { "epoch": 46.02415458937198, "grad_norm": 0.2924492657184601, "learning_rate": 0.001, "loss": 1.7127, "step": 533512 }, { "epoch": 46.028985507246375, "grad_norm": 0.8584845066070557, "learning_rate": 0.001, "loss": 1.7162, "step": 533568 }, { "epoch": 46.033816425120776, "grad_norm": 0.3093520402908325, "learning_rate": 0.001, "loss": 1.7066, "step": 533624 }, { "epoch": 46.03864734299517, "grad_norm": 0.7262521982192993, "learning_rate": 0.001, "loss": 1.7073, "step": 533680 }, { "epoch": 46.04347826086956, "grad_norm": 0.24096333980560303, "learning_rate": 0.001, "loss": 1.7105, "step": 533736 }, { "epoch": 46.04830917874396, "grad_norm": 0.8534532785415649, "learning_rate": 0.001, "loss": 1.7149, "step": 533792 }, { "epoch": 46.05314009661836, "grad_norm": 0.29234328866004944, "learning_rate": 0.001, "loss": 1.7089, "step": 533848 }, { "epoch": 46.05797101449275, "grad_norm": 0.46962398290634155, "learning_rate": 0.001, "loss": 1.7069, "step": 533904 }, { "epoch": 46.06280193236715, "grad_norm": 0.5291991233825684, "learning_rate": 0.001, "loss": 1.7037, "step": 533960 }, { "epoch": 46.067632850241544, "grad_norm": 0.4565190374851227, "learning_rate": 0.001, "loss": 1.7075, "step": 534016 }, { "epoch": 46.072463768115945, "grad_norm": 1.0419261455535889, "learning_rate": 0.001, "loss": 1.711, "step": 534072 }, { "epoch": 46.07729468599034, "grad_norm": 1.149330973625183, "learning_rate": 0.001, "loss": 1.7098, "step": 534128 }, { "epoch": 46.08212560386473, "grad_norm": 1.413597822189331, "learning_rate": 0.001, "loss": 1.7178, "step": 534184 }, { "epoch": 46.08695652173913, "grad_norm": 0.6423186659812927, "learning_rate": 0.001, "loss": 1.7172, "step": 534240 }, { "epoch": 46.091787439613526, "grad_norm": 2.29268741607666, "learning_rate": 0.001, "loss": 1.7175, "step": 534296 }, { "epoch": 46.09661835748792, "grad_norm": 1.1235376596450806, "learning_rate": 0.001, "loss": 1.7174, "step": 534352 }, { "epoch": 46.10144927536232, "grad_norm": 0.26134398579597473, "learning_rate": 0.001, "loss": 1.7134, "step": 534408 }, { "epoch": 46.106280193236714, "grad_norm": 16.597261428833008, "learning_rate": 0.001, "loss": 1.708, "step": 534464 }, { "epoch": 46.111111111111114, "grad_norm": 0.8168239593505859, "learning_rate": 0.001, "loss": 1.7133, "step": 534520 }, { "epoch": 46.11594202898551, "grad_norm": 0.33824869990348816, "learning_rate": 0.001, "loss": 1.718, "step": 534576 }, { "epoch": 46.1207729468599, "grad_norm": 0.4143065810203552, "learning_rate": 0.001, "loss": 1.7271, "step": 534632 }, { "epoch": 46.1256038647343, "grad_norm": 0.39663827419281006, "learning_rate": 0.001, "loss": 1.7172, "step": 534688 }, { "epoch": 46.130434782608695, "grad_norm": 0.7273496389389038, "learning_rate": 0.001, "loss": 1.7076, "step": 534744 }, { "epoch": 46.13526570048309, "grad_norm": 0.3638724982738495, "learning_rate": 0.001, "loss": 1.7195, "step": 534800 }, { "epoch": 46.14009661835749, "grad_norm": 0.2951245605945587, "learning_rate": 0.001, "loss": 1.7237, "step": 534856 }, { "epoch": 46.14492753623188, "grad_norm": 0.9037827849388123, "learning_rate": 0.001, "loss": 1.7212, "step": 534912 }, { "epoch": 46.14975845410628, "grad_norm": 0.3330898582935333, "learning_rate": 0.001, "loss": 1.7279, "step": 534968 }, { "epoch": 46.15458937198068, "grad_norm": 0.39309096336364746, "learning_rate": 0.001, "loss": 1.7221, "step": 535024 }, { "epoch": 46.15942028985507, "grad_norm": 0.3493063151836395, "learning_rate": 0.001, "loss": 1.7194, "step": 535080 }, { "epoch": 46.16425120772947, "grad_norm": 0.6391283273696899, "learning_rate": 0.001, "loss": 1.7174, "step": 535136 }, { "epoch": 46.169082125603865, "grad_norm": 0.7131687998771667, "learning_rate": 0.001, "loss": 1.7149, "step": 535192 }, { "epoch": 46.17391304347826, "grad_norm": 1.149593710899353, "learning_rate": 0.001, "loss": 1.7336, "step": 535248 }, { "epoch": 46.17874396135266, "grad_norm": 0.43514567613601685, "learning_rate": 0.001, "loss": 1.742, "step": 535304 }, { "epoch": 46.18357487922705, "grad_norm": 1.86967933177948, "learning_rate": 0.001, "loss": 1.7353, "step": 535360 }, { "epoch": 46.18840579710145, "grad_norm": 1.0727717876434326, "learning_rate": 0.001, "loss": 1.7352, "step": 535416 }, { "epoch": 46.193236714975846, "grad_norm": 0.3126072585582733, "learning_rate": 0.001, "loss": 1.7327, "step": 535472 }, { "epoch": 46.19806763285024, "grad_norm": 0.35572314262390137, "learning_rate": 0.001, "loss": 1.7189, "step": 535528 }, { "epoch": 46.20289855072464, "grad_norm": 1.61357843875885, "learning_rate": 0.001, "loss": 1.7172, "step": 535584 }, { "epoch": 46.207729468599034, "grad_norm": 0.6157678365707397, "learning_rate": 0.001, "loss": 1.73, "step": 535640 }, { "epoch": 46.21256038647343, "grad_norm": 0.4308922290802002, "learning_rate": 0.001, "loss": 1.7231, "step": 535696 }, { "epoch": 46.21739130434783, "grad_norm": 1.9200184345245361, "learning_rate": 0.001, "loss": 1.7377, "step": 535752 }, { "epoch": 46.22222222222222, "grad_norm": 2.3621315956115723, "learning_rate": 0.001, "loss": 1.749, "step": 535808 }, { "epoch": 46.227053140096615, "grad_norm": 1.0110433101654053, "learning_rate": 0.001, "loss": 1.7473, "step": 535864 }, { "epoch": 46.231884057971016, "grad_norm": 1.275486946105957, "learning_rate": 0.001, "loss": 1.7452, "step": 535920 }, { "epoch": 46.23671497584541, "grad_norm": 0.5857816338539124, "learning_rate": 0.001, "loss": 1.7395, "step": 535976 }, { "epoch": 46.24154589371981, "grad_norm": 0.9050594568252563, "learning_rate": 0.001, "loss": 1.7308, "step": 536032 }, { "epoch": 46.2463768115942, "grad_norm": 0.35458317399024963, "learning_rate": 0.001, "loss": 1.7363, "step": 536088 }, { "epoch": 46.2512077294686, "grad_norm": 0.43981805443763733, "learning_rate": 0.001, "loss": 1.7341, "step": 536144 }, { "epoch": 46.256038647343, "grad_norm": 0.31950825452804565, "learning_rate": 0.001, "loss": 1.7346, "step": 536200 }, { "epoch": 46.26086956521739, "grad_norm": 0.30714520812034607, "learning_rate": 0.001, "loss": 1.7341, "step": 536256 }, { "epoch": 46.265700483091784, "grad_norm": 0.32240891456604004, "learning_rate": 0.001, "loss": 1.7293, "step": 536312 }, { "epoch": 46.270531400966185, "grad_norm": 0.5001716613769531, "learning_rate": 0.001, "loss": 1.7281, "step": 536368 }, { "epoch": 46.27536231884058, "grad_norm": 0.32608523964881897, "learning_rate": 0.001, "loss": 1.7398, "step": 536424 }, { "epoch": 46.28019323671498, "grad_norm": 0.35022953152656555, "learning_rate": 0.001, "loss": 1.7329, "step": 536480 }, { "epoch": 46.28502415458937, "grad_norm": 0.6778161525726318, "learning_rate": 0.001, "loss": 1.7277, "step": 536536 }, { "epoch": 46.289855072463766, "grad_norm": 0.3277319669723511, "learning_rate": 0.001, "loss": 1.7306, "step": 536592 }, { "epoch": 46.29468599033817, "grad_norm": 0.32074132561683655, "learning_rate": 0.001, "loss": 1.7179, "step": 536648 }, { "epoch": 46.29951690821256, "grad_norm": 0.3303510248661041, "learning_rate": 0.001, "loss": 1.7171, "step": 536704 }, { "epoch": 46.30434782608695, "grad_norm": 0.2950139045715332, "learning_rate": 0.001, "loss": 1.7258, "step": 536760 }, { "epoch": 46.309178743961354, "grad_norm": 0.35521501302719116, "learning_rate": 0.001, "loss": 1.718, "step": 536816 }, { "epoch": 46.31400966183575, "grad_norm": 0.5644594430923462, "learning_rate": 0.001, "loss": 1.7132, "step": 536872 }, { "epoch": 46.31884057971015, "grad_norm": 12.377921104431152, "learning_rate": 0.001, "loss": 1.7156, "step": 536928 }, { "epoch": 46.32367149758454, "grad_norm": 0.4371606111526489, "learning_rate": 0.001, "loss": 1.7214, "step": 536984 }, { "epoch": 46.328502415458935, "grad_norm": 0.3676202595233917, "learning_rate": 0.001, "loss": 1.7313, "step": 537040 }, { "epoch": 46.333333333333336, "grad_norm": 0.43548282980918884, "learning_rate": 0.001, "loss": 1.7198, "step": 537096 }, { "epoch": 46.33816425120773, "grad_norm": 0.9101641178131104, "learning_rate": 0.001, "loss": 1.7228, "step": 537152 }, { "epoch": 46.34299516908212, "grad_norm": 0.4429427981376648, "learning_rate": 0.001, "loss": 1.7182, "step": 537208 }, { "epoch": 46.34782608695652, "grad_norm": 0.26476481556892395, "learning_rate": 0.001, "loss": 1.7208, "step": 537264 }, { "epoch": 46.35265700483092, "grad_norm": 0.43423348665237427, "learning_rate": 0.001, "loss": 1.7369, "step": 537320 }, { "epoch": 46.35748792270532, "grad_norm": 0.5661461353302002, "learning_rate": 0.001, "loss": 1.7317, "step": 537376 }, { "epoch": 46.36231884057971, "grad_norm": 0.9679610729217529, "learning_rate": 0.001, "loss": 1.7426, "step": 537432 }, { "epoch": 46.367149758454104, "grad_norm": 0.4791892170906067, "learning_rate": 0.001, "loss": 1.7547, "step": 537488 }, { "epoch": 46.371980676328505, "grad_norm": 0.9721389412879944, "learning_rate": 0.001, "loss": 1.7462, "step": 537544 }, { "epoch": 46.3768115942029, "grad_norm": 0.45305702090263367, "learning_rate": 0.001, "loss": 1.739, "step": 537600 }, { "epoch": 46.38164251207729, "grad_norm": 1.2942910194396973, "learning_rate": 0.001, "loss": 1.7451, "step": 537656 }, { "epoch": 46.38647342995169, "grad_norm": 2.3137528896331787, "learning_rate": 0.001, "loss": 1.7539, "step": 537712 }, { "epoch": 46.391304347826086, "grad_norm": 2.452158212661743, "learning_rate": 0.001, "loss": 1.7542, "step": 537768 }, { "epoch": 46.39613526570048, "grad_norm": 0.3273489475250244, "learning_rate": 0.001, "loss": 1.7699, "step": 537824 }, { "epoch": 46.40096618357488, "grad_norm": 0.4359605014324188, "learning_rate": 0.001, "loss": 1.7544, "step": 537880 }, { "epoch": 46.405797101449274, "grad_norm": 0.3248874843120575, "learning_rate": 0.001, "loss": 1.7507, "step": 537936 }, { "epoch": 46.410628019323674, "grad_norm": 0.3856216371059418, "learning_rate": 0.001, "loss": 1.7545, "step": 537992 }, { "epoch": 46.41545893719807, "grad_norm": 0.3041902184486389, "learning_rate": 0.001, "loss": 1.7463, "step": 538048 }, { "epoch": 46.42028985507246, "grad_norm": 1.364767074584961, "learning_rate": 0.001, "loss": 1.7482, "step": 538104 }, { "epoch": 46.42512077294686, "grad_norm": 2.3974130153656006, "learning_rate": 0.001, "loss": 1.7352, "step": 538160 }, { "epoch": 46.429951690821255, "grad_norm": 1.3930416107177734, "learning_rate": 0.001, "loss": 1.7404, "step": 538216 }, { "epoch": 46.43478260869565, "grad_norm": 0.34964656829833984, "learning_rate": 0.001, "loss": 1.7432, "step": 538272 }, { "epoch": 46.43961352657005, "grad_norm": 3.9416284561157227, "learning_rate": 0.001, "loss": 1.7403, "step": 538328 }, { "epoch": 46.44444444444444, "grad_norm": 0.3690131604671478, "learning_rate": 0.001, "loss": 1.7337, "step": 538384 }, { "epoch": 46.44927536231884, "grad_norm": 1.0517922639846802, "learning_rate": 0.001, "loss": 1.7437, "step": 538440 }, { "epoch": 46.45410628019324, "grad_norm": 1.3773114681243896, "learning_rate": 0.001, "loss": 1.735, "step": 538496 }, { "epoch": 46.45893719806763, "grad_norm": 7.648189544677734, "learning_rate": 0.001, "loss": 1.7393, "step": 538552 }, { "epoch": 46.46376811594203, "grad_norm": 7.102615833282471, "learning_rate": 0.001, "loss": 1.7344, "step": 538608 }, { "epoch": 46.468599033816425, "grad_norm": 3.7616474628448486, "learning_rate": 0.001, "loss": 1.734, "step": 538664 }, { "epoch": 46.47342995169082, "grad_norm": 1.7518610954284668, "learning_rate": 0.001, "loss": 1.7406, "step": 538720 }, { "epoch": 46.47826086956522, "grad_norm": 0.27856943011283875, "learning_rate": 0.001, "loss": 1.7356, "step": 538776 }, { "epoch": 46.48309178743961, "grad_norm": 0.9059680700302124, "learning_rate": 0.001, "loss": 1.7402, "step": 538832 }, { "epoch": 46.48792270531401, "grad_norm": 1.0681747198104858, "learning_rate": 0.001, "loss": 1.7355, "step": 538888 }, { "epoch": 46.492753623188406, "grad_norm": 0.2725263833999634, "learning_rate": 0.001, "loss": 1.7301, "step": 538944 }, { "epoch": 46.4975845410628, "grad_norm": 0.26980385184288025, "learning_rate": 0.001, "loss": 1.7339, "step": 539000 }, { "epoch": 46.5024154589372, "grad_norm": 0.3551305830478668, "learning_rate": 0.001, "loss": 1.7256, "step": 539056 }, { "epoch": 46.507246376811594, "grad_norm": 0.386801153421402, "learning_rate": 0.001, "loss": 1.7281, "step": 539112 }, { "epoch": 46.51207729468599, "grad_norm": 0.39928942918777466, "learning_rate": 0.001, "loss": 1.7285, "step": 539168 }, { "epoch": 46.51690821256039, "grad_norm": 0.786837637424469, "learning_rate": 0.001, "loss": 1.7275, "step": 539224 }, { "epoch": 46.52173913043478, "grad_norm": 0.3427911102771759, "learning_rate": 0.001, "loss": 1.7254, "step": 539280 }, { "epoch": 46.52657004830918, "grad_norm": 0.34961777925491333, "learning_rate": 0.001, "loss": 1.7342, "step": 539336 }, { "epoch": 46.531400966183575, "grad_norm": 0.34977880120277405, "learning_rate": 0.001, "loss": 1.7441, "step": 539392 }, { "epoch": 46.53623188405797, "grad_norm": 2.2069408893585205, "learning_rate": 0.001, "loss": 1.7314, "step": 539448 }, { "epoch": 46.54106280193237, "grad_norm": 0.3914407193660736, "learning_rate": 0.001, "loss": 1.7443, "step": 539504 }, { "epoch": 46.54589371980676, "grad_norm": 0.3044123649597168, "learning_rate": 0.001, "loss": 1.7459, "step": 539560 }, { "epoch": 46.55072463768116, "grad_norm": 0.3202945590019226, "learning_rate": 0.001, "loss": 1.7411, "step": 539616 }, { "epoch": 46.55555555555556, "grad_norm": 0.5264212489128113, "learning_rate": 0.001, "loss": 1.7302, "step": 539672 }, { "epoch": 46.56038647342995, "grad_norm": 0.46689650416374207, "learning_rate": 0.001, "loss": 1.7402, "step": 539728 }, { "epoch": 46.56521739130435, "grad_norm": 2.5595059394836426, "learning_rate": 0.001, "loss": 1.7353, "step": 539784 }, { "epoch": 46.570048309178745, "grad_norm": 0.29652321338653564, "learning_rate": 0.001, "loss": 1.7334, "step": 539840 }, { "epoch": 46.57487922705314, "grad_norm": 0.4661213457584381, "learning_rate": 0.001, "loss": 1.729, "step": 539896 }, { "epoch": 46.57971014492754, "grad_norm": 0.25999516248703003, "learning_rate": 0.001, "loss": 1.7341, "step": 539952 }, { "epoch": 46.58454106280193, "grad_norm": 0.3534297049045563, "learning_rate": 0.001, "loss": 1.7371, "step": 540008 }, { "epoch": 46.589371980676326, "grad_norm": 0.3848990201950073, "learning_rate": 0.001, "loss": 1.7257, "step": 540064 }, { "epoch": 46.594202898550726, "grad_norm": 0.4281889498233795, "learning_rate": 0.001, "loss": 1.7261, "step": 540120 }, { "epoch": 46.59903381642512, "grad_norm": 0.42716988921165466, "learning_rate": 0.001, "loss": 1.7366, "step": 540176 }, { "epoch": 46.60386473429952, "grad_norm": 0.2850320041179657, "learning_rate": 0.001, "loss": 1.7304, "step": 540232 }, { "epoch": 46.608695652173914, "grad_norm": 0.5041074752807617, "learning_rate": 0.001, "loss": 1.7313, "step": 540288 }, { "epoch": 46.61352657004831, "grad_norm": 0.26991644501686096, "learning_rate": 0.001, "loss": 1.7295, "step": 540344 }, { "epoch": 46.61835748792271, "grad_norm": 0.40907764434814453, "learning_rate": 0.001, "loss": 1.7272, "step": 540400 }, { "epoch": 46.6231884057971, "grad_norm": 0.7233055830001831, "learning_rate": 0.001, "loss": 1.7244, "step": 540456 }, { "epoch": 46.628019323671495, "grad_norm": 0.341800719499588, "learning_rate": 0.001, "loss": 1.7251, "step": 540512 }, { "epoch": 46.632850241545896, "grad_norm": 0.3241802752017975, "learning_rate": 0.001, "loss": 1.726, "step": 540568 }, { "epoch": 46.63768115942029, "grad_norm": 0.48334747552871704, "learning_rate": 0.001, "loss": 1.7313, "step": 540624 }, { "epoch": 46.64251207729468, "grad_norm": 0.38246187567710876, "learning_rate": 0.001, "loss": 1.7363, "step": 540680 }, { "epoch": 46.64734299516908, "grad_norm": 0.31606099009513855, "learning_rate": 0.001, "loss": 1.7342, "step": 540736 }, { "epoch": 46.65217391304348, "grad_norm": 0.291831910610199, "learning_rate": 0.001, "loss": 1.7356, "step": 540792 }, { "epoch": 46.65700483091788, "grad_norm": 0.34247252345085144, "learning_rate": 0.001, "loss": 1.7283, "step": 540848 }, { "epoch": 46.66183574879227, "grad_norm": 0.29882514476776123, "learning_rate": 0.001, "loss": 1.7242, "step": 540904 }, { "epoch": 46.666666666666664, "grad_norm": 1.3509570360183716, "learning_rate": 0.001, "loss": 1.7246, "step": 540960 }, { "epoch": 46.671497584541065, "grad_norm": 1.558355450630188, "learning_rate": 0.001, "loss": 1.735, "step": 541016 }, { "epoch": 46.67632850241546, "grad_norm": 0.5476917028427124, "learning_rate": 0.001, "loss": 1.7425, "step": 541072 }, { "epoch": 46.68115942028985, "grad_norm": 0.7144020795822144, "learning_rate": 0.001, "loss": 1.7423, "step": 541128 }, { "epoch": 46.68599033816425, "grad_norm": 0.7284640073776245, "learning_rate": 0.001, "loss": 1.7259, "step": 541184 }, { "epoch": 46.690821256038646, "grad_norm": 0.31424710154533386, "learning_rate": 0.001, "loss": 1.7368, "step": 541240 }, { "epoch": 46.69565217391305, "grad_norm": 0.3097711503505707, "learning_rate": 0.001, "loss": 1.7415, "step": 541296 }, { "epoch": 46.70048309178744, "grad_norm": 0.7392817735671997, "learning_rate": 0.001, "loss": 1.7327, "step": 541352 }, { "epoch": 46.70531400966183, "grad_norm": 0.4241707921028137, "learning_rate": 0.001, "loss": 1.7405, "step": 541408 }, { "epoch": 46.710144927536234, "grad_norm": 0.43632781505584717, "learning_rate": 0.001, "loss": 1.7344, "step": 541464 }, { "epoch": 46.71497584541063, "grad_norm": 0.27016007900238037, "learning_rate": 0.001, "loss": 1.7333, "step": 541520 }, { "epoch": 46.71980676328502, "grad_norm": 1.3281503915786743, "learning_rate": 0.001, "loss": 1.7332, "step": 541576 }, { "epoch": 46.72463768115942, "grad_norm": 0.2695807218551636, "learning_rate": 0.001, "loss": 1.7404, "step": 541632 }, { "epoch": 46.729468599033815, "grad_norm": 0.5190613269805908, "learning_rate": 0.001, "loss": 1.7393, "step": 541688 }, { "epoch": 46.734299516908216, "grad_norm": 0.6109021306037903, "learning_rate": 0.001, "loss": 1.7572, "step": 541744 }, { "epoch": 46.73913043478261, "grad_norm": 0.3460198938846588, "learning_rate": 0.001, "loss": 1.7492, "step": 541800 }, { "epoch": 46.743961352657, "grad_norm": 1.3889919519424438, "learning_rate": 0.001, "loss": 1.7509, "step": 541856 }, { "epoch": 46.7487922705314, "grad_norm": 1.2590762376785278, "learning_rate": 0.001, "loss": 1.7482, "step": 541912 }, { "epoch": 46.7536231884058, "grad_norm": 7.283885478973389, "learning_rate": 0.001, "loss": 1.7389, "step": 541968 }, { "epoch": 46.75845410628019, "grad_norm": 0.3557608127593994, "learning_rate": 0.001, "loss": 1.7341, "step": 542024 }, { "epoch": 46.76328502415459, "grad_norm": 0.41444477438926697, "learning_rate": 0.001, "loss": 1.7486, "step": 542080 }, { "epoch": 46.768115942028984, "grad_norm": 0.8650222420692444, "learning_rate": 0.001, "loss": 1.7346, "step": 542136 }, { "epoch": 46.772946859903385, "grad_norm": 0.31386256217956543, "learning_rate": 0.001, "loss": 1.7419, "step": 542192 }, { "epoch": 46.77777777777778, "grad_norm": 0.2825224697589874, "learning_rate": 0.001, "loss": 1.7399, "step": 542248 }, { "epoch": 46.78260869565217, "grad_norm": 2.5417585372924805, "learning_rate": 0.001, "loss": 1.7486, "step": 542304 }, { "epoch": 46.78743961352657, "grad_norm": 0.6257044672966003, "learning_rate": 0.001, "loss": 1.7526, "step": 542360 }, { "epoch": 46.792270531400966, "grad_norm": 0.45079246163368225, "learning_rate": 0.001, "loss": 1.7484, "step": 542416 }, { "epoch": 46.79710144927536, "grad_norm": 0.3512585759162903, "learning_rate": 0.001, "loss": 1.7587, "step": 542472 }, { "epoch": 46.80193236714976, "grad_norm": 1.1498621702194214, "learning_rate": 0.001, "loss": 1.7635, "step": 542528 }, { "epoch": 46.806763285024154, "grad_norm": 3.546875238418579, "learning_rate": 0.001, "loss": 1.7576, "step": 542584 }, { "epoch": 46.81159420289855, "grad_norm": 0.3002912998199463, "learning_rate": 0.001, "loss": 1.7381, "step": 542640 }, { "epoch": 46.81642512077295, "grad_norm": 4.210538387298584, "learning_rate": 0.001, "loss": 1.731, "step": 542696 }, { "epoch": 46.82125603864734, "grad_norm": 0.335534930229187, "learning_rate": 0.001, "loss": 1.7382, "step": 542752 }, { "epoch": 46.82608695652174, "grad_norm": 0.4776569604873657, "learning_rate": 0.001, "loss": 1.7368, "step": 542808 }, { "epoch": 46.830917874396135, "grad_norm": 0.6448674201965332, "learning_rate": 0.001, "loss": 1.7391, "step": 542864 }, { "epoch": 46.83574879227053, "grad_norm": 0.27452728152275085, "learning_rate": 0.001, "loss": 1.7341, "step": 542920 }, { "epoch": 46.84057971014493, "grad_norm": 0.774319589138031, "learning_rate": 0.001, "loss": 1.7408, "step": 542976 }, { "epoch": 46.84541062801932, "grad_norm": 1.252493143081665, "learning_rate": 0.001, "loss": 1.743, "step": 543032 }, { "epoch": 46.85024154589372, "grad_norm": 0.7029665112495422, "learning_rate": 0.001, "loss": 1.7362, "step": 543088 }, { "epoch": 46.85507246376812, "grad_norm": 0.3758379817008972, "learning_rate": 0.001, "loss": 1.7336, "step": 543144 }, { "epoch": 46.85990338164251, "grad_norm": 0.865254819393158, "learning_rate": 0.001, "loss": 1.7261, "step": 543200 }, { "epoch": 46.86473429951691, "grad_norm": 0.25620636343955994, "learning_rate": 0.001, "loss": 1.7402, "step": 543256 }, { "epoch": 46.869565217391305, "grad_norm": 0.4489244818687439, "learning_rate": 0.001, "loss": 1.7299, "step": 543312 }, { "epoch": 46.8743961352657, "grad_norm": 0.8276216387748718, "learning_rate": 0.001, "loss": 1.737, "step": 543368 }, { "epoch": 46.8792270531401, "grad_norm": 0.7146296501159668, "learning_rate": 0.001, "loss": 1.7277, "step": 543424 }, { "epoch": 46.88405797101449, "grad_norm": 0.353871613740921, "learning_rate": 0.001, "loss": 1.7465, "step": 543480 }, { "epoch": 46.888888888888886, "grad_norm": 0.357257217168808, "learning_rate": 0.001, "loss": 1.7508, "step": 543536 }, { "epoch": 46.893719806763286, "grad_norm": 19.562957763671875, "learning_rate": 0.001, "loss": 1.752, "step": 543592 }, { "epoch": 46.89855072463768, "grad_norm": 0.2644149959087372, "learning_rate": 0.001, "loss": 1.7452, "step": 543648 }, { "epoch": 46.90338164251208, "grad_norm": 0.6094096899032593, "learning_rate": 0.001, "loss": 1.7363, "step": 543704 }, { "epoch": 46.908212560386474, "grad_norm": 0.7662749886512756, "learning_rate": 0.001, "loss": 1.7393, "step": 543760 }, { "epoch": 46.91304347826087, "grad_norm": 0.5080996751785278, "learning_rate": 0.001, "loss": 1.737, "step": 543816 }, { "epoch": 46.91787439613527, "grad_norm": 0.7678481936454773, "learning_rate": 0.001, "loss": 1.7495, "step": 543872 }, { "epoch": 46.92270531400966, "grad_norm": 0.30250343680381775, "learning_rate": 0.001, "loss": 1.7327, "step": 543928 }, { "epoch": 46.927536231884055, "grad_norm": 0.33577293157577515, "learning_rate": 0.001, "loss": 1.7298, "step": 543984 }, { "epoch": 46.932367149758456, "grad_norm": 0.5459432005882263, "learning_rate": 0.001, "loss": 1.7336, "step": 544040 }, { "epoch": 46.93719806763285, "grad_norm": 0.2931721806526184, "learning_rate": 0.001, "loss": 1.7373, "step": 544096 }, { "epoch": 46.94202898550725, "grad_norm": 0.3553711771965027, "learning_rate": 0.001, "loss": 1.7328, "step": 544152 }, { "epoch": 46.94685990338164, "grad_norm": 0.3601447343826294, "learning_rate": 0.001, "loss": 1.731, "step": 544208 }, { "epoch": 46.95169082125604, "grad_norm": 0.5757013559341431, "learning_rate": 0.001, "loss": 1.7315, "step": 544264 }, { "epoch": 46.95652173913044, "grad_norm": 0.48101431131362915, "learning_rate": 0.001, "loss": 1.7267, "step": 544320 }, { "epoch": 46.96135265700483, "grad_norm": 0.4263179302215576, "learning_rate": 0.001, "loss": 1.73, "step": 544376 }, { "epoch": 46.966183574879224, "grad_norm": 0.8635637164115906, "learning_rate": 0.001, "loss": 1.7217, "step": 544432 }, { "epoch": 46.971014492753625, "grad_norm": 0.3279145658016205, "learning_rate": 0.001, "loss": 1.7371, "step": 544488 }, { "epoch": 46.97584541062802, "grad_norm": 0.2874996066093445, "learning_rate": 0.001, "loss": 1.7335, "step": 544544 }, { "epoch": 46.98067632850242, "grad_norm": 1.1116864681243896, "learning_rate": 0.001, "loss": 1.7358, "step": 544600 }, { "epoch": 46.98550724637681, "grad_norm": 0.6899238228797913, "learning_rate": 0.001, "loss": 1.7432, "step": 544656 }, { "epoch": 46.990338164251206, "grad_norm": 0.30474957823753357, "learning_rate": 0.001, "loss": 1.7381, "step": 544712 }, { "epoch": 46.99516908212561, "grad_norm": 0.38092026114463806, "learning_rate": 0.001, "loss": 1.7353, "step": 544768 }, { "epoch": 47.0, "grad_norm": 0.747328519821167, "learning_rate": 0.001, "loss": 1.7429, "step": 544824 }, { "epoch": 47.00483091787439, "grad_norm": 4.281112194061279, "learning_rate": 0.001, "loss": 1.7392, "step": 544880 }, { "epoch": 47.009661835748794, "grad_norm": 1.3495209217071533, "learning_rate": 0.001, "loss": 1.7578, "step": 544936 }, { "epoch": 47.01449275362319, "grad_norm": 0.4235178530216217, "learning_rate": 0.001, "loss": 1.7515, "step": 544992 }, { "epoch": 47.01932367149758, "grad_norm": 0.33219489455223083, "learning_rate": 0.001, "loss": 1.7618, "step": 545048 }, { "epoch": 47.02415458937198, "grad_norm": 0.3000887930393219, "learning_rate": 0.001, "loss": 1.766, "step": 545104 }, { "epoch": 47.028985507246375, "grad_norm": 0.27524304389953613, "learning_rate": 0.001, "loss": 1.758, "step": 545160 }, { "epoch": 47.033816425120776, "grad_norm": 3.1589064598083496, "learning_rate": 0.001, "loss": 1.7591, "step": 545216 }, { "epoch": 47.03864734299517, "grad_norm": 0.4939311444759369, "learning_rate": 0.001, "loss": 1.7937, "step": 545272 }, { "epoch": 47.04347826086956, "grad_norm": 0.4727461636066437, "learning_rate": 0.001, "loss": 1.774, "step": 545328 }, { "epoch": 47.04830917874396, "grad_norm": 0.4854380488395691, "learning_rate": 0.001, "loss": 1.7726, "step": 545384 }, { "epoch": 47.05314009661836, "grad_norm": 0.6815846562385559, "learning_rate": 0.001, "loss": 1.7664, "step": 545440 }, { "epoch": 47.05797101449275, "grad_norm": 0.5356012582778931, "learning_rate": 0.001, "loss": 1.7589, "step": 545496 }, { "epoch": 47.06280193236715, "grad_norm": 0.279079794883728, "learning_rate": 0.001, "loss": 1.769, "step": 545552 }, { "epoch": 47.067632850241544, "grad_norm": 0.429047167301178, "learning_rate": 0.001, "loss": 1.7648, "step": 545608 }, { "epoch": 47.072463768115945, "grad_norm": 0.4604443609714508, "learning_rate": 0.001, "loss": 1.7635, "step": 545664 }, { "epoch": 47.07729468599034, "grad_norm": 1.3425155878067017, "learning_rate": 0.001, "loss": 1.7561, "step": 545720 }, { "epoch": 47.08212560386473, "grad_norm": 1.7754504680633545, "learning_rate": 0.001, "loss": 1.7487, "step": 545776 }, { "epoch": 47.08695652173913, "grad_norm": 7.971505641937256, "learning_rate": 0.001, "loss": 1.7464, "step": 545832 }, { "epoch": 47.091787439613526, "grad_norm": 0.37450724840164185, "learning_rate": 0.001, "loss": 1.7363, "step": 545888 }, { "epoch": 47.09661835748792, "grad_norm": 0.3493143916130066, "learning_rate": 0.001, "loss": 1.7375, "step": 545944 }, { "epoch": 47.10144927536232, "grad_norm": 6.405582427978516, "learning_rate": 0.001, "loss": 1.7468, "step": 546000 }, { "epoch": 47.106280193236714, "grad_norm": 3.2116167545318604, "learning_rate": 0.001, "loss": 1.7487, "step": 546056 }, { "epoch": 47.111111111111114, "grad_norm": 0.40074628591537476, "learning_rate": 0.001, "loss": 1.758, "step": 546112 }, { "epoch": 47.11594202898551, "grad_norm": 0.31763288378715515, "learning_rate": 0.001, "loss": 1.7527, "step": 546168 }, { "epoch": 47.1207729468599, "grad_norm": 0.366273432970047, "learning_rate": 0.001, "loss": 1.7485, "step": 546224 }, { "epoch": 47.1256038647343, "grad_norm": 1.172105073928833, "learning_rate": 0.001, "loss": 1.7456, "step": 546280 }, { "epoch": 47.130434782608695, "grad_norm": 2.0263142585754395, "learning_rate": 0.001, "loss": 1.7519, "step": 546336 }, { "epoch": 47.13526570048309, "grad_norm": 0.8143089413642883, "learning_rate": 0.001, "loss": 1.7483, "step": 546392 }, { "epoch": 47.14009661835749, "grad_norm": 0.34187301993370056, "learning_rate": 0.001, "loss": 1.7384, "step": 546448 }, { "epoch": 47.14492753623188, "grad_norm": 0.31419092416763306, "learning_rate": 0.001, "loss": 1.7311, "step": 546504 }, { "epoch": 47.14975845410628, "grad_norm": 0.3252299129962921, "learning_rate": 0.001, "loss": 1.7319, "step": 546560 }, { "epoch": 47.15458937198068, "grad_norm": 0.387053519487381, "learning_rate": 0.001, "loss": 1.7309, "step": 546616 }, { "epoch": 47.15942028985507, "grad_norm": 2.4877116680145264, "learning_rate": 0.001, "loss": 1.7397, "step": 546672 }, { "epoch": 47.16425120772947, "grad_norm": 15.878884315490723, "learning_rate": 0.001, "loss": 1.7734, "step": 546728 }, { "epoch": 47.169082125603865, "grad_norm": 0.4047360420227051, "learning_rate": 0.001, "loss": 1.7612, "step": 546784 }, { "epoch": 47.17391304347826, "grad_norm": 0.2872651517391205, "learning_rate": 0.001, "loss": 1.7452, "step": 546840 }, { "epoch": 47.17874396135266, "grad_norm": 3.3258631229400635, "learning_rate": 0.001, "loss": 1.7472, "step": 546896 }, { "epoch": 47.18357487922705, "grad_norm": 1.3399384021759033, "learning_rate": 0.001, "loss": 1.7447, "step": 546952 }, { "epoch": 47.18840579710145, "grad_norm": 0.7725517749786377, "learning_rate": 0.001, "loss": 1.7476, "step": 547008 }, { "epoch": 47.193236714975846, "grad_norm": 0.5984556674957275, "learning_rate": 0.001, "loss": 1.7414, "step": 547064 }, { "epoch": 47.19806763285024, "grad_norm": 4.47747802734375, "learning_rate": 0.001, "loss": 1.7373, "step": 547120 }, { "epoch": 47.20289855072464, "grad_norm": 0.42242303490638733, "learning_rate": 0.001, "loss": 1.7357, "step": 547176 }, { "epoch": 47.207729468599034, "grad_norm": 0.40525195002555847, "learning_rate": 0.001, "loss": 1.734, "step": 547232 }, { "epoch": 47.21256038647343, "grad_norm": 5.895687580108643, "learning_rate": 0.001, "loss": 1.7382, "step": 547288 }, { "epoch": 47.21739130434783, "grad_norm": 3.2632696628570557, "learning_rate": 0.001, "loss": 1.7376, "step": 547344 }, { "epoch": 47.22222222222222, "grad_norm": 0.48902297019958496, "learning_rate": 0.001, "loss": 1.7386, "step": 547400 }, { "epoch": 47.227053140096615, "grad_norm": 0.8248130679130554, "learning_rate": 0.001, "loss": 1.7277, "step": 547456 }, { "epoch": 47.231884057971016, "grad_norm": 1.2479369640350342, "learning_rate": 0.001, "loss": 1.7216, "step": 547512 }, { "epoch": 47.23671497584541, "grad_norm": 0.30356669425964355, "learning_rate": 0.001, "loss": 1.7309, "step": 547568 }, { "epoch": 47.24154589371981, "grad_norm": 1.1951662302017212, "learning_rate": 0.001, "loss": 1.7333, "step": 547624 }, { "epoch": 47.2463768115942, "grad_norm": 1.8391238451004028, "learning_rate": 0.001, "loss": 1.7373, "step": 547680 }, { "epoch": 47.2512077294686, "grad_norm": 1.4917492866516113, "learning_rate": 0.001, "loss": 1.7377, "step": 547736 }, { "epoch": 47.256038647343, "grad_norm": 1.1511569023132324, "learning_rate": 0.001, "loss": 1.7318, "step": 547792 }, { "epoch": 47.26086956521739, "grad_norm": 0.8874995708465576, "learning_rate": 0.001, "loss": 1.7233, "step": 547848 }, { "epoch": 47.265700483091784, "grad_norm": 0.4187708795070648, "learning_rate": 0.001, "loss": 1.7295, "step": 547904 }, { "epoch": 47.270531400966185, "grad_norm": 0.7397943139076233, "learning_rate": 0.001, "loss": 1.721, "step": 547960 }, { "epoch": 47.27536231884058, "grad_norm": 2.0461182594299316, "learning_rate": 0.001, "loss": 1.726, "step": 548016 }, { "epoch": 47.28019323671498, "grad_norm": 2.1561331748962402, "learning_rate": 0.001, "loss": 1.7265, "step": 548072 }, { "epoch": 47.28502415458937, "grad_norm": 6.230227470397949, "learning_rate": 0.001, "loss": 1.7419, "step": 548128 }, { "epoch": 47.289855072463766, "grad_norm": 12.692964553833008, "learning_rate": 0.001, "loss": 1.731, "step": 548184 }, { "epoch": 47.29468599033817, "grad_norm": 0.911510169506073, "learning_rate": 0.001, "loss": 1.7347, "step": 548240 }, { "epoch": 47.29951690821256, "grad_norm": 1.0388526916503906, "learning_rate": 0.001, "loss": 1.7321, "step": 548296 }, { "epoch": 47.30434782608695, "grad_norm": 0.8128241300582886, "learning_rate": 0.001, "loss": 1.7231, "step": 548352 }, { "epoch": 47.309178743961354, "grad_norm": 0.2854093611240387, "learning_rate": 0.001, "loss": 1.7238, "step": 548408 }, { "epoch": 47.31400966183575, "grad_norm": 0.7462406754493713, "learning_rate": 0.001, "loss": 1.719, "step": 548464 }, { "epoch": 47.31884057971015, "grad_norm": 0.27985551953315735, "learning_rate": 0.001, "loss": 1.7244, "step": 548520 }, { "epoch": 47.32367149758454, "grad_norm": 0.4242180287837982, "learning_rate": 0.001, "loss": 1.7317, "step": 548576 }, { "epoch": 47.328502415458935, "grad_norm": 0.26534503698349, "learning_rate": 0.001, "loss": 1.7329, "step": 548632 }, { "epoch": 47.333333333333336, "grad_norm": 0.370477557182312, "learning_rate": 0.001, "loss": 1.7186, "step": 548688 }, { "epoch": 47.33816425120773, "grad_norm": 3.09329891204834, "learning_rate": 0.001, "loss": 1.7169, "step": 548744 }, { "epoch": 47.34299516908212, "grad_norm": 0.4720314145088196, "learning_rate": 0.001, "loss": 1.7182, "step": 548800 }, { "epoch": 47.34782608695652, "grad_norm": 0.3731083571910858, "learning_rate": 0.001, "loss": 1.7196, "step": 548856 }, { "epoch": 47.35265700483092, "grad_norm": 0.44405561685562134, "learning_rate": 0.001, "loss": 1.7227, "step": 548912 }, { "epoch": 47.35748792270532, "grad_norm": 0.32539740204811096, "learning_rate": 0.001, "loss": 1.8006, "step": 548968 }, { "epoch": 47.36231884057971, "grad_norm": 2.0203771591186523, "learning_rate": 0.001, "loss": 1.736, "step": 549024 }, { "epoch": 47.367149758454104, "grad_norm": 0.3143254220485687, "learning_rate": 0.001, "loss": 1.7275, "step": 549080 }, { "epoch": 47.371980676328505, "grad_norm": 0.9952269196510315, "learning_rate": 0.001, "loss": 1.7299, "step": 549136 }, { "epoch": 47.3768115942029, "grad_norm": 0.5755278468132019, "learning_rate": 0.001, "loss": 1.729, "step": 549192 }, { "epoch": 47.38164251207729, "grad_norm": 0.6705668568611145, "learning_rate": 0.001, "loss": 1.7282, "step": 549248 }, { "epoch": 47.38647342995169, "grad_norm": 0.35457879304885864, "learning_rate": 0.001, "loss": 1.7283, "step": 549304 }, { "epoch": 47.391304347826086, "grad_norm": 4.718808174133301, "learning_rate": 0.001, "loss": 1.7318, "step": 549360 }, { "epoch": 47.39613526570048, "grad_norm": 0.8548343181610107, "learning_rate": 0.001, "loss": 1.737, "step": 549416 }, { "epoch": 47.40096618357488, "grad_norm": 0.5011637210845947, "learning_rate": 0.001, "loss": 1.7298, "step": 549472 }, { "epoch": 47.405797101449274, "grad_norm": 1.8740864992141724, "learning_rate": 0.001, "loss": 1.7426, "step": 549528 }, { "epoch": 47.410628019323674, "grad_norm": 0.9155641794204712, "learning_rate": 0.001, "loss": 1.7445, "step": 549584 }, { "epoch": 47.41545893719807, "grad_norm": 0.335338294506073, "learning_rate": 0.001, "loss": 1.763, "step": 549640 }, { "epoch": 47.42028985507246, "grad_norm": 0.40270140767097473, "learning_rate": 0.001, "loss": 1.7523, "step": 549696 }, { "epoch": 47.42512077294686, "grad_norm": 0.3840565085411072, "learning_rate": 0.001, "loss": 1.7444, "step": 549752 }, { "epoch": 47.429951690821255, "grad_norm": 1.7191141843795776, "learning_rate": 0.001, "loss": 1.7434, "step": 549808 }, { "epoch": 47.43478260869565, "grad_norm": 1.3776488304138184, "learning_rate": 0.001, "loss": 1.7463, "step": 549864 }, { "epoch": 47.43961352657005, "grad_norm": 2.7461485862731934, "learning_rate": 0.001, "loss": 1.7368, "step": 549920 }, { "epoch": 47.44444444444444, "grad_norm": 1.8133947849273682, "learning_rate": 0.001, "loss": 1.7466, "step": 549976 }, { "epoch": 47.44927536231884, "grad_norm": 0.6772338151931763, "learning_rate": 0.001, "loss": 1.7417, "step": 550032 }, { "epoch": 47.45410628019324, "grad_norm": 3.5082292556762695, "learning_rate": 0.001, "loss": 1.741, "step": 550088 }, { "epoch": 47.45893719806763, "grad_norm": 12.43543529510498, "learning_rate": 0.001, "loss": 1.7355, "step": 550144 }, { "epoch": 47.46376811594203, "grad_norm": 1.3880958557128906, "learning_rate": 0.001, "loss": 1.7534, "step": 550200 }, { "epoch": 47.468599033816425, "grad_norm": 1.864904522895813, "learning_rate": 0.001, "loss": 1.7439, "step": 550256 }, { "epoch": 47.47342995169082, "grad_norm": 0.6884762644767761, "learning_rate": 0.001, "loss": 1.7556, "step": 550312 }, { "epoch": 47.47826086956522, "grad_norm": 0.4680933356285095, "learning_rate": 0.001, "loss": 1.7554, "step": 550368 }, { "epoch": 47.48309178743961, "grad_norm": 0.4483138620853424, "learning_rate": 0.001, "loss": 1.7552, "step": 550424 }, { "epoch": 47.48792270531401, "grad_norm": 2.099839448928833, "learning_rate": 0.001, "loss": 1.7552, "step": 550480 }, { "epoch": 47.492753623188406, "grad_norm": 0.963566243648529, "learning_rate": 0.001, "loss": 1.776, "step": 550536 }, { "epoch": 47.4975845410628, "grad_norm": 0.31251609325408936, "learning_rate": 0.001, "loss": 1.7659, "step": 550592 }, { "epoch": 47.5024154589372, "grad_norm": 0.4312027394771576, "learning_rate": 0.001, "loss": 1.7572, "step": 550648 }, { "epoch": 47.507246376811594, "grad_norm": 2.301367998123169, "learning_rate": 0.001, "loss": 1.7415, "step": 550704 }, { "epoch": 47.51207729468599, "grad_norm": 1.1503932476043701, "learning_rate": 0.001, "loss": 1.7532, "step": 550760 }, { "epoch": 47.51690821256039, "grad_norm": 0.6705819964408875, "learning_rate": 0.001, "loss": 1.7696, "step": 550816 }, { "epoch": 47.52173913043478, "grad_norm": 0.7047042846679688, "learning_rate": 0.001, "loss": 1.7577, "step": 550872 }, { "epoch": 47.52657004830918, "grad_norm": 0.2805469036102295, "learning_rate": 0.001, "loss": 1.7516, "step": 550928 }, { "epoch": 47.531400966183575, "grad_norm": 0.577847421169281, "learning_rate": 0.001, "loss": 1.7581, "step": 550984 }, { "epoch": 47.53623188405797, "grad_norm": 0.41907113790512085, "learning_rate": 0.001, "loss": 1.7597, "step": 551040 }, { "epoch": 47.54106280193237, "grad_norm": 0.5313217043876648, "learning_rate": 0.001, "loss": 1.7595, "step": 551096 }, { "epoch": 47.54589371980676, "grad_norm": 0.47615495324134827, "learning_rate": 0.001, "loss": 1.7417, "step": 551152 }, { "epoch": 47.55072463768116, "grad_norm": 0.27895495295524597, "learning_rate": 0.001, "loss": 1.7547, "step": 551208 }, { "epoch": 47.55555555555556, "grad_norm": 0.3781746029853821, "learning_rate": 0.001, "loss": 1.7547, "step": 551264 }, { "epoch": 47.56038647342995, "grad_norm": 0.6573802828788757, "learning_rate": 0.001, "loss": 1.7543, "step": 551320 }, { "epoch": 47.56521739130435, "grad_norm": 1.1413826942443848, "learning_rate": 0.001, "loss": 1.7521, "step": 551376 }, { "epoch": 47.570048309178745, "grad_norm": 1.2589777708053589, "learning_rate": 0.001, "loss": 1.7505, "step": 551432 }, { "epoch": 47.57487922705314, "grad_norm": 1.6205741167068481, "learning_rate": 0.001, "loss": 1.751, "step": 551488 }, { "epoch": 47.57971014492754, "grad_norm": 0.8519698977470398, "learning_rate": 0.001, "loss": 1.755, "step": 551544 }, { "epoch": 47.58454106280193, "grad_norm": 0.3133470416069031, "learning_rate": 0.001, "loss": 1.7631, "step": 551600 }, { "epoch": 47.589371980676326, "grad_norm": 1.2281559705734253, "learning_rate": 0.001, "loss": 1.758, "step": 551656 }, { "epoch": 47.594202898550726, "grad_norm": 1.2636651992797852, "learning_rate": 0.001, "loss": 1.7692, "step": 551712 }, { "epoch": 47.59903381642512, "grad_norm": 8.77667236328125, "learning_rate": 0.001, "loss": 1.759, "step": 551768 }, { "epoch": 47.60386473429952, "grad_norm": 0.447468638420105, "learning_rate": 0.001, "loss": 1.7526, "step": 551824 }, { "epoch": 47.608695652173914, "grad_norm": 3.714233160018921, "learning_rate": 0.001, "loss": 1.7488, "step": 551880 }, { "epoch": 47.61352657004831, "grad_norm": 0.35020801424980164, "learning_rate": 0.001, "loss": 1.7542, "step": 551936 }, { "epoch": 47.61835748792271, "grad_norm": 0.27114036679267883, "learning_rate": 0.001, "loss": 1.7486, "step": 551992 }, { "epoch": 47.6231884057971, "grad_norm": 0.5277588963508606, "learning_rate": 0.001, "loss": 1.748, "step": 552048 }, { "epoch": 47.628019323671495, "grad_norm": 4.6601972579956055, "learning_rate": 0.001, "loss": 1.7477, "step": 552104 }, { "epoch": 47.632850241545896, "grad_norm": 0.4733608365058899, "learning_rate": 0.001, "loss": 1.7482, "step": 552160 }, { "epoch": 47.63768115942029, "grad_norm": 0.2772434949874878, "learning_rate": 0.001, "loss": 1.7721, "step": 552216 }, { "epoch": 47.64251207729468, "grad_norm": 0.38333505392074585, "learning_rate": 0.001, "loss": 1.7704, "step": 552272 }, { "epoch": 47.64734299516908, "grad_norm": 0.9257436394691467, "learning_rate": 0.001, "loss": 1.7686, "step": 552328 }, { "epoch": 47.65217391304348, "grad_norm": 0.9447434544563293, "learning_rate": 0.001, "loss": 1.7812, "step": 552384 }, { "epoch": 47.65700483091788, "grad_norm": 0.27587616443634033, "learning_rate": 0.001, "loss": 1.7617, "step": 552440 }, { "epoch": 47.66183574879227, "grad_norm": 0.31905314326286316, "learning_rate": 0.001, "loss": 1.7443, "step": 552496 }, { "epoch": 47.666666666666664, "grad_norm": 2.9687397480010986, "learning_rate": 0.001, "loss": 1.764, "step": 552552 }, { "epoch": 47.671497584541065, "grad_norm": 0.2674528956413269, "learning_rate": 0.001, "loss": 1.7485, "step": 552608 }, { "epoch": 47.67632850241546, "grad_norm": 0.26541611552238464, "learning_rate": 0.001, "loss": 1.7458, "step": 552664 }, { "epoch": 47.68115942028985, "grad_norm": 13.463668823242188, "learning_rate": 0.001, "loss": 1.7376, "step": 552720 }, { "epoch": 47.68599033816425, "grad_norm": 0.30317020416259766, "learning_rate": 0.001, "loss": 1.7492, "step": 552776 }, { "epoch": 47.690821256038646, "grad_norm": 0.33294859528541565, "learning_rate": 0.001, "loss": 1.7497, "step": 552832 }, { "epoch": 47.69565217391305, "grad_norm": 1.5886945724487305, "learning_rate": 0.001, "loss": 1.7517, "step": 552888 }, { "epoch": 47.70048309178744, "grad_norm": 0.261288046836853, "learning_rate": 0.001, "loss": 1.7422, "step": 552944 }, { "epoch": 47.70531400966183, "grad_norm": 0.2990473806858063, "learning_rate": 0.001, "loss": 1.7425, "step": 553000 }, { "epoch": 47.710144927536234, "grad_norm": 0.2454865723848343, "learning_rate": 0.001, "loss": 1.7386, "step": 553056 }, { "epoch": 47.71497584541063, "grad_norm": 0.3279196321964264, "learning_rate": 0.001, "loss": 1.7381, "step": 553112 }, { "epoch": 47.71980676328502, "grad_norm": 10.061687469482422, "learning_rate": 0.001, "loss": 1.736, "step": 553168 }, { "epoch": 47.72463768115942, "grad_norm": 0.24158822000026703, "learning_rate": 0.001, "loss": 1.7391, "step": 553224 }, { "epoch": 47.729468599033815, "grad_norm": 12.058940887451172, "learning_rate": 0.001, "loss": 1.7414, "step": 553280 }, { "epoch": 47.734299516908216, "grad_norm": 0.2691340446472168, "learning_rate": 0.001, "loss": 1.7357, "step": 553336 }, { "epoch": 47.73913043478261, "grad_norm": 0.396634966135025, "learning_rate": 0.001, "loss": 1.7501, "step": 553392 }, { "epoch": 47.743961352657, "grad_norm": 1.7510796785354614, "learning_rate": 0.001, "loss": 1.7385, "step": 553448 }, { "epoch": 47.7487922705314, "grad_norm": 0.3275125324726105, "learning_rate": 0.001, "loss": 1.7517, "step": 553504 }, { "epoch": 47.7536231884058, "grad_norm": 0.5373415946960449, "learning_rate": 0.001, "loss": 1.7363, "step": 553560 }, { "epoch": 47.75845410628019, "grad_norm": 0.2426336109638214, "learning_rate": 0.001, "loss": 1.7395, "step": 553616 }, { "epoch": 47.76328502415459, "grad_norm": 4.613277912139893, "learning_rate": 0.001, "loss": 1.7367, "step": 553672 }, { "epoch": 47.768115942028984, "grad_norm": 0.44359657168388367, "learning_rate": 0.001, "loss": 1.7381, "step": 553728 }, { "epoch": 47.772946859903385, "grad_norm": 0.3343617916107178, "learning_rate": 0.001, "loss": 1.7344, "step": 553784 }, { "epoch": 47.77777777777778, "grad_norm": 0.3519558012485504, "learning_rate": 0.001, "loss": 1.729, "step": 553840 }, { "epoch": 47.78260869565217, "grad_norm": 0.8114468455314636, "learning_rate": 0.001, "loss": 1.7287, "step": 553896 }, { "epoch": 47.78743961352657, "grad_norm": 1.0570392608642578, "learning_rate": 0.001, "loss": 1.7371, "step": 553952 }, { "epoch": 47.792270531400966, "grad_norm": 9.910186767578125, "learning_rate": 0.001, "loss": 1.7439, "step": 554008 }, { "epoch": 47.79710144927536, "grad_norm": 0.24146710336208344, "learning_rate": 0.001, "loss": 1.7394, "step": 554064 }, { "epoch": 47.80193236714976, "grad_norm": 0.31374940276145935, "learning_rate": 0.001, "loss": 1.7351, "step": 554120 }, { "epoch": 47.806763285024154, "grad_norm": 0.3112371563911438, "learning_rate": 0.001, "loss": 1.7319, "step": 554176 }, { "epoch": 47.81159420289855, "grad_norm": 0.39495551586151123, "learning_rate": 0.001, "loss": 1.7313, "step": 554232 }, { "epoch": 47.81642512077295, "grad_norm": 11.296889305114746, "learning_rate": 0.001, "loss": 1.7424, "step": 554288 }, { "epoch": 47.82125603864734, "grad_norm": 0.31350523233413696, "learning_rate": 0.001, "loss": 1.7467, "step": 554344 }, { "epoch": 47.82608695652174, "grad_norm": 0.3233942985534668, "learning_rate": 0.001, "loss": 1.7391, "step": 554400 }, { "epoch": 47.830917874396135, "grad_norm": 0.29573899507522583, "learning_rate": 0.001, "loss": 1.7373, "step": 554456 }, { "epoch": 47.83574879227053, "grad_norm": 0.3130510747432709, "learning_rate": 0.001, "loss": 1.7383, "step": 554512 }, { "epoch": 47.84057971014493, "grad_norm": 0.2704949378967285, "learning_rate": 0.001, "loss": 1.7323, "step": 554568 }, { "epoch": 47.84541062801932, "grad_norm": 1.3435925245285034, "learning_rate": 0.001, "loss": 1.7311, "step": 554624 }, { "epoch": 47.85024154589372, "grad_norm": 18.459707260131836, "learning_rate": 0.001, "loss": 1.7372, "step": 554680 }, { "epoch": 47.85507246376812, "grad_norm": 1.997094750404358, "learning_rate": 0.001, "loss": 1.7435, "step": 554736 }, { "epoch": 47.85990338164251, "grad_norm": 0.3167450428009033, "learning_rate": 0.001, "loss": 1.7569, "step": 554792 }, { "epoch": 47.86473429951691, "grad_norm": 2.537158250808716, "learning_rate": 0.001, "loss": 1.7544, "step": 554848 }, { "epoch": 47.869565217391305, "grad_norm": 0.2570529282093048, "learning_rate": 0.001, "loss": 1.7469, "step": 554904 }, { "epoch": 47.8743961352657, "grad_norm": 0.42896637320518494, "learning_rate": 0.001, "loss": 1.7555, "step": 554960 }, { "epoch": 47.8792270531401, "grad_norm": 0.3532427251338959, "learning_rate": 0.001, "loss": 1.7619, "step": 555016 }, { "epoch": 47.88405797101449, "grad_norm": 0.4625174105167389, "learning_rate": 0.001, "loss": 1.7427, "step": 555072 }, { "epoch": 47.888888888888886, "grad_norm": 0.8862326741218567, "learning_rate": 0.001, "loss": 1.7421, "step": 555128 }, { "epoch": 47.893719806763286, "grad_norm": 0.36017340421676636, "learning_rate": 0.001, "loss": 1.7324, "step": 555184 }, { "epoch": 47.89855072463768, "grad_norm": 0.6789189577102661, "learning_rate": 0.001, "loss": 1.73, "step": 555240 }, { "epoch": 47.90338164251208, "grad_norm": 0.4127628207206726, "learning_rate": 0.001, "loss": 1.7473, "step": 555296 }, { "epoch": 47.908212560386474, "grad_norm": 0.25015756487846375, "learning_rate": 0.001, "loss": 1.7662, "step": 555352 }, { "epoch": 47.91304347826087, "grad_norm": 0.3704906105995178, "learning_rate": 0.001, "loss": 1.755, "step": 555408 }, { "epoch": 47.91787439613527, "grad_norm": 0.6751772165298462, "learning_rate": 0.001, "loss": 1.7556, "step": 555464 }, { "epoch": 47.92270531400966, "grad_norm": 1.0634523630142212, "learning_rate": 0.001, "loss": 1.7581, "step": 555520 }, { "epoch": 47.927536231884055, "grad_norm": 0.33919262886047363, "learning_rate": 0.001, "loss": 1.763, "step": 555576 }, { "epoch": 47.932367149758456, "grad_norm": 0.2908060848712921, "learning_rate": 0.001, "loss": 1.7561, "step": 555632 }, { "epoch": 47.93719806763285, "grad_norm": 0.23702818155288696, "learning_rate": 0.001, "loss": 1.7568, "step": 555688 }, { "epoch": 47.94202898550725, "grad_norm": 0.2846589684486389, "learning_rate": 0.001, "loss": 1.7532, "step": 555744 }, { "epoch": 47.94685990338164, "grad_norm": 0.30871111154556274, "learning_rate": 0.001, "loss": 1.7457, "step": 555800 }, { "epoch": 47.95169082125604, "grad_norm": 0.5630412101745605, "learning_rate": 0.001, "loss": 1.7477, "step": 555856 }, { "epoch": 47.95652173913044, "grad_norm": 0.35094401240348816, "learning_rate": 0.001, "loss": 1.7552, "step": 555912 }, { "epoch": 47.96135265700483, "grad_norm": 4.494852542877197, "learning_rate": 0.001, "loss": 1.7622, "step": 555968 }, { "epoch": 47.966183574879224, "grad_norm": 2.3337714672088623, "learning_rate": 0.001, "loss": 1.7628, "step": 556024 }, { "epoch": 47.971014492753625, "grad_norm": 1.6576848030090332, "learning_rate": 0.001, "loss": 1.76, "step": 556080 }, { "epoch": 47.97584541062802, "grad_norm": 0.6846519112586975, "learning_rate": 0.001, "loss": 1.7509, "step": 556136 }, { "epoch": 47.98067632850242, "grad_norm": 0.2737486660480499, "learning_rate": 0.001, "loss": 1.7459, "step": 556192 }, { "epoch": 47.98550724637681, "grad_norm": 0.6786449551582336, "learning_rate": 0.001, "loss": 1.7522, "step": 556248 }, { "epoch": 47.990338164251206, "grad_norm": 0.597537636756897, "learning_rate": 0.001, "loss": 1.7505, "step": 556304 }, { "epoch": 47.99516908212561, "grad_norm": 0.45557186007499695, "learning_rate": 0.001, "loss": 1.7531, "step": 556360 }, { "epoch": 48.0, "grad_norm": 0.419938325881958, "learning_rate": 0.001, "loss": 1.7476, "step": 556416 }, { "epoch": 48.00483091787439, "grad_norm": 0.9752640724182129, "learning_rate": 0.001, "loss": 1.7125, "step": 556472 }, { "epoch": 48.009661835748794, "grad_norm": 1.3594614267349243, "learning_rate": 0.001, "loss": 1.7111, "step": 556528 }, { "epoch": 48.01449275362319, "grad_norm": 0.6443732976913452, "learning_rate": 0.001, "loss": 1.7124, "step": 556584 }, { "epoch": 48.01932367149758, "grad_norm": 15.07887077331543, "learning_rate": 0.001, "loss": 1.7139, "step": 556640 }, { "epoch": 48.02415458937198, "grad_norm": 3.3458821773529053, "learning_rate": 0.001, "loss": 1.7082, "step": 556696 }, { "epoch": 48.028985507246375, "grad_norm": 4.041109561920166, "learning_rate": 0.001, "loss": 1.728, "step": 556752 }, { "epoch": 48.033816425120776, "grad_norm": 2.4486095905303955, "learning_rate": 0.001, "loss": 1.7207, "step": 556808 }, { "epoch": 48.03864734299517, "grad_norm": 4.796956539154053, "learning_rate": 0.001, "loss": 1.7232, "step": 556864 }, { "epoch": 48.04347826086956, "grad_norm": 0.46816253662109375, "learning_rate": 0.001, "loss": 1.7198, "step": 556920 }, { "epoch": 48.04830917874396, "grad_norm": 0.3836163282394409, "learning_rate": 0.001, "loss": 1.7177, "step": 556976 }, { "epoch": 48.05314009661836, "grad_norm": 1.2982035875320435, "learning_rate": 0.001, "loss": 1.7114, "step": 557032 }, { "epoch": 48.05797101449275, "grad_norm": 0.3175109326839447, "learning_rate": 0.001, "loss": 1.7396, "step": 557088 }, { "epoch": 48.06280193236715, "grad_norm": 4.622042179107666, "learning_rate": 0.001, "loss": 1.7247, "step": 557144 }, { "epoch": 48.067632850241544, "grad_norm": 0.27719756960868835, "learning_rate": 0.001, "loss": 1.7267, "step": 557200 }, { "epoch": 48.072463768115945, "grad_norm": 0.2828962206840515, "learning_rate": 0.001, "loss": 1.7265, "step": 557256 }, { "epoch": 48.07729468599034, "grad_norm": 0.36798495054244995, "learning_rate": 0.001, "loss": 1.7219, "step": 557312 }, { "epoch": 48.08212560386473, "grad_norm": 0.2621324956417084, "learning_rate": 0.001, "loss": 1.7188, "step": 557368 }, { "epoch": 48.08695652173913, "grad_norm": 0.471993625164032, "learning_rate": 0.001, "loss": 1.7236, "step": 557424 }, { "epoch": 48.091787439613526, "grad_norm": 0.6498216986656189, "learning_rate": 0.001, "loss": 1.7256, "step": 557480 }, { "epoch": 48.09661835748792, "grad_norm": 0.9547773599624634, "learning_rate": 0.001, "loss": 1.719, "step": 557536 }, { "epoch": 48.10144927536232, "grad_norm": 0.904961109161377, "learning_rate": 0.001, "loss": 1.7269, "step": 557592 }, { "epoch": 48.106280193236714, "grad_norm": 0.3333018124103546, "learning_rate": 0.001, "loss": 1.7224, "step": 557648 }, { "epoch": 48.111111111111114, "grad_norm": 0.2940675616264343, "learning_rate": 0.001, "loss": 1.7216, "step": 557704 }, { "epoch": 48.11594202898551, "grad_norm": 0.3581346869468689, "learning_rate": 0.001, "loss": 1.7253, "step": 557760 }, { "epoch": 48.1207729468599, "grad_norm": 2.423856735229492, "learning_rate": 0.001, "loss": 1.7238, "step": 557816 }, { "epoch": 48.1256038647343, "grad_norm": 0.5842982530593872, "learning_rate": 0.001, "loss": 1.7433, "step": 557872 }, { "epoch": 48.130434782608695, "grad_norm": 1.020571231842041, "learning_rate": 0.001, "loss": 1.746, "step": 557928 }, { "epoch": 48.13526570048309, "grad_norm": 2.6577956676483154, "learning_rate": 0.001, "loss": 1.7533, "step": 557984 }, { "epoch": 48.14009661835749, "grad_norm": 0.7910611033439636, "learning_rate": 0.001, "loss": 1.7539, "step": 558040 }, { "epoch": 48.14492753623188, "grad_norm": 0.5281345248222351, "learning_rate": 0.001, "loss": 1.7597, "step": 558096 }, { "epoch": 48.14975845410628, "grad_norm": 1.455918312072754, "learning_rate": 0.001, "loss": 1.7369, "step": 558152 }, { "epoch": 48.15458937198068, "grad_norm": 0.8949226140975952, "learning_rate": 0.001, "loss": 1.7358, "step": 558208 }, { "epoch": 48.15942028985507, "grad_norm": 2.0396342277526855, "learning_rate": 0.001, "loss": 1.7451, "step": 558264 }, { "epoch": 48.16425120772947, "grad_norm": 0.5577055215835571, "learning_rate": 0.001, "loss": 1.7322, "step": 558320 }, { "epoch": 48.169082125603865, "grad_norm": 4.517914295196533, "learning_rate": 0.001, "loss": 1.736, "step": 558376 }, { "epoch": 48.17391304347826, "grad_norm": 1.4687488079071045, "learning_rate": 0.001, "loss": 1.7373, "step": 558432 }, { "epoch": 48.17874396135266, "grad_norm": 0.3286985456943512, "learning_rate": 0.001, "loss": 1.734, "step": 558488 }, { "epoch": 48.18357487922705, "grad_norm": 1.4441635608673096, "learning_rate": 0.001, "loss": 1.7384, "step": 558544 }, { "epoch": 48.18840579710145, "grad_norm": 0.964182436466217, "learning_rate": 0.001, "loss": 1.7405, "step": 558600 }, { "epoch": 48.193236714975846, "grad_norm": 1.1161614656448364, "learning_rate": 0.001, "loss": 1.7311, "step": 558656 }, { "epoch": 48.19806763285024, "grad_norm": 2.071441173553467, "learning_rate": 0.001, "loss": 1.725, "step": 558712 }, { "epoch": 48.20289855072464, "grad_norm": 0.32621249556541443, "learning_rate": 0.001, "loss": 1.727, "step": 558768 }, { "epoch": 48.207729468599034, "grad_norm": 0.7437166571617126, "learning_rate": 0.001, "loss": 1.7314, "step": 558824 }, { "epoch": 48.21256038647343, "grad_norm": 2.8595383167266846, "learning_rate": 0.001, "loss": 1.7421, "step": 558880 }, { "epoch": 48.21739130434783, "grad_norm": 0.6153905987739563, "learning_rate": 0.001, "loss": 1.7388, "step": 558936 }, { "epoch": 48.22222222222222, "grad_norm": 0.3360987901687622, "learning_rate": 0.001, "loss": 1.7218, "step": 558992 }, { "epoch": 48.227053140096615, "grad_norm": 3.065673828125, "learning_rate": 0.001, "loss": 1.7176, "step": 559048 }, { "epoch": 48.231884057971016, "grad_norm": 0.512309193611145, "learning_rate": 0.001, "loss": 1.7125, "step": 559104 }, { "epoch": 48.23671497584541, "grad_norm": 0.3625718951225281, "learning_rate": 0.001, "loss": 1.7143, "step": 559160 }, { "epoch": 48.24154589371981, "grad_norm": 0.4617823660373688, "learning_rate": 0.001, "loss": 1.7206, "step": 559216 }, { "epoch": 48.2463768115942, "grad_norm": 0.7270511388778687, "learning_rate": 0.001, "loss": 1.7171, "step": 559272 }, { "epoch": 48.2512077294686, "grad_norm": 0.46190470457077026, "learning_rate": 0.001, "loss": 1.714, "step": 559328 }, { "epoch": 48.256038647343, "grad_norm": 0.7493250370025635, "learning_rate": 0.001, "loss": 1.7245, "step": 559384 }, { "epoch": 48.26086956521739, "grad_norm": 1.0108582973480225, "learning_rate": 0.001, "loss": 1.7223, "step": 559440 }, { "epoch": 48.265700483091784, "grad_norm": 3.6153907775878906, "learning_rate": 0.001, "loss": 1.7162, "step": 559496 }, { "epoch": 48.270531400966185, "grad_norm": 0.6265458464622498, "learning_rate": 0.001, "loss": 1.7295, "step": 559552 }, { "epoch": 48.27536231884058, "grad_norm": 0.45238104462623596, "learning_rate": 0.001, "loss": 1.7308, "step": 559608 }, { "epoch": 48.28019323671498, "grad_norm": 0.5736666917800903, "learning_rate": 0.001, "loss": 1.7225, "step": 559664 }, { "epoch": 48.28502415458937, "grad_norm": 0.2857624292373657, "learning_rate": 0.001, "loss": 1.729, "step": 559720 }, { "epoch": 48.289855072463766, "grad_norm": 0.4967600703239441, "learning_rate": 0.001, "loss": 1.7281, "step": 559776 }, { "epoch": 48.29468599033817, "grad_norm": 0.339504212141037, "learning_rate": 0.001, "loss": 1.7297, "step": 559832 }, { "epoch": 48.29951690821256, "grad_norm": 3.0593347549438477, "learning_rate": 0.001, "loss": 1.739, "step": 559888 }, { "epoch": 48.30434782608695, "grad_norm": 0.3582685589790344, "learning_rate": 0.001, "loss": 1.7407, "step": 559944 }, { "epoch": 48.309178743961354, "grad_norm": 0.45325610041618347, "learning_rate": 0.001, "loss": 1.736, "step": 560000 }, { "epoch": 48.31400966183575, "grad_norm": 0.7752774357795715, "learning_rate": 0.001, "loss": 1.7346, "step": 560056 }, { "epoch": 48.31884057971015, "grad_norm": 3.353123664855957, "learning_rate": 0.001, "loss": 1.7296, "step": 560112 }, { "epoch": 48.32367149758454, "grad_norm": 22.690156936645508, "learning_rate": 0.001, "loss": 1.7245, "step": 560168 }, { "epoch": 48.328502415458935, "grad_norm": 0.4600237011909485, "learning_rate": 0.001, "loss": 1.7328, "step": 560224 }, { "epoch": 48.333333333333336, "grad_norm": 0.3787469267845154, "learning_rate": 0.001, "loss": 1.7213, "step": 560280 }, { "epoch": 48.33816425120773, "grad_norm": 0.3996583819389343, "learning_rate": 0.001, "loss": 1.72, "step": 560336 }, { "epoch": 48.34299516908212, "grad_norm": 0.34201109409332275, "learning_rate": 0.001, "loss": 1.7196, "step": 560392 }, { "epoch": 48.34782608695652, "grad_norm": 0.4338032603263855, "learning_rate": 0.001, "loss": 1.7309, "step": 560448 }, { "epoch": 48.35265700483092, "grad_norm": 1.1121406555175781, "learning_rate": 0.001, "loss": 1.7262, "step": 560504 }, { "epoch": 48.35748792270532, "grad_norm": 0.32640036940574646, "learning_rate": 0.001, "loss": 1.7242, "step": 560560 }, { "epoch": 48.36231884057971, "grad_norm": 3.5201947689056396, "learning_rate": 0.001, "loss": 1.7222, "step": 560616 }, { "epoch": 48.367149758454104, "grad_norm": 2.2100040912628174, "learning_rate": 0.001, "loss": 1.7124, "step": 560672 }, { "epoch": 48.371980676328505, "grad_norm": 3.5138890743255615, "learning_rate": 0.001, "loss": 1.7193, "step": 560728 }, { "epoch": 48.3768115942029, "grad_norm": 0.6353521347045898, "learning_rate": 0.001, "loss": 1.7266, "step": 560784 }, { "epoch": 48.38164251207729, "grad_norm": 0.39309829473495483, "learning_rate": 0.001, "loss": 1.7168, "step": 560840 }, { "epoch": 48.38647342995169, "grad_norm": 0.8769246935844421, "learning_rate": 0.001, "loss": 1.7295, "step": 560896 }, { "epoch": 48.391304347826086, "grad_norm": 1.155418872833252, "learning_rate": 0.001, "loss": 1.7255, "step": 560952 }, { "epoch": 48.39613526570048, "grad_norm": 0.478248655796051, "learning_rate": 0.001, "loss": 1.7193, "step": 561008 }, { "epoch": 48.40096618357488, "grad_norm": 0.8789826035499573, "learning_rate": 0.001, "loss": 1.7315, "step": 561064 }, { "epoch": 48.405797101449274, "grad_norm": 0.47951775789260864, "learning_rate": 0.001, "loss": 1.7256, "step": 561120 }, { "epoch": 48.410628019323674, "grad_norm": 0.5099565386772156, "learning_rate": 0.001, "loss": 1.7193, "step": 561176 }, { "epoch": 48.41545893719807, "grad_norm": 1.1364742517471313, "learning_rate": 0.001, "loss": 1.7286, "step": 561232 }, { "epoch": 48.42028985507246, "grad_norm": 5.860898017883301, "learning_rate": 0.001, "loss": 1.7191, "step": 561288 }, { "epoch": 48.42512077294686, "grad_norm": 0.3071814477443695, "learning_rate": 0.001, "loss": 1.7247, "step": 561344 }, { "epoch": 48.429951690821255, "grad_norm": 0.2678964138031006, "learning_rate": 0.001, "loss": 1.7182, "step": 561400 }, { "epoch": 48.43478260869565, "grad_norm": 0.5519323945045471, "learning_rate": 0.001, "loss": 1.7203, "step": 561456 }, { "epoch": 48.43961352657005, "grad_norm": 0.9246322512626648, "learning_rate": 0.001, "loss": 1.7225, "step": 561512 }, { "epoch": 48.44444444444444, "grad_norm": 0.350729376077652, "learning_rate": 0.001, "loss": 1.7248, "step": 561568 }, { "epoch": 48.44927536231884, "grad_norm": 3.221393346786499, "learning_rate": 0.001, "loss": 1.7186, "step": 561624 }, { "epoch": 48.45410628019324, "grad_norm": 0.2818219065666199, "learning_rate": 0.001, "loss": 1.7366, "step": 561680 }, { "epoch": 48.45893719806763, "grad_norm": 0.3236127197742462, "learning_rate": 0.001, "loss": 1.7378, "step": 561736 }, { "epoch": 48.46376811594203, "grad_norm": 0.8377506732940674, "learning_rate": 0.001, "loss": 1.7331, "step": 561792 }, { "epoch": 48.468599033816425, "grad_norm": 0.2894747853279114, "learning_rate": 0.001, "loss": 1.7384, "step": 561848 }, { "epoch": 48.47342995169082, "grad_norm": 0.23993971943855286, "learning_rate": 0.001, "loss": 1.7274, "step": 561904 }, { "epoch": 48.47826086956522, "grad_norm": 18.647411346435547, "learning_rate": 0.001, "loss": 1.7194, "step": 561960 }, { "epoch": 48.48309178743961, "grad_norm": 1.3642023801803589, "learning_rate": 0.001, "loss": 1.7371, "step": 562016 }, { "epoch": 48.48792270531401, "grad_norm": 1.2759517431259155, "learning_rate": 0.001, "loss": 1.7269, "step": 562072 }, { "epoch": 48.492753623188406, "grad_norm": 1.9285154342651367, "learning_rate": 0.001, "loss": 1.7282, "step": 562128 }, { "epoch": 48.4975845410628, "grad_norm": 0.509515643119812, "learning_rate": 0.001, "loss": 1.7214, "step": 562184 }, { "epoch": 48.5024154589372, "grad_norm": 0.31755873560905457, "learning_rate": 0.001, "loss": 1.7186, "step": 562240 }, { "epoch": 48.507246376811594, "grad_norm": 1.1850541830062866, "learning_rate": 0.001, "loss": 1.7084, "step": 562296 }, { "epoch": 48.51207729468599, "grad_norm": 1.3775087594985962, "learning_rate": 0.001, "loss": 1.7238, "step": 562352 }, { "epoch": 48.51690821256039, "grad_norm": 1.3933364152908325, "learning_rate": 0.001, "loss": 1.7394, "step": 562408 }, { "epoch": 48.52173913043478, "grad_norm": 1.7489244937896729, "learning_rate": 0.001, "loss": 1.7362, "step": 562464 }, { "epoch": 48.52657004830918, "grad_norm": 0.9871833920478821, "learning_rate": 0.001, "loss": 1.7418, "step": 562520 }, { "epoch": 48.531400966183575, "grad_norm": 2.2161624431610107, "learning_rate": 0.001, "loss": 1.7406, "step": 562576 }, { "epoch": 48.53623188405797, "grad_norm": 0.2898215353488922, "learning_rate": 0.001, "loss": 1.7387, "step": 562632 }, { "epoch": 48.54106280193237, "grad_norm": 4.362494945526123, "learning_rate": 0.001, "loss": 1.7382, "step": 562688 }, { "epoch": 48.54589371980676, "grad_norm": 0.8695025444030762, "learning_rate": 0.001, "loss": 1.7449, "step": 562744 }, { "epoch": 48.55072463768116, "grad_norm": 0.610435962677002, "learning_rate": 0.001, "loss": 1.7422, "step": 562800 }, { "epoch": 48.55555555555556, "grad_norm": 1.1652413606643677, "learning_rate": 0.001, "loss": 1.7444, "step": 562856 }, { "epoch": 48.56038647342995, "grad_norm": 0.2796342968940735, "learning_rate": 0.001, "loss": 1.7417, "step": 562912 }, { "epoch": 48.56521739130435, "grad_norm": 0.6395411491394043, "learning_rate": 0.001, "loss": 1.741, "step": 562968 }, { "epoch": 48.570048309178745, "grad_norm": 3.499129295349121, "learning_rate": 0.001, "loss": 1.7342, "step": 563024 }, { "epoch": 48.57487922705314, "grad_norm": 0.3522055149078369, "learning_rate": 0.001, "loss": 1.7374, "step": 563080 }, { "epoch": 48.57971014492754, "grad_norm": 0.4047297239303589, "learning_rate": 0.001, "loss": 1.7255, "step": 563136 }, { "epoch": 48.58454106280193, "grad_norm": 0.3754846751689911, "learning_rate": 0.001, "loss": 1.73, "step": 563192 }, { "epoch": 48.589371980676326, "grad_norm": 0.2958768606185913, "learning_rate": 0.001, "loss": 1.7374, "step": 563248 }, { "epoch": 48.594202898550726, "grad_norm": 1.9761005640029907, "learning_rate": 0.001, "loss": 1.7299, "step": 563304 }, { "epoch": 48.59903381642512, "grad_norm": 0.9507854580879211, "learning_rate": 0.001, "loss": 1.7302, "step": 563360 }, { "epoch": 48.60386473429952, "grad_norm": 0.672998309135437, "learning_rate": 0.001, "loss": 1.7193, "step": 563416 }, { "epoch": 48.608695652173914, "grad_norm": 0.28299403190612793, "learning_rate": 0.001, "loss": 1.7179, "step": 563472 }, { "epoch": 48.61352657004831, "grad_norm": 0.9326180815696716, "learning_rate": 0.001, "loss": 1.733, "step": 563528 }, { "epoch": 48.61835748792271, "grad_norm": 0.8623196482658386, "learning_rate": 0.001, "loss": 1.7325, "step": 563584 }, { "epoch": 48.6231884057971, "grad_norm": 0.30137598514556885, "learning_rate": 0.001, "loss": 1.7259, "step": 563640 }, { "epoch": 48.628019323671495, "grad_norm": 7.764924049377441, "learning_rate": 0.001, "loss": 1.7308, "step": 563696 }, { "epoch": 48.632850241545896, "grad_norm": 0.28031742572784424, "learning_rate": 0.001, "loss": 1.7154, "step": 563752 }, { "epoch": 48.63768115942029, "grad_norm": 1.5746824741363525, "learning_rate": 0.001, "loss": 1.717, "step": 563808 }, { "epoch": 48.64251207729468, "grad_norm": 0.2937065362930298, "learning_rate": 0.001, "loss": 1.7229, "step": 563864 }, { "epoch": 48.64734299516908, "grad_norm": 0.23285731673240662, "learning_rate": 0.001, "loss": 1.7159, "step": 563920 }, { "epoch": 48.65217391304348, "grad_norm": 0.27084076404571533, "learning_rate": 0.001, "loss": 1.7165, "step": 563976 }, { "epoch": 48.65700483091788, "grad_norm": 3.0396528244018555, "learning_rate": 0.001, "loss": 1.7235, "step": 564032 }, { "epoch": 48.66183574879227, "grad_norm": 0.2727288007736206, "learning_rate": 0.001, "loss": 1.7159, "step": 564088 }, { "epoch": 48.666666666666664, "grad_norm": 0.7118105292320251, "learning_rate": 0.001, "loss": 1.7382, "step": 564144 }, { "epoch": 48.671497584541065, "grad_norm": 0.3286385238170624, "learning_rate": 0.001, "loss": 1.7392, "step": 564200 }, { "epoch": 48.67632850241546, "grad_norm": 0.7671298980712891, "learning_rate": 0.001, "loss": 1.7346, "step": 564256 }, { "epoch": 48.68115942028985, "grad_norm": 1.0957244634628296, "learning_rate": 0.001, "loss": 1.7303, "step": 564312 }, { "epoch": 48.68599033816425, "grad_norm": 0.6638689041137695, "learning_rate": 0.001, "loss": 1.7273, "step": 564368 }, { "epoch": 48.690821256038646, "grad_norm": 1.309252142906189, "learning_rate": 0.001, "loss": 1.7141, "step": 564424 }, { "epoch": 48.69565217391305, "grad_norm": 0.7591997981071472, "learning_rate": 0.001, "loss": 1.7265, "step": 564480 }, { "epoch": 48.70048309178744, "grad_norm": 0.3621346354484558, "learning_rate": 0.001, "loss": 1.7226, "step": 564536 }, { "epoch": 48.70531400966183, "grad_norm": 2.139744520187378, "learning_rate": 0.001, "loss": 1.7339, "step": 564592 }, { "epoch": 48.710144927536234, "grad_norm": 1.14698326587677, "learning_rate": 0.001, "loss": 1.7357, "step": 564648 }, { "epoch": 48.71497584541063, "grad_norm": 0.3161656856536865, "learning_rate": 0.001, "loss": 1.7294, "step": 564704 }, { "epoch": 48.71980676328502, "grad_norm": 1.520839810371399, "learning_rate": 0.001, "loss": 1.7531, "step": 564760 }, { "epoch": 48.72463768115942, "grad_norm": 1.4619337320327759, "learning_rate": 0.001, "loss": 1.7615, "step": 564816 }, { "epoch": 48.729468599033815, "grad_norm": 0.27585041522979736, "learning_rate": 0.001, "loss": 1.7472, "step": 564872 }, { "epoch": 48.734299516908216, "grad_norm": 0.7427679300308228, "learning_rate": 0.001, "loss": 1.7485, "step": 564928 }, { "epoch": 48.73913043478261, "grad_norm": 0.36053940653800964, "learning_rate": 0.001, "loss": 1.7474, "step": 564984 }, { "epoch": 48.743961352657, "grad_norm": 0.31082770228385925, "learning_rate": 0.001, "loss": 1.74, "step": 565040 }, { "epoch": 48.7487922705314, "grad_norm": 0.5567958950996399, "learning_rate": 0.001, "loss": 1.7454, "step": 565096 }, { "epoch": 48.7536231884058, "grad_norm": 3.210038185119629, "learning_rate": 0.001, "loss": 1.7451, "step": 565152 }, { "epoch": 48.75845410628019, "grad_norm": 0.27859172224998474, "learning_rate": 0.001, "loss": 1.7382, "step": 565208 }, { "epoch": 48.76328502415459, "grad_norm": 0.35330328345298767, "learning_rate": 0.001, "loss": 1.7415, "step": 565264 }, { "epoch": 48.768115942028984, "grad_norm": 0.25735074281692505, "learning_rate": 0.001, "loss": 1.7331, "step": 565320 }, { "epoch": 48.772946859903385, "grad_norm": 1.1551278829574585, "learning_rate": 0.001, "loss": 1.7343, "step": 565376 }, { "epoch": 48.77777777777778, "grad_norm": 4.618515968322754, "learning_rate": 0.001, "loss": 1.7353, "step": 565432 }, { "epoch": 48.78260869565217, "grad_norm": 0.4553910791873932, "learning_rate": 0.001, "loss": 1.7381, "step": 565488 }, { "epoch": 48.78743961352657, "grad_norm": 1.0614721775054932, "learning_rate": 0.001, "loss": 1.7347, "step": 565544 }, { "epoch": 48.792270531400966, "grad_norm": 0.30600351095199585, "learning_rate": 0.001, "loss": 1.7432, "step": 565600 }, { "epoch": 48.79710144927536, "grad_norm": 0.9706910848617554, "learning_rate": 0.001, "loss": 1.7419, "step": 565656 }, { "epoch": 48.80193236714976, "grad_norm": 0.7892006635665894, "learning_rate": 0.001, "loss": 1.7442, "step": 565712 }, { "epoch": 48.806763285024154, "grad_norm": 0.31674396991729736, "learning_rate": 0.001, "loss": 1.7409, "step": 565768 }, { "epoch": 48.81159420289855, "grad_norm": 0.873042106628418, "learning_rate": 0.001, "loss": 1.7434, "step": 565824 }, { "epoch": 48.81642512077295, "grad_norm": 0.24458180367946625, "learning_rate": 0.001, "loss": 1.7448, "step": 565880 }, { "epoch": 48.82125603864734, "grad_norm": 0.7202498912811279, "learning_rate": 0.001, "loss": 1.7505, "step": 565936 }, { "epoch": 48.82608695652174, "grad_norm": 0.49932006001472473, "learning_rate": 0.001, "loss": 1.7559, "step": 565992 }, { "epoch": 48.830917874396135, "grad_norm": 0.266579806804657, "learning_rate": 0.001, "loss": 1.7474, "step": 566048 }, { "epoch": 48.83574879227053, "grad_norm": 0.8755822777748108, "learning_rate": 0.001, "loss": 1.7364, "step": 566104 }, { "epoch": 48.84057971014493, "grad_norm": 0.49510353803634644, "learning_rate": 0.001, "loss": 1.7396, "step": 566160 }, { "epoch": 48.84541062801932, "grad_norm": 1.3598757982254028, "learning_rate": 0.001, "loss": 1.7399, "step": 566216 }, { "epoch": 48.85024154589372, "grad_norm": 0.5199295878410339, "learning_rate": 0.001, "loss": 1.7408, "step": 566272 }, { "epoch": 48.85507246376812, "grad_norm": 0.2606726288795471, "learning_rate": 0.001, "loss": 1.7495, "step": 566328 }, { "epoch": 48.85990338164251, "grad_norm": 2.651616096496582, "learning_rate": 0.001, "loss": 1.7447, "step": 566384 }, { "epoch": 48.86473429951691, "grad_norm": 0.2846638858318329, "learning_rate": 0.001, "loss": 1.7244, "step": 566440 }, { "epoch": 48.869565217391305, "grad_norm": 0.7517786622047424, "learning_rate": 0.001, "loss": 1.728, "step": 566496 }, { "epoch": 48.8743961352657, "grad_norm": 0.5236542224884033, "learning_rate": 0.001, "loss": 1.7325, "step": 566552 }, { "epoch": 48.8792270531401, "grad_norm": 5.7897868156433105, "learning_rate": 0.001, "loss": 1.7352, "step": 566608 }, { "epoch": 48.88405797101449, "grad_norm": 1.2427327632904053, "learning_rate": 0.001, "loss": 1.741, "step": 566664 }, { "epoch": 48.888888888888886, "grad_norm": 0.5720710754394531, "learning_rate": 0.001, "loss": 1.7366, "step": 566720 }, { "epoch": 48.893719806763286, "grad_norm": 0.4397614598274231, "learning_rate": 0.001, "loss": 1.7368, "step": 566776 }, { "epoch": 48.89855072463768, "grad_norm": 0.35932597517967224, "learning_rate": 0.001, "loss": 1.739, "step": 566832 }, { "epoch": 48.90338164251208, "grad_norm": 0.36754271388053894, "learning_rate": 0.001, "loss": 1.742, "step": 566888 }, { "epoch": 48.908212560386474, "grad_norm": 1.1754306554794312, "learning_rate": 0.001, "loss": 1.7379, "step": 566944 }, { "epoch": 48.91304347826087, "grad_norm": 0.3826800584793091, "learning_rate": 0.001, "loss": 1.7385, "step": 567000 }, { "epoch": 48.91787439613527, "grad_norm": 0.24651701748371124, "learning_rate": 0.001, "loss": 1.7326, "step": 567056 }, { "epoch": 48.92270531400966, "grad_norm": 0.5642040967941284, "learning_rate": 0.001, "loss": 1.7325, "step": 567112 }, { "epoch": 48.927536231884055, "grad_norm": 0.3014202117919922, "learning_rate": 0.001, "loss": 1.7398, "step": 567168 }, { "epoch": 48.932367149758456, "grad_norm": 0.438322514295578, "learning_rate": 0.001, "loss": 1.7245, "step": 567224 }, { "epoch": 48.93719806763285, "grad_norm": 0.35445329546928406, "learning_rate": 0.001, "loss": 1.7346, "step": 567280 }, { "epoch": 48.94202898550725, "grad_norm": 0.3005673289299011, "learning_rate": 0.001, "loss": 1.7378, "step": 567336 }, { "epoch": 48.94685990338164, "grad_norm": 0.6546096205711365, "learning_rate": 0.001, "loss": 1.7342, "step": 567392 }, { "epoch": 48.95169082125604, "grad_norm": 0.28775474429130554, "learning_rate": 0.001, "loss": 1.733, "step": 567448 }, { "epoch": 48.95652173913044, "grad_norm": 32.09855651855469, "learning_rate": 0.001, "loss": 1.727, "step": 567504 }, { "epoch": 48.96135265700483, "grad_norm": 0.304426908493042, "learning_rate": 0.001, "loss": 1.736, "step": 567560 }, { "epoch": 48.966183574879224, "grad_norm": 0.29832354187965393, "learning_rate": 0.001, "loss": 1.742, "step": 567616 }, { "epoch": 48.971014492753625, "grad_norm": 0.4310396611690521, "learning_rate": 0.001, "loss": 1.7321, "step": 567672 }, { "epoch": 48.97584541062802, "grad_norm": 0.36771413683891296, "learning_rate": 0.001, "loss": 1.7349, "step": 567728 }, { "epoch": 48.98067632850242, "grad_norm": 0.60688316822052, "learning_rate": 0.001, "loss": 1.7329, "step": 567784 }, { "epoch": 48.98550724637681, "grad_norm": 4.7237019538879395, "learning_rate": 0.001, "loss": 1.7397, "step": 567840 }, { "epoch": 48.990338164251206, "grad_norm": 0.3570004999637604, "learning_rate": 0.001, "loss": 1.7487, "step": 567896 }, { "epoch": 48.99516908212561, "grad_norm": 0.3104717433452606, "learning_rate": 0.001, "loss": 1.7467, "step": 567952 }, { "epoch": 49.0, "grad_norm": 0.5126457214355469, "learning_rate": 0.001, "loss": 1.7367, "step": 568008 }, { "epoch": 49.00483091787439, "grad_norm": 0.5701302289962769, "learning_rate": 0.001, "loss": 1.7067, "step": 568064 }, { "epoch": 49.009661835748794, "grad_norm": 3.032111406326294, "learning_rate": 0.001, "loss": 1.704, "step": 568120 }, { "epoch": 49.01449275362319, "grad_norm": 1.6264134645462036, "learning_rate": 0.001, "loss": 1.7269, "step": 568176 }, { "epoch": 49.01932367149758, "grad_norm": 0.37111741304397583, "learning_rate": 0.001, "loss": 1.7134, "step": 568232 }, { "epoch": 49.02415458937198, "grad_norm": 0.3367540240287781, "learning_rate": 0.001, "loss": 1.7019, "step": 568288 }, { "epoch": 49.028985507246375, "grad_norm": 0.6716726422309875, "learning_rate": 0.001, "loss": 1.7177, "step": 568344 }, { "epoch": 49.033816425120776, "grad_norm": 0.5578674674034119, "learning_rate": 0.001, "loss": 1.7171, "step": 568400 }, { "epoch": 49.03864734299517, "grad_norm": 0.2774312198162079, "learning_rate": 0.001, "loss": 1.7078, "step": 568456 }, { "epoch": 49.04347826086956, "grad_norm": 0.3385161757469177, "learning_rate": 0.001, "loss": 1.7065, "step": 568512 }, { "epoch": 49.04830917874396, "grad_norm": 1.0968073606491089, "learning_rate": 0.001, "loss": 1.7201, "step": 568568 }, { "epoch": 49.05314009661836, "grad_norm": 2.4758543968200684, "learning_rate": 0.001, "loss": 1.711, "step": 568624 }, { "epoch": 49.05797101449275, "grad_norm": 27.111942291259766, "learning_rate": 0.001, "loss": 1.7196, "step": 568680 }, { "epoch": 49.06280193236715, "grad_norm": 0.7593479156494141, "learning_rate": 0.001, "loss": 1.7196, "step": 568736 }, { "epoch": 49.067632850241544, "grad_norm": 17.206069946289062, "learning_rate": 0.001, "loss": 1.729, "step": 568792 }, { "epoch": 49.072463768115945, "grad_norm": 0.28531450033187866, "learning_rate": 0.001, "loss": 1.737, "step": 568848 }, { "epoch": 49.07729468599034, "grad_norm": 2.2749199867248535, "learning_rate": 0.001, "loss": 1.7353, "step": 568904 }, { "epoch": 49.08212560386473, "grad_norm": 1.751023769378662, "learning_rate": 0.001, "loss": 1.7164, "step": 568960 }, { "epoch": 49.08695652173913, "grad_norm": 3.920602321624756, "learning_rate": 0.001, "loss": 1.7172, "step": 569016 }, { "epoch": 49.091787439613526, "grad_norm": 0.4069245457649231, "learning_rate": 0.001, "loss": 1.7188, "step": 569072 }, { "epoch": 49.09661835748792, "grad_norm": 0.28642383217811584, "learning_rate": 0.001, "loss": 1.7196, "step": 569128 }, { "epoch": 49.10144927536232, "grad_norm": 1.017494797706604, "learning_rate": 0.001, "loss": 1.717, "step": 569184 }, { "epoch": 49.106280193236714, "grad_norm": 55.02179718017578, "learning_rate": 0.001, "loss": 1.7366, "step": 569240 }, { "epoch": 49.111111111111114, "grad_norm": 0.4570782482624054, "learning_rate": 0.001, "loss": 1.7311, "step": 569296 }, { "epoch": 49.11594202898551, "grad_norm": 1.3910317420959473, "learning_rate": 0.001, "loss": 1.733, "step": 569352 }, { "epoch": 49.1207729468599, "grad_norm": 2.5197677612304688, "learning_rate": 0.001, "loss": 1.7288, "step": 569408 }, { "epoch": 49.1256038647343, "grad_norm": 0.28853344917297363, "learning_rate": 0.001, "loss": 1.737, "step": 569464 }, { "epoch": 49.130434782608695, "grad_norm": 0.3744446933269501, "learning_rate": 0.001, "loss": 1.7444, "step": 569520 }, { "epoch": 49.13526570048309, "grad_norm": 1.2704921960830688, "learning_rate": 0.001, "loss": 1.7524, "step": 569576 }, { "epoch": 49.14009661835749, "grad_norm": 0.31797486543655396, "learning_rate": 0.001, "loss": 1.7393, "step": 569632 }, { "epoch": 49.14492753623188, "grad_norm": 0.3209443986415863, "learning_rate": 0.001, "loss": 1.7478, "step": 569688 }, { "epoch": 49.14975845410628, "grad_norm": 0.3100528120994568, "learning_rate": 0.001, "loss": 1.7368, "step": 569744 }, { "epoch": 49.15458937198068, "grad_norm": 0.38484281301498413, "learning_rate": 0.001, "loss": 1.7283, "step": 569800 }, { "epoch": 49.15942028985507, "grad_norm": 3.3547329902648926, "learning_rate": 0.001, "loss": 1.7298, "step": 569856 }, { "epoch": 49.16425120772947, "grad_norm": 1.142453670501709, "learning_rate": 0.001, "loss": 1.7264, "step": 569912 }, { "epoch": 49.169082125603865, "grad_norm": 0.5741724967956543, "learning_rate": 0.001, "loss": 1.7185, "step": 569968 }, { "epoch": 49.17391304347826, "grad_norm": 0.36683547496795654, "learning_rate": 0.001, "loss": 1.7221, "step": 570024 }, { "epoch": 49.17874396135266, "grad_norm": 0.884395956993103, "learning_rate": 0.001, "loss": 1.7208, "step": 570080 }, { "epoch": 49.18357487922705, "grad_norm": 0.671528697013855, "learning_rate": 0.001, "loss": 1.72, "step": 570136 }, { "epoch": 49.18840579710145, "grad_norm": 0.6723687052726746, "learning_rate": 0.001, "loss": 1.7151, "step": 570192 }, { "epoch": 49.193236714975846, "grad_norm": 0.30434340238571167, "learning_rate": 0.001, "loss": 1.7196, "step": 570248 }, { "epoch": 49.19806763285024, "grad_norm": 0.8924299478530884, "learning_rate": 0.001, "loss": 1.7141, "step": 570304 }, { "epoch": 49.20289855072464, "grad_norm": 0.4536803662776947, "learning_rate": 0.001, "loss": 1.717, "step": 570360 }, { "epoch": 49.207729468599034, "grad_norm": 4.203451156616211, "learning_rate": 0.001, "loss": 1.7177, "step": 570416 }, { "epoch": 49.21256038647343, "grad_norm": 0.7614284753799438, "learning_rate": 0.001, "loss": 1.7179, "step": 570472 }, { "epoch": 49.21739130434783, "grad_norm": 1.0777647495269775, "learning_rate": 0.001, "loss": 1.7162, "step": 570528 }, { "epoch": 49.22222222222222, "grad_norm": 0.3041408658027649, "learning_rate": 0.001, "loss": 1.7108, "step": 570584 }, { "epoch": 49.227053140096615, "grad_norm": 1.3315868377685547, "learning_rate": 0.001, "loss": 1.7292, "step": 570640 }, { "epoch": 49.231884057971016, "grad_norm": 8.672272682189941, "learning_rate": 0.001, "loss": 1.7259, "step": 570696 }, { "epoch": 49.23671497584541, "grad_norm": 3.1668944358825684, "learning_rate": 0.001, "loss": 1.7381, "step": 570752 }, { "epoch": 49.24154589371981, "grad_norm": 7.479694843292236, "learning_rate": 0.001, "loss": 1.7296, "step": 570808 }, { "epoch": 49.2463768115942, "grad_norm": 0.46210435032844543, "learning_rate": 0.001, "loss": 1.7243, "step": 570864 }, { "epoch": 49.2512077294686, "grad_norm": 16.812578201293945, "learning_rate": 0.001, "loss": 1.7334, "step": 570920 }, { "epoch": 49.256038647343, "grad_norm": 2.497494697570801, "learning_rate": 0.001, "loss": 1.7318, "step": 570976 }, { "epoch": 49.26086956521739, "grad_norm": 0.4537156820297241, "learning_rate": 0.001, "loss": 1.7361, "step": 571032 }, { "epoch": 49.265700483091784, "grad_norm": 0.8682971000671387, "learning_rate": 0.001, "loss": 1.7264, "step": 571088 }, { "epoch": 49.270531400966185, "grad_norm": 0.8241739273071289, "learning_rate": 0.001, "loss": 1.7369, "step": 571144 }, { "epoch": 49.27536231884058, "grad_norm": 0.5132399201393127, "learning_rate": 0.001, "loss": 1.7273, "step": 571200 }, { "epoch": 49.28019323671498, "grad_norm": 1.0879685878753662, "learning_rate": 0.001, "loss": 1.724, "step": 571256 }, { "epoch": 49.28502415458937, "grad_norm": 0.8041641712188721, "learning_rate": 0.001, "loss": 1.7223, "step": 571312 }, { "epoch": 49.289855072463766, "grad_norm": 0.3210260272026062, "learning_rate": 0.001, "loss": 1.7354, "step": 571368 }, { "epoch": 49.29468599033817, "grad_norm": 0.4556408226490021, "learning_rate": 0.001, "loss": 1.7273, "step": 571424 }, { "epoch": 49.29951690821256, "grad_norm": 0.6946066617965698, "learning_rate": 0.001, "loss": 1.7168, "step": 571480 }, { "epoch": 49.30434782608695, "grad_norm": 0.7491017580032349, "learning_rate": 0.001, "loss": 1.7308, "step": 571536 }, { "epoch": 49.309178743961354, "grad_norm": 1.2588515281677246, "learning_rate": 0.001, "loss": 1.7308, "step": 571592 }, { "epoch": 49.31400966183575, "grad_norm": 0.9831664562225342, "learning_rate": 0.001, "loss": 1.7458, "step": 571648 }, { "epoch": 49.31884057971015, "grad_norm": 1.5311278104782104, "learning_rate": 0.001, "loss": 1.7493, "step": 571704 }, { "epoch": 49.32367149758454, "grad_norm": 1.0603083372116089, "learning_rate": 0.001, "loss": 1.7465, "step": 571760 }, { "epoch": 49.328502415458935, "grad_norm": 1.5105196237564087, "learning_rate": 0.001, "loss": 1.7263, "step": 571816 }, { "epoch": 49.333333333333336, "grad_norm": 0.24933646619319916, "learning_rate": 0.001, "loss": 1.734, "step": 571872 }, { "epoch": 49.33816425120773, "grad_norm": 0.3715778887271881, "learning_rate": 0.001, "loss": 1.7175, "step": 571928 }, { "epoch": 49.34299516908212, "grad_norm": 0.4938346743583679, "learning_rate": 0.001, "loss": 1.7213, "step": 571984 }, { "epoch": 49.34782608695652, "grad_norm": 0.41109439730644226, "learning_rate": 0.001, "loss": 1.7237, "step": 572040 }, { "epoch": 49.35265700483092, "grad_norm": 0.32072731852531433, "learning_rate": 0.001, "loss": 1.7198, "step": 572096 }, { "epoch": 49.35748792270532, "grad_norm": 1.0099269151687622, "learning_rate": 0.001, "loss": 1.7242, "step": 572152 }, { "epoch": 49.36231884057971, "grad_norm": 1.7681523561477661, "learning_rate": 0.001, "loss": 1.7203, "step": 572208 }, { "epoch": 49.367149758454104, "grad_norm": 0.29904553294181824, "learning_rate": 0.001, "loss": 1.7226, "step": 572264 }, { "epoch": 49.371980676328505, "grad_norm": 0.7818116545677185, "learning_rate": 0.001, "loss": 1.7127, "step": 572320 }, { "epoch": 49.3768115942029, "grad_norm": 41.033355712890625, "learning_rate": 0.001, "loss": 1.7182, "step": 572376 }, { "epoch": 49.38164251207729, "grad_norm": 0.4807460606098175, "learning_rate": 0.001, "loss": 1.7292, "step": 572432 }, { "epoch": 49.38647342995169, "grad_norm": 1.3753248453140259, "learning_rate": 0.001, "loss": 1.7265, "step": 572488 }, { "epoch": 49.391304347826086, "grad_norm": 0.5084253549575806, "learning_rate": 0.001, "loss": 1.7346, "step": 572544 }, { "epoch": 49.39613526570048, "grad_norm": 0.31414175033569336, "learning_rate": 0.001, "loss": 1.7317, "step": 572600 }, { "epoch": 49.40096618357488, "grad_norm": 0.25817960500717163, "learning_rate": 0.001, "loss": 1.7314, "step": 572656 }, { "epoch": 49.405797101449274, "grad_norm": 0.3373993933200836, "learning_rate": 0.001, "loss": 1.7361, "step": 572712 }, { "epoch": 49.410628019323674, "grad_norm": 0.27275413274765015, "learning_rate": 0.001, "loss": 1.7333, "step": 572768 }, { "epoch": 49.41545893719807, "grad_norm": 2.0878312587738037, "learning_rate": 0.001, "loss": 1.7205, "step": 572824 }, { "epoch": 49.42028985507246, "grad_norm": 0.6426116228103638, "learning_rate": 0.001, "loss": 1.7071, "step": 572880 }, { "epoch": 49.42512077294686, "grad_norm": 1.2486121654510498, "learning_rate": 0.001, "loss": 1.7166, "step": 572936 }, { "epoch": 49.429951690821255, "grad_norm": 0.33519333600997925, "learning_rate": 0.001, "loss": 1.7117, "step": 572992 }, { "epoch": 49.43478260869565, "grad_norm": 3.0863380432128906, "learning_rate": 0.001, "loss": 1.7228, "step": 573048 }, { "epoch": 49.43961352657005, "grad_norm": 0.2836727797985077, "learning_rate": 0.001, "loss": 1.725, "step": 573104 }, { "epoch": 49.44444444444444, "grad_norm": 1.4792267084121704, "learning_rate": 0.001, "loss": 1.7128, "step": 573160 }, { "epoch": 49.44927536231884, "grad_norm": 0.32153230905532837, "learning_rate": 0.001, "loss": 1.7181, "step": 573216 }, { "epoch": 49.45410628019324, "grad_norm": 0.34001925587654114, "learning_rate": 0.001, "loss": 1.7171, "step": 573272 }, { "epoch": 49.45893719806763, "grad_norm": 0.3107743263244629, "learning_rate": 0.001, "loss": 1.7134, "step": 573328 }, { "epoch": 49.46376811594203, "grad_norm": 0.27029359340667725, "learning_rate": 0.001, "loss": 1.7129, "step": 573384 }, { "epoch": 49.468599033816425, "grad_norm": 0.27811601758003235, "learning_rate": 0.001, "loss": 1.7178, "step": 573440 }, { "epoch": 49.47342995169082, "grad_norm": 0.34726932644844055, "learning_rate": 0.001, "loss": 1.7216, "step": 573496 }, { "epoch": 49.47826086956522, "grad_norm": 0.7316891551017761, "learning_rate": 0.001, "loss": 1.7241, "step": 573552 }, { "epoch": 49.48309178743961, "grad_norm": 0.3362341821193695, "learning_rate": 0.001, "loss": 1.7281, "step": 573608 }, { "epoch": 49.48792270531401, "grad_norm": 0.25283247232437134, "learning_rate": 0.001, "loss": 1.7277, "step": 573664 }, { "epoch": 49.492753623188406, "grad_norm": 0.2722793221473694, "learning_rate": 0.001, "loss": 1.729, "step": 573720 }, { "epoch": 49.4975845410628, "grad_norm": 5.289150238037109, "learning_rate": 0.001, "loss": 1.7175, "step": 573776 }, { "epoch": 49.5024154589372, "grad_norm": 0.46168652176856995, "learning_rate": 0.001, "loss": 1.7115, "step": 573832 }, { "epoch": 49.507246376811594, "grad_norm": 0.33269959688186646, "learning_rate": 0.001, "loss": 1.7157, "step": 573888 }, { "epoch": 49.51207729468599, "grad_norm": 0.26609066128730774, "learning_rate": 0.001, "loss": 1.7176, "step": 573944 }, { "epoch": 49.51690821256039, "grad_norm": 0.3149854838848114, "learning_rate": 0.001, "loss": 1.7106, "step": 574000 }, { "epoch": 49.52173913043478, "grad_norm": 1.7439597845077515, "learning_rate": 0.001, "loss": 1.7219, "step": 574056 }, { "epoch": 49.52657004830918, "grad_norm": 0.3215422034263611, "learning_rate": 0.001, "loss": 1.7182, "step": 574112 }, { "epoch": 49.531400966183575, "grad_norm": 13.473784446716309, "learning_rate": 0.001, "loss": 1.7235, "step": 574168 }, { "epoch": 49.53623188405797, "grad_norm": 0.6081889867782593, "learning_rate": 0.001, "loss": 1.7195, "step": 574224 }, { "epoch": 49.54106280193237, "grad_norm": 0.8500441312789917, "learning_rate": 0.001, "loss": 1.7273, "step": 574280 }, { "epoch": 49.54589371980676, "grad_norm": 0.35605868697166443, "learning_rate": 0.001, "loss": 1.7266, "step": 574336 }, { "epoch": 49.55072463768116, "grad_norm": 0.3302740156650543, "learning_rate": 0.001, "loss": 1.7131, "step": 574392 }, { "epoch": 49.55555555555556, "grad_norm": 0.32609379291534424, "learning_rate": 0.001, "loss": 1.7125, "step": 574448 }, { "epoch": 49.56038647342995, "grad_norm": 3.1898558139801025, "learning_rate": 0.001, "loss": 1.7229, "step": 574504 }, { "epoch": 49.56521739130435, "grad_norm": 0.2807445228099823, "learning_rate": 0.001, "loss": 1.7106, "step": 574560 }, { "epoch": 49.570048309178745, "grad_norm": 0.8145819306373596, "learning_rate": 0.001, "loss": 1.7167, "step": 574616 }, { "epoch": 49.57487922705314, "grad_norm": 0.40503638982772827, "learning_rate": 0.001, "loss": 1.7107, "step": 574672 }, { "epoch": 49.57971014492754, "grad_norm": 0.2745690941810608, "learning_rate": 0.001, "loss": 1.7064, "step": 574728 }, { "epoch": 49.58454106280193, "grad_norm": 1.760508418083191, "learning_rate": 0.001, "loss": 1.7154, "step": 574784 }, { "epoch": 49.589371980676326, "grad_norm": 0.4716476798057556, "learning_rate": 0.001, "loss": 1.7186, "step": 574840 }, { "epoch": 49.594202898550726, "grad_norm": 0.2968688905239105, "learning_rate": 0.001, "loss": 1.7268, "step": 574896 }, { "epoch": 49.59903381642512, "grad_norm": 0.29158657789230347, "learning_rate": 0.001, "loss": 1.7146, "step": 574952 }, { "epoch": 49.60386473429952, "grad_norm": 0.43078359961509705, "learning_rate": 0.001, "loss": 1.7228, "step": 575008 }, { "epoch": 49.608695652173914, "grad_norm": 0.7296096086502075, "learning_rate": 0.001, "loss": 1.7294, "step": 575064 }, { "epoch": 49.61352657004831, "grad_norm": 0.29182037711143494, "learning_rate": 0.001, "loss": 1.7422, "step": 575120 }, { "epoch": 49.61835748792271, "grad_norm": 0.9701921343803406, "learning_rate": 0.001, "loss": 1.7281, "step": 575176 }, { "epoch": 49.6231884057971, "grad_norm": 0.6487398743629456, "learning_rate": 0.001, "loss": 1.7256, "step": 575232 }, { "epoch": 49.628019323671495, "grad_norm": 0.3682495653629303, "learning_rate": 0.001, "loss": 1.7346, "step": 575288 }, { "epoch": 49.632850241545896, "grad_norm": 1.5476772785186768, "learning_rate": 0.001, "loss": 1.738, "step": 575344 }, { "epoch": 49.63768115942029, "grad_norm": 0.6587009429931641, "learning_rate": 0.001, "loss": 1.7442, "step": 575400 }, { "epoch": 49.64251207729468, "grad_norm": 0.34173107147216797, "learning_rate": 0.001, "loss": 1.7477, "step": 575456 }, { "epoch": 49.64734299516908, "grad_norm": 0.46623316407203674, "learning_rate": 0.001, "loss": 1.741, "step": 575512 }, { "epoch": 49.65217391304348, "grad_norm": 0.30779990553855896, "learning_rate": 0.001, "loss": 1.748, "step": 575568 }, { "epoch": 49.65700483091788, "grad_norm": 0.5317642092704773, "learning_rate": 0.001, "loss": 1.7279, "step": 575624 }, { "epoch": 49.66183574879227, "grad_norm": 6.003671169281006, "learning_rate": 0.001, "loss": 1.7246, "step": 575680 }, { "epoch": 49.666666666666664, "grad_norm": 0.277752548456192, "learning_rate": 0.001, "loss": 1.7244, "step": 575736 }, { "epoch": 49.671497584541065, "grad_norm": 0.40121662616729736, "learning_rate": 0.001, "loss": 1.7249, "step": 575792 }, { "epoch": 49.67632850241546, "grad_norm": 2.356654644012451, "learning_rate": 0.001, "loss": 1.7243, "step": 575848 }, { "epoch": 49.68115942028985, "grad_norm": 0.8912389874458313, "learning_rate": 0.001, "loss": 1.7164, "step": 575904 }, { "epoch": 49.68599033816425, "grad_norm": 2.393475294113159, "learning_rate": 0.001, "loss": 1.7197, "step": 575960 }, { "epoch": 49.690821256038646, "grad_norm": 0.28964176774024963, "learning_rate": 0.001, "loss": 1.7297, "step": 576016 }, { "epoch": 49.69565217391305, "grad_norm": 0.33668196201324463, "learning_rate": 0.001, "loss": 1.7243, "step": 576072 }, { "epoch": 49.70048309178744, "grad_norm": 0.3650907278060913, "learning_rate": 0.001, "loss": 1.7324, "step": 576128 }, { "epoch": 49.70531400966183, "grad_norm": 1.5397624969482422, "learning_rate": 0.001, "loss": 1.7293, "step": 576184 }, { "epoch": 49.710144927536234, "grad_norm": 0.7408667802810669, "learning_rate": 0.001, "loss": 1.7283, "step": 576240 }, { "epoch": 49.71497584541063, "grad_norm": 0.46333566308021545, "learning_rate": 0.001, "loss": 1.7232, "step": 576296 }, { "epoch": 49.71980676328502, "grad_norm": 0.44951748847961426, "learning_rate": 0.001, "loss": 1.7216, "step": 576352 }, { "epoch": 49.72463768115942, "grad_norm": 1.2410917282104492, "learning_rate": 0.001, "loss": 1.7242, "step": 576408 }, { "epoch": 49.729468599033815, "grad_norm": 0.4852130115032196, "learning_rate": 0.001, "loss": 1.7204, "step": 576464 }, { "epoch": 49.734299516908216, "grad_norm": 2.499396324157715, "learning_rate": 0.001, "loss": 1.7205, "step": 576520 }, { "epoch": 49.73913043478261, "grad_norm": 0.6509780883789062, "learning_rate": 0.001, "loss": 1.7236, "step": 576576 }, { "epoch": 49.743961352657, "grad_norm": 0.5429540872573853, "learning_rate": 0.001, "loss": 1.7217, "step": 576632 }, { "epoch": 49.7487922705314, "grad_norm": 0.30181884765625, "learning_rate": 0.001, "loss": 1.7106, "step": 576688 }, { "epoch": 49.7536231884058, "grad_norm": 0.37169164419174194, "learning_rate": 0.001, "loss": 1.7248, "step": 576744 }, { "epoch": 49.75845410628019, "grad_norm": 0.29045841097831726, "learning_rate": 0.001, "loss": 1.7087, "step": 576800 }, { "epoch": 49.76328502415459, "grad_norm": 0.47012314200401306, "learning_rate": 0.001, "loss": 1.7141, "step": 576856 }, { "epoch": 49.768115942028984, "grad_norm": 0.32227471470832825, "learning_rate": 0.001, "loss": 1.7174, "step": 576912 }, { "epoch": 49.772946859903385, "grad_norm": 0.2925737202167511, "learning_rate": 0.001, "loss": 1.72, "step": 576968 }, { "epoch": 49.77777777777778, "grad_norm": 0.7784834504127502, "learning_rate": 0.001, "loss": 1.7142, "step": 577024 }, { "epoch": 49.78260869565217, "grad_norm": 0.7404419779777527, "learning_rate": 0.001, "loss": 1.716, "step": 577080 }, { "epoch": 49.78743961352657, "grad_norm": 0.661585807800293, "learning_rate": 0.001, "loss": 1.7264, "step": 577136 }, { "epoch": 49.792270531400966, "grad_norm": 0.3852500915527344, "learning_rate": 0.001, "loss": 1.7265, "step": 577192 }, { "epoch": 49.79710144927536, "grad_norm": 0.48991668224334717, "learning_rate": 0.001, "loss": 1.7351, "step": 577248 }, { "epoch": 49.80193236714976, "grad_norm": 1.069893717765808, "learning_rate": 0.001, "loss": 1.7432, "step": 577304 }, { "epoch": 49.806763285024154, "grad_norm": 0.6361894607543945, "learning_rate": 0.001, "loss": 1.7573, "step": 577360 }, { "epoch": 49.81159420289855, "grad_norm": 0.43160852789878845, "learning_rate": 0.001, "loss": 1.7438, "step": 577416 }, { "epoch": 49.81642512077295, "grad_norm": 0.30941981077194214, "learning_rate": 0.001, "loss": 1.7476, "step": 577472 }, { "epoch": 49.82125603864734, "grad_norm": 0.5266588926315308, "learning_rate": 0.001, "loss": 1.7882, "step": 577528 }, { "epoch": 49.82608695652174, "grad_norm": 2.7150566577911377, "learning_rate": 0.001, "loss": 1.7564, "step": 577584 }, { "epoch": 49.830917874396135, "grad_norm": 0.40805700421333313, "learning_rate": 0.001, "loss": 1.7623, "step": 577640 }, { "epoch": 49.83574879227053, "grad_norm": 0.2916834056377411, "learning_rate": 0.001, "loss": 1.762, "step": 577696 }, { "epoch": 49.84057971014493, "grad_norm": 0.5613381862640381, "learning_rate": 0.001, "loss": 1.7414, "step": 577752 }, { "epoch": 49.84541062801932, "grad_norm": 1.4183069467544556, "learning_rate": 0.001, "loss": 1.7436, "step": 577808 }, { "epoch": 49.85024154589372, "grad_norm": 0.37347427010536194, "learning_rate": 0.001, "loss": 1.7438, "step": 577864 }, { "epoch": 49.85507246376812, "grad_norm": 0.6687127947807312, "learning_rate": 0.001, "loss": 1.7367, "step": 577920 }, { "epoch": 49.85990338164251, "grad_norm": 0.7395805716514587, "learning_rate": 0.001, "loss": 1.7466, "step": 577976 }, { "epoch": 49.86473429951691, "grad_norm": 1.0020817518234253, "learning_rate": 0.001, "loss": 1.7421, "step": 578032 }, { "epoch": 49.869565217391305, "grad_norm": 0.33438247442245483, "learning_rate": 0.001, "loss": 1.7329, "step": 578088 }, { "epoch": 49.8743961352657, "grad_norm": 2.732793092727661, "learning_rate": 0.001, "loss": 1.7407, "step": 578144 }, { "epoch": 49.8792270531401, "grad_norm": 0.3167946934700012, "learning_rate": 0.001, "loss": 1.751, "step": 578200 }, { "epoch": 49.88405797101449, "grad_norm": 0.34583985805511475, "learning_rate": 0.001, "loss": 1.7377, "step": 578256 }, { "epoch": 49.888888888888886, "grad_norm": 10.259955406188965, "learning_rate": 0.001, "loss": 1.7303, "step": 578312 }, { "epoch": 49.893719806763286, "grad_norm": 0.6764304041862488, "learning_rate": 0.001, "loss": 1.7257, "step": 578368 }, { "epoch": 49.89855072463768, "grad_norm": 1.349168300628662, "learning_rate": 0.001, "loss": 1.7378, "step": 578424 }, { "epoch": 49.90338164251208, "grad_norm": 0.29108908772468567, "learning_rate": 0.001, "loss": 1.7459, "step": 578480 }, { "epoch": 49.908212560386474, "grad_norm": 1.0709141492843628, "learning_rate": 0.001, "loss": 1.7554, "step": 578536 }, { "epoch": 49.91304347826087, "grad_norm": 0.3281553089618683, "learning_rate": 0.001, "loss": 1.7655, "step": 578592 }, { "epoch": 49.91787439613527, "grad_norm": 0.9314797520637512, "learning_rate": 0.001, "loss": 1.7524, "step": 578648 }, { "epoch": 49.92270531400966, "grad_norm": 0.557636559009552, "learning_rate": 0.001, "loss": 1.7467, "step": 578704 }, { "epoch": 49.927536231884055, "grad_norm": 1.226763129234314, "learning_rate": 0.001, "loss": 1.739, "step": 578760 }, { "epoch": 49.932367149758456, "grad_norm": 0.3198475241661072, "learning_rate": 0.001, "loss": 1.7438, "step": 578816 }, { "epoch": 49.93719806763285, "grad_norm": 0.2477244883775711, "learning_rate": 0.001, "loss": 1.7395, "step": 578872 }, { "epoch": 49.94202898550725, "grad_norm": 0.30659806728363037, "learning_rate": 0.001, "loss": 1.7443, "step": 578928 }, { "epoch": 49.94685990338164, "grad_norm": 0.5598821640014648, "learning_rate": 0.001, "loss": 1.7497, "step": 578984 }, { "epoch": 49.95169082125604, "grad_norm": 0.30812034010887146, "learning_rate": 0.001, "loss": 1.7421, "step": 579040 }, { "epoch": 49.95652173913044, "grad_norm": 0.8377183079719543, "learning_rate": 0.001, "loss": 1.7308, "step": 579096 }, { "epoch": 49.96135265700483, "grad_norm": 1.4127155542373657, "learning_rate": 0.001, "loss": 1.7338, "step": 579152 }, { "epoch": 49.966183574879224, "grad_norm": 0.30669254064559937, "learning_rate": 0.001, "loss": 1.7293, "step": 579208 }, { "epoch": 49.971014492753625, "grad_norm": 0.42687511444091797, "learning_rate": 0.001, "loss": 1.7349, "step": 579264 }, { "epoch": 49.97584541062802, "grad_norm": 0.7905052304267883, "learning_rate": 0.001, "loss": 1.724, "step": 579320 }, { "epoch": 49.98067632850242, "grad_norm": 2.008997678756714, "learning_rate": 0.001, "loss": 1.7413, "step": 579376 }, { "epoch": 49.98550724637681, "grad_norm": 0.31003424525260925, "learning_rate": 0.001, "loss": 1.7438, "step": 579432 }, { "epoch": 49.990338164251206, "grad_norm": 0.6134177446365356, "learning_rate": 0.001, "loss": 1.7418, "step": 579488 }, { "epoch": 49.99516908212561, "grad_norm": 0.3617597222328186, "learning_rate": 0.001, "loss": 1.7374, "step": 579544 }, { "epoch": 50.0, "grad_norm": 0.2905556559562683, "learning_rate": 0.001, "loss": 1.7342, "step": 579600 } ], "logging_steps": 56, "max_steps": 579600, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 278, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5529316325754143e+19, "train_batch_size": 230, "trial_name": null, "trial_params": null }